1 karl 1.38 //%2006////////////////////////////////////////////////////////////////////////
|
2 mike 1.13 //
|
3 karl 1.29 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
|
6 karl 1.27 // IBM Corp.; EMC Corporation, The Open Group.
|
7 karl 1.29 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
|
9 karl 1.31 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
|
11 karl 1.38 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
12 // EMC Corporation; Symantec Corporation; The Open Group.
|
13 mike 1.13 //
14 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
15 kumpf 1.16 // of this software and associated documentation files (the "Software"), to
16 // deal in the Software without restriction, including without limitation the
17 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
18 mike 1.13 // sell copies of the Software, and to permit persons to whom the Software is
19 // furnished to do so, subject to the following conditions:
|
20 karl 1.38 //
|
21 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
22 mike 1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
23 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
24 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
25 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
26 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
27 mike 1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 //
30 //==============================================================================
31 //
32 //%/////////////////////////////////////////////////////////////////////////////
33
34 ////////////////////////////////////////////////////////////////////////////////
35 //
36 // XmlParser
37 //
|
38 david.dillard 1.32 // This file contains a simple non-validating XML parser. Here are
39 // several rules for well-formed XML:
|
40 mike 1.13 //
|
41 david.dillard 1.32 // 1. Documents must begin with an XML declaration:
|
42 mike 1.13 //
|
43 david.dillard 1.32 // <?xml version="1.0" standalone="yes"?>
|
44 mike 1.13 //
|
45 david.dillard 1.32 // 2. Comments have the form:
|
46 mike 1.13 //
|
47 david.dillard 1.32 // <!-- blah blah blah -->
|
48 mike 1.13 //
|
49 david.dillard 1.32 // 3. The following entity references are supported:
|
50 mike 1.13 //
|
51 david.dillard 1.32 // &amp; - ampersand
52 // &lt; - less-than
53 // &gt; - greater-than
54 // &quot; - full quote
55 // &apos; - apostrophe
|
56 mike 1.13 //
|
57 kumpf 1.18 // as well as character (numeric) references:
|
58 mike 1.35 //
|
59 kumpf 1.18 // &#49; - decimal reference for character '1'
60 // &#x31; - hexadecimal reference for character '1'
61 //
|
62 david.dillard 1.32 // 4. Element names and attribute names take the following form:
|
63 mike 1.13 //
|
64 david.dillard 1.32 // [A-Za-z_][A-Za-z_0-9-.:]*
|
65 mike 1.13 //
|
66 david.dillard 1.32 // 5. Arbitrary data (CDATA) can be enclosed like this:
|
67 mike 1.13 //
|
68 david.dillard 1.32 // <![CDATA[
69 // ...
70 // ]]>
|
71 mike 1.13 //
|
72 david.dillard 1.32 // 6. Element names and attributes names are case-sensitive.
|
73 mike 1.13 //
|
74 david.dillard 1.32 // 7. XmlAttribute values must be delimited by full or half quotes.
75 // XmlAttribute values must be delimited.
|
76 mike 1.13 //
|
77 david.dillard 1.32 // 8. <!DOCTYPE...>
|
78 mike 1.13 //
79 // TODO:
80 //
|
81 kumpf 1.40 // ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is
|
82 mike 1.35 // work. Handle <!DOCTYPE...> sections which are complicated (containing
|
83 mike 1.13 // rules rather than references to files).
84 //
|
85 david.dillard 1.32 // Remove newlines from string literals:
|
86 mike 1.13 //
87 // Example: <xyz x="hello
|
88 david.dillard 1.32 // world">
|
89 mike 1.13 //
90 ////////////////////////////////////////////////////////////////////////////////
91
|
92 sage 1.14 #include <Pegasus/Common/Config.h>
|
93 mike 1.13 #include <cctype>
94 #include <cstdio>
95 #include <cstdlib>
96 #include <cstring>
97 #include "XmlParser.h"
98 #include "Logger.h"
|
99 chuck 1.19 #include "ExceptionRep.h"
|
100 mike 1.34 #include "CharSet.h"
|
101 mike 1.13
102 PEGASUS_NAMESPACE_BEGIN
103
104 ////////////////////////////////////////////////////////////////////////////////
105 //
106 // Static helper functions
107 //
108 ////////////////////////////////////////////////////////////////////////////////
109
110 static void _printValue(const char* p)
111 {
112 for (; *p; p++)
113 {
|
114 david.dillard 1.32 if (*p == '\n')
115 PEGASUS_STD(cout) << "\\n";
116 else if (*p == '\r')
117 PEGASUS_STD(cout) << "\\r";
118 else if (*p == '\t')
119 PEGASUS_STD(cout) << "\\t";
120 else
121 PEGASUS_STD(cout) << *p;
|
122 mike 1.13 }
123 }
124
125 struct EntityReference
126 {
127 const char* match;
128 Uint32 length;
129 char replacement;
130 };
131
|
132 kumpf 1.18 // ATTN: Add support for more entity references
|
133 mike 1.13 static EntityReference _references[] =
134 {
135 { "&", 5, '&' },
136 { "<", 4, '<' },
137 { ">", 4, '>' },
138 { """, 6, '"' },
139 { "'", 6, '\'' }
140 };
141
|
142 chuck 1.26
143 // Implements a check for a whitespace character, without calling
144 // isspace( ). The isspace( ) function is locale-sensitive,
145 // and incorrectly flags some chars above 0x7f as whitespace. This
146 // causes the XmlParser to incorrectly parse UTF-8 data.
147 //
148 // Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml)
149 // defines white space as:
|
150 david.dillard 1.32 // S ::= (#x20 | #x9 | #xD | #xA)+
|
151 mike 1.34 static inline int _isspace(char c)
|
152 chuck 1.26 {
|
153 kumpf 1.36 return CharSet::isXmlWhiteSpace((Uint8)c);
|
154 chuck 1.26 }
155
|
156 mike 1.13 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
157
158 ////////////////////////////////////////////////////////////////////////////////
159 //
160 // XmlException
161 //
162 ////////////////////////////////////////////////////////////////////////////////
163
// Default (English) message texts for the XmlException codes. The helpers
// below index this table with (code - 1), so the order of the entries must
// stay in step with the _xmlKeys table.
static const char* _xmlMessages[] =
{
    "Bad opening element",
    "Bad closing element",
    "Bad attribute name",
    "Expected equal sign",
    "Bad attribute value",
    "A \"--\" sequence found within comment",
    "Unterminated comment",
    "Unterminated CDATA block",
    "Unterminated DOCTYPE",
    "Too many attributes: parser only handles 10",
    "Malformed reference",
    "Expected a comment or CDATA following \"<!\" sequence",
    "Closing element does not match opening element",
    "One or more tags are still open",
    "More than one root element was encountered",
    "Validation error",
    "Semantic error"
};
184
|
// Message keys for the XmlException codes. The helpers below index this
// table with (code - 1), in parallel with _xmlMessages, and hand the key to
// MessageLoaderParms together with the default text.
static const char* _xmlKeys[] =
{
    "Common.XmlParser.BAD_START_TAG",
    "Common.XmlParser.BAD_END_TAG",
    "Common.XmlParser.BAD_ATTRIBUTE_NAME",
    "Common.XmlParser.EXPECTED_EQUAL_SIGN",
    "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
    "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
    "Common.XmlParser.UNTERMINATED_COMMENT",
    "Common.XmlParser.UNTERMINATED_CDATA",
    "Common.XmlParser.UNTERMINATED_DOCTYPE",
    "Common.XmlParser.TOO_MANY_ATTRIBUTES",
    "Common.XmlParser.MALFORMED_REFERENCE",
    "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
    "Common.XmlParser.START_END_MISMATCH",
    "Common.XmlParser.UNCLOSED_TAGS",
    "Common.XmlParser.MULTIPLE_ROOTS",
    "Common.XmlParser.VALIDATION_ERROR",
    "Common.XmlParser.SEMANTIC_ERROR"
};
205
|
206 chuck 1.23 // l10n replace _formMessage (comment out the old one)
|
207 chuck 1.19 /*
|
208 mike 1.13 static String _formMessage(Uint32 code, Uint32 line, const String& message)
209 {
210 String result = _xmlMessages[Uint32(code) - 1];
211
212 char buffer[32];
213 sprintf(buffer, "%d", line);
214 result.append(": on line ");
215 result.append(buffer);
216
217 if (message.size())
218 {
|
219 david.dillard 1.32 result.append(": ");
220 result.append(message);
|
221 mike 1.13 }
222
223 return result;
224 }
|
225 chuck 1.19 */
226
|
227 kumpf 1.40 static MessageLoaderParms _formMessage(
228 Uint32 code,
229 Uint32 line,
230 const String& message)
|
231 chuck 1.19 {
232 String dftMsg = _xmlMessages[Uint32(code) - 1];
233 String key = _xmlKeys[Uint32(code) - 1];
|
234 david.dillard 1.32 String msg = message;
|
235 chuck 1.19
236 dftMsg.append(": on line $0");
237 if (message.size())
238 {
|
239 david.dillard 1.32 msg = ": " + msg;
240 dftMsg.append("$1");
241 }
|
242 chuck 1.19
243 return MessageLoaderParms(key, dftMsg, line ,msg);
244 }
245
246 static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
247 {
248 String dftMsg = _xmlMessages[Uint32(code) - 1];
249 String key = _xmlKeys[Uint32(code) - 1];
250
251 dftMsg.append(": on line $0");
|
252 david.dillard 1.32
|
253 chuck 1.19 return MessageLoaderParms(key, dftMsg, line);
254 }
255
|
256 mike 1.13
257 XmlException::XmlException(
|
258 david.dillard 1.32 XmlException::Code code,
|
259 mike 1.13 Uint32 lineNumber,
|
260 david.dillard 1.32 const String& message)
|
261 mike 1.13 : Exception(_formMessage(code, lineNumber, message))
262 {
263
264 }
265
|
266 chuck 1.19
267 XmlException::XmlException(
|
268 david.dillard 1.32 XmlException::Code code,
|
269 chuck 1.19 Uint32 lineNumber,
|
270 david.dillard 1.32 MessageLoaderParms& msgParms)
|
271 chuck 1.19 : Exception(_formPartialMessage(code, lineNumber))
272 {
|
273 david.dillard 1.32 if (msgParms.default_msg.size())
|
274 humberto 1.21 {
|
275 david.dillard 1.32 msgParms.default_msg = ": " + msgParms.default_msg;
276 }
277 _rep->message.append(MessageLoader::getMessage(msgParms));
|
278 chuck 1.19 }
279
280
|
281 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
282 //
283 // XmlValidationError
284 //
285 ////////////////////////////////////////////////////////////////////////////////
286
287 XmlValidationError::XmlValidationError(
288 Uint32 lineNumber,
289 const String& message)
290 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
291 {
292 }
293
|
294 chuck 1.19
295 XmlValidationError::XmlValidationError(
296 Uint32 lineNumber,
297 MessageLoaderParms& msgParms)
298 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
299 {
300 }
301
302
|
303 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
304 //
305 // XmlSemanticError
306 //
307 ////////////////////////////////////////////////////////////////////////////////
308
309 XmlSemanticError::XmlSemanticError(
310 Uint32 lineNumber,
311 const String& message)
312 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
313 {
314 }
|
315 chuck 1.19
316
317 XmlSemanticError::XmlSemanticError(
318 Uint32 lineNumber,
319 MessageLoaderParms& msgParms)
320 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
321 {
322 }
323
|
324 mike 1.13
325 ////////////////////////////////////////////////////////////////////////////////
326 //
327 // XmlParser
328 //
329 ////////////////////////////////////////////////////////////////////////////////
330
|
331 kumpf 1.40 XmlParser::XmlParser(char* text)
332 : _line(1),
333 _current(text),
334 _restoreChar('\0'),
335 _foundRoot(false)
|
336 mike 1.13 {
337 }
338
|
339 mike 1.34 inline void _skipWhitespace(Uint32& line, char*& p)
340 {
341 while (*p && _isspace(*p))
342 {
343 if (*p == '\n')
344 line++;
345
346 p++;
347 }
348 }
349
|
// Recognizes one of the five predefined entity names. On entry p points just
// past the '&'. When the text at p is "gt;", "lt;", "apos;", "quot;" or
// "amp;", p is advanced past the ';' and the replacement character is
// returned. Otherwise p is left unchanged and -1 is returned.
//
// Every comparison chain short-circuits, so no character beyond a
// null-terminator is ever read.
static int _getEntityRef(char*& p)
{
    switch (p[0])
    {
        case 'g':
            if ((p[1] == 't') && (p[2] == ';'))
            {
                p += 3;
                return '>';
            }
            break;

        case 'l':
            if ((p[1] == 't') && (p[2] == ';'))
            {
                p += 3;
                return '<';
            }
            break;

        case 'a':
            if ((p[1] == 'p') && (p[2] == 'o') && (p[3] == 's') &&
                (p[4] == ';'))
            {
                p += 5;
                return '\'';
            }
            if ((p[1] == 'm') && (p[2] == 'p') && (p[3] == ';'))
            {
                p += 4;
                return '&';
            }
            break;

        case 'q':
            if ((p[1] == 'u') && (p[2] == 'o') && (p[3] == 't') &&
                (p[4] == ';'))
            {
                p += 5;
                return '"';
            }
            break;

        default:
            break;
    }

    return -1;
}
386
|
// Parses the body of a character (numeric) reference. On entry p points just
// past the "&#". Accepted forms are
//
//     [0-9]+ ';'            decimal, at most 5 digits
//     'x' [0-9a-fA-F]+ ';'  hexadecimal, at most 4 digits
//
// with a value no greater than 255. On success p is advanced past the ';'
// and the character value is returned; on any error -1 is returned.
//
// The digits are scanned by hand rather than with strtoul( ), because
// strtoul( ) also accepts leading whitespace, a sign and (in base 16) a "0x"
// prefix, none of which may appear in a character reference.
//
// NOTE(review): a value of 0 ("&#0;") is still accepted, as before, and puts
// a null character into the parsed text - confirm whether that is wanted.
static inline int _getCharRef(char*& p)
{
    const bool hex = (*p == 'x');

    if (hex)
    {
        ++p;
    }

    const unsigned long base = hex ? 16 : 10;
    const int maxDigits = hex ? 4 : 5;

    unsigned long ch = 0;
    char* end = p;

    for (;; ++end)
    {
        unsigned long digit;

        if ((*end >= '0') && (*end <= '9'))
        {
            digit = static_cast<unsigned long>(*end - '0');
        }
        else if (hex && (*end >= 'a') && (*end <= 'f'))
        {
            digit = static_cast<unsigned long>(*end - 'a') + 10;
        }
        else if (hex && (*end >= 'A') && (*end <= 'F'))
        {
            digit = static_cast<unsigned long>(*end - 'A') + 10;
        }
        else
        {
            break;
        }

        // Reject over-long digit strings before accumulating, which also
        // keeps ch from overflowing.
        if (end - p >= maxDigits)
        {
            return -1;
        }

        ch = ch * base + digit;

        if (ch > 255)
        {
            return -1;
        }
    }

    // There must be at least one digit, and the digits must end with ';'.
    if ((end == p) || (*end != ';'))
    {
        return -1;
    }

    p = end + 1;

    return static_cast<int>(ch);
}
417
|
418 kumpf 1.42.4.1 // Parse an entity reference or a character reference
419 static inline int _getRef(Uint32 line, char*& p)
|
420 kumpf 1.37 {
|
421 kumpf 1.42.4.1 int ch;
|
422 kumpf 1.37
|
423 kumpf 1.42.4.1 if (*p == '#')
424 {
425 ch = _getCharRef(++p);
426 }
427 else
428 {
429 ch = _getEntityRef(p);
430 }
431
432 if (ch == -1)
433 {
434 throw XmlException(XmlException::MALFORMED_REFERENCE, line);
435 }
|
436 kumpf 1.37
|
437 kumpf 1.42.4.1 return ch;
438 }
439
440 static inline void _normalizeElementValue(
441 Uint32& line,
442 char*& p)
443 {
|
444 kumpf 1.37 // Process one character at a time:
445
446 char* q = p;
447
|
448 kumpf 1.42.4.1 while (*p && (*p != '<'))
|
449 kumpf 1.37 {
450 if (_isspace(*p))
451 {
|
452 kumpf 1.42.4.1 // Trim whitespace from the end of the value, but do not compress
453 // whitespace within the value.
454
455 const char* start = p;
|
456 kumpf 1.37
457 if (*p++ == '\n')
458 {
459 line++;
460 }
461
462 _skipWhitespace(line, p);
463
|
464 kumpf 1.42.4.1 if (*p && (*p != '<'))
|
465 kumpf 1.37 {
|
466 kumpf 1.42.4.1 // Transfer internal whitespace to q without compressing it.
467 const char* i = start;
468 while (i < p)
|
469 kumpf 1.37 {
|
470 kumpf 1.42.4.1 *q++ = *i++;
|
471 kumpf 1.37 }
472 }
473 else
474 {
|
475 kumpf 1.42.4.1 // Do not transfer trailing whitespace to q.
476 break;
|
477 kumpf 1.37 }
|
478 kumpf 1.42.4.1 }
479 else if (*p == '&')
480 {
481 // Process an entity reference or a character reference.
482
483 *q++ = _getRef(line, ++p);
484 }
485 else
486 {
487 *q++ = *p++;
488 }
489 }
490
491 // If q got behind p, it is safe and necessary to null-terminate q
|
492 kumpf 1.37
|
493 kumpf 1.42.4.1 if (q != p)
494 {
495 *q = '\0';
496 }
497 }
498
499 static inline void _normalizeAttributeValue(
500 Uint32& line,
501 char*& p,
502 char end_char,
503 char*& start)
504 {
505 // Skip over leading whitespace:
506
507 _skipWhitespace(line, p);
508 start = p;
509
510 // Process one character at a time:
511
512 char* q = p;
513
514 kumpf 1.42.4.1 while (*p && (*p != end_char))
515 {
516 if (_isspace(*p))
517 {
518 // Compress sequences of whitespace characters to a single space
519 // character. Update line number when newlines encountered.
520
521 if (*p++ == '\n')
|
522 kumpf 1.37 {
|
523 kumpf 1.42.4.1 line++;
|
524 kumpf 1.37 }
525
|
526 kumpf 1.42.4.1 *q++ = ' ';
527
528 _skipWhitespace(line, p);
529 }
530 else if (*p == '&')
531 {
532 // Process an entity reference or a character reference.
533
534 *q++ = _getRef(line, ++p);
|
535 kumpf 1.37 }
536 else
537 {
538 *q++ = *p++;
539 }
540 }
541
542 // Remove single trailing whitespace (consecutive whitespaces already
543 // compressed above). Since p >= q, we can tell if we need to strip a
544 // trailing space from q by looking at the end of p. We must not look at
545 // the last character of p, though, if p is an empty string.
|
546 kumpf 1.42.4.1 Boolean adjust_q = (p != start) && _isspace(p[-1]);
|
547 kumpf 1.37
|
548 kumpf 1.42.4.1 // We encountered a the end_char or a zero-terminator.
549
550 *q = *p;
551
552 if (adjust_q)
|
553 kumpf 1.37 {
554 q--;
555 }
556
557 // If q got behind p, it is safe and necessary to null-terminate q
558
559 if (q != p)
560 {
561 *q = '\0';
562 }
563 }
564
|
565 venkat.puvvada 1.41 Boolean XmlParser::next(XmlEntry& entry, Boolean includeComment)
|
566 mike 1.13 {
567 if (!_putBackStack.isEmpty())
568 {
|
569 david.dillard 1.32 entry = _putBackStack.top();
570 _putBackStack.pop();
571 return true;
|
572 mike 1.13 }
573
574 // If a character was overwritten with a null-terminator the last
575 // time this routine was called, then put back that character. Before
576 // exiting of course, restore the null-terminator.
577
578 char* nullTerminator = 0;
579
580 if (_restoreChar && !*_current)
581 {
|
582 david.dillard 1.32 nullTerminator = _current;
583 *_current = _restoreChar;
584 _restoreChar = '\0';
|
585 mike 1.13 }
586
|
587 venkat.puvvada 1.41 // Loop until we are done with comments if includeComment is false.
588 do
589 {
590 // Skip over any whitespace:
591 _skipWhitespace(_line, _current);
592
593 if (!*_current)
594 {
595 if (nullTerminator)
596 *nullTerminator = '\0';
|
597 mike 1.13
|
598 venkat.puvvada 1.41 if (!_stack.isEmpty())
599 throw XmlException(XmlException::UNCLOSED_TAGS, _line);
|
600 mike 1.13
|
601 venkat.puvvada 1.41 return false;
602 }
|
603 mike 1.13
|
604 venkat.puvvada 1.41 // Either a "<...>" or content begins next:
|
605 mike 1.13
|
606 venkat.puvvada 1.41 if (*_current == '<')
607 {
608 _current++;
609 _getElement(_current, entry);
|
610 mike 1.13
|
611 venkat.puvvada 1.41 if (nullTerminator)
612 *nullTerminator = '\0';
|
613 mike 1.13
|
614 venkat.puvvada 1.41 if (entry.type == XmlEntry::START_TAG)
615 {
616 if (_stack.isEmpty() && _foundRoot)
617 throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
|
618 mike 1.13
|
619 venkat.puvvada 1.41 _foundRoot = true;
620 _stack.push((char*)entry.text);
621 }
622 else if (entry.type == XmlEntry::END_TAG)
623 {
624 if (_stack.isEmpty())
625 throw XmlException(XmlException::START_END_MISMATCH, _line);
|
626 mike 1.13
|
627 venkat.puvvada 1.41 if (strcmp(_stack.top(), entry.text) != 0)
628 throw XmlException(XmlException::START_END_MISMATCH, _line);
|
629 david.dillard 1.32
|
630 venkat.puvvada 1.41 _stack.pop();
631 }
|
632 david.dillard 1.32 }
|
633 venkat.puvvada 1.41 else
|
634 david.dillard 1.32 {
|
635 venkat.puvvada 1.41 // Normalize the content:
|
636 mike 1.13
|
637 kumpf 1.42.4.1 char* start = _current;
638 _normalizeElementValue(_line, _current);
|
639 mike 1.13
|
640 venkat.puvvada 1.41 // Get the content:
|
641 mike 1.13
|
642 venkat.puvvada 1.41 entry.type = XmlEntry::CONTENT;
643 entry.text = start;
|
644 kumpf 1.37
|
645 venkat.puvvada 1.41 // Overwrite '<' with a null character (temporarily).
|
646 kumpf 1.37
|
647 venkat.puvvada 1.41 _restoreChar = *_current;
648 *_current = '\0';
|
649 kumpf 1.37
|
650 venkat.puvvada 1.41 if (nullTerminator)
651 *nullTerminator = '\0';
652 }
653 }while (!includeComment && entry.type == XmlEntry::COMMENT);
|
654 kumpf 1.37
|
655 venkat.puvvada 1.41 return true;
|
656 mike 1.13 }
657
658 void XmlParser::putBack(XmlEntry& entry)
659 {
660 _putBackStack.push(entry);
661 }
662
663 XmlParser::~XmlParser()
664 {
665 // Nothing to do!
666 }
667
|
// Lookup table indexed by character code (0-255): 1 for the characters that
// may follow the first character of an element or attribute name, that is
// A-Za-z0-9_-:. and 0 for everything else. Sixteen codes per row.
static unsigned char _isInnerElementChar[] =
{
    /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x10 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x20 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0, // - .
    /* 0x30 */ 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0, // 0-9 :
    /* 0x40 */ 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // A-O
    /* 0x50 */ 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, // P-Z _
    /* 0x60 */ 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // a-o
    /* 0x70 */ 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0, // p-z
    /* 0x80 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x90 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xA0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xB0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xC0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xD0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xE0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xF0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
679
|
680 mike 1.13 Boolean XmlParser::_getElementName(char*& p)
681 {
|
682 mike 1.35 if (!CharSet::isAlNumUnder(Uint8(*p)))
|
683 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
684 mike 1.35
|
685 kumpf 1.24 p++;
|
686 mike 1.13
|
687 mike 1.35 while (*p && _isInnerElementChar[Uint8(*p)])
|
688 david.dillard 1.32 p++;
|
689 mike 1.13
690 // The next character must be a space:
691
|
692 chuck 1.26 if (_isspace(*p))
|
693 mike 1.13 {
|
694 david.dillard 1.32 *p++ = '\0';
|
695 mike 1.34 _skipWhitespace(_line, p);
|
696 mike 1.13 }
697
698 if (*p == '>')
699 {
|
700 david.dillard 1.32 *p++ = '\0';
701 return true;
|
702 mike 1.13 }
703
704 return false;
705 }
706
707 Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement)
708 {
709 openCloseElement = false;
710
|
711 mike 1.35 if (!CharSet::isAlNumUnder(Uint8(*p)))
|
712 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
713 mike 1.35
|
714 kumpf 1.24 p++;
|
715 mike 1.13
|
716 mike 1.35 while (*p && _isInnerElementChar[Uint8(*p)])
|
717 david.dillard 1.32 p++;
|
718 mike 1.13
719 // The next character must be a space:
720
|
721 chuck 1.26 if (_isspace(*p))
|
722 mike 1.13 {
|
723 david.dillard 1.32 *p++ = '\0';
|
724 mike 1.34 _skipWhitespace(_line, p);
|
725 mike 1.13 }
726
727 if (*p == '>')
728 {
|
729 david.dillard 1.32 *p++ = '\0';
730 return true;
|
731 mike 1.13 }
732
733 if (p[0] == '/' && p[1] == '>')
734 {
|
735 david.dillard 1.32 openCloseElement = true;
736 *p = '\0';
737 p += 2;
738 return true;
|
739 mike 1.13 }
740
741 return false;
742 }
743
744 void XmlParser::_getAttributeNameAndEqual(char*& p)
745 {
|
746 mike 1.35 if (!CharSet::isAlNumUnder((Uint8)*p))
|
747 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
748 mike 1.35
|
749 kumpf 1.24 p++;
|
750 mike 1.13
|
751 mike 1.35 while (*p && _isInnerElementChar[Uint8(*p)])
|
752 david.dillard 1.32 p++;
|
753 mike 1.13
754 char* term = p;
755
|
756 mike 1.34 _skipWhitespace(_line, p);
|
757 mike 1.13
758 if (*p != '=')
|
759 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
760 mike 1.13
761 p++;
762
|
763 mike 1.34 _skipWhitespace(_line, p);
|
764 mike 1.13
765 *term = '\0';
766 }
767
768 void XmlParser::_getComment(char*& p)
769 {
770 // Now p points to first non-whitespace character beyond "<--" sequence:
771
772 for (; *p; p++)
773 {
|
774 david.dillard 1.32 if (p[0] == '-' && p[1] == '-')
775 {
776 if (p[2] != '>')
777 {
778 throw XmlException(
779 XmlException::MINUS_MINUS_IN_COMMENT, _line);
780 }
781
782 // Find end of comment (excluding whitespace):
783
784 *p = '\0';
785 p += 3;
786 return;
787 }
|
788 mike 1.13 }
789
790 // If it got this far, then the comment is unterminated:
791
792 throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
793 }
794
795 void XmlParser::_getCData(char*& p)
796 {
797 // At this point p points one past "<![CDATA[" sequence:
798
799 for (; *p; p++)
800 {
|
801 david.dillard 1.32 if (p[0] == ']' && p[1] == ']' && p[2] == '>')
802 {
803 *p = '\0';
804 p += 3;
805 return;
806 }
807 else if (*p == '\n')
808 _line++;
|
809 mike 1.13 }
810
811 // If it got this far, then the comment is unterminated:
812
813 throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
814 }
815
816 void XmlParser::_getDocType(char*& p)
817 {
818 // Just ignore the DOCTYPE command for now:
819
820 for (; *p && *p != '>'; p++)
821 {
|
822 david.dillard 1.32 if (*p == '\n')
823 _line++;
|
824 mike 1.13 }
825
826 if (*p != '>')
|
827 david.dillard 1.32 throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
|
828 mike 1.13
829 p++;
830 }
831
832 void XmlParser::_getElement(char*& p, XmlEntry& entry)
833 {
834 entry.attributeCount = 0;
835
836 //--------------------------------------------------------------------------
837 // Get the element name (expect one of these: '?', '!', [A-Za-z_])
838 //--------------------------------------------------------------------------
839
840 if (*p == '?')
841 {
|
842 david.dillard 1.32 entry.type = XmlEntry::XML_DECLARATION;
843 entry.text = ++p;
|
844 mike 1.13
|
845 david.dillard 1.32 Boolean openCloseElement = false;
|
846 mike 1.13
|
847 david.dillard 1.32 if (_getElementName(p))
848 return;
|
849 mike 1.13 }
850 else if (*p == '!')
851 {
|
852 david.dillard 1.32 p++;
|
853 mike 1.13
|
854 david.dillard 1.32 // Expect a comment or CDATA:
|
855 mike 1.13
|
856 david.dillard 1.32 if (p[0] == '-' && p[1] == '-')
857 {
858 p += 2;
859 entry.type = XmlEntry::COMMENT;
860 entry.text = p;
861 _getComment(p);
862 return;
863 }
864 else if (memcmp(p, "[CDATA[", 7) == 0)
865 {
866 p += 7;
867 entry.type = XmlEntry::CDATA;
868 entry.text = p;
869 _getCData(p);
870 return;
871 }
872 else if (memcmp(p, "DOCTYPE", 7) == 0)
873 {
874 entry.type = XmlEntry::DOCTYPE;
|
875 kumpf 1.37 entry.text = "";
|
876 david.dillard 1.32 _getDocType(p);
877 return;
878 }
879 throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
|
880 mike 1.13 }
881 else if (*p == '/')
882 {
|
883 david.dillard 1.32 entry.type = XmlEntry::END_TAG;
884 entry.text = ++p;
|
885 mike 1.13
|
886 david.dillard 1.32 if (!_getElementName(p))
887 throw(XmlException(XmlException::BAD_END_TAG, _line));
|
888 mike 1.13
|
889 david.dillard 1.32 return;
|
890 mike 1.13 }
|
891 david 1.25 else if ((((*p >= 'A') && (*p <= 'Z')) ||
|
892 kumpf 1.24 ((*p >= 'a') && (*p <= 'z')) ||
893 (*p == '_')))
|
894 mike 1.13 {
|
895 david.dillard 1.32 entry.type = XmlEntry::START_TAG;
896 entry.text = p;
|
897 mike 1.13
|
898 david.dillard 1.32 Boolean openCloseElement = false;
|
899 mike 1.13
|
900 david.dillard 1.32 if (_getOpenElementName(p, openCloseElement))
901 {
902 if (openCloseElement)
903 entry.type = XmlEntry::EMPTY_TAG;
904 return;
905 }
|
906 mike 1.13 }
907 else
|
908 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
909 mike 1.13
910 //--------------------------------------------------------------------------
911 // Grab all the attributes:
912 //--------------------------------------------------------------------------
913
914 for (;;)
915 {
|
916 david.dillard 1.32 if (entry.type == XmlEntry::XML_DECLARATION)
917 {
918 if (p[0] == '?' && p[1] == '>')
919 {
920 p += 2;
921 return;
922 }
923 }
924 else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
925 {
926 entry.type = XmlEntry::EMPTY_TAG;
927 p += 2;
928 return;
929 }
930 else if (*p == '>')
931 {
932 p++;
933 return;
934 }
935
936 XmlAttribute attr;
937 david.dillard 1.32 attr.name = p;
938 _getAttributeNameAndEqual(p);
939
|
940 kumpf 1.37 // Get the attribute value (e.g., "some value")
941 {
942 if ((*p != '"') && (*p != '\''))
943 {
944 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
945 }
946
947 char quote = *p++;
948
949 char* start;
|
950 kumpf 1.42.4.1 _normalizeAttributeValue(_line, p, quote, start);
|
951 kumpf 1.37 attr.value = start;
952
953 if (*p != quote)
954 {
955 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
956 }
957
958 // Overwrite the closing quote with a null-terminator:
|
959 david.dillard 1.32
|
960 kumpf 1.37 *p++ = '\0';
961 }
|
962 david.dillard 1.32
963 if (entry.type == XmlEntry::XML_DECLARATION)
964 {
965 // The next thing must a space or a "?>":
|
966 mike 1.13
|
967 david.dillard 1.32 if (!(p[0] == '?' && p[1] == '>') && !_isspace(*p))
968 {
969 throw XmlException(
970 XmlException::BAD_ATTRIBUTE_VALUE, _line);
971 }
972 }
973 else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || _isspace(*p)))
974 {
975 // The next thing must be a space or a '>':
|
976 mike 1.13
|
977 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
978 }
|
979 mike 1.13
|
980 mike 1.34 _skipWhitespace(_line, p);
|
981 david.dillard 1.32
982 if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES)
983 throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line);
984
985 entry.attributes[entry.attributeCount++] = attr;
|
986 mike 1.13 }
987 }
988
// Printable names for XmlEntry types; XmlEntry::print( ) indexes this table
// with the entry's type value.
static const char* _typeStrings[] =
{
    "XML_DECLARATION",
    "START_TAG",
    "EMPTY_TAG",
    "END_TAG",
    "COMMENT",
    "CDATA",
    "DOCTYPE",
    "CONTENT"
};
1000
1001 void XmlEntry::print() const
1002 {
1003 PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
1004
1005 Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
1006
1007 if (needQuotes)
|
1008 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
1009
|
1010 mike 1.13 _printValue(text);
1011
1012 if (needQuotes)
|
1013 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
|
1014 mike 1.13
1015 PEGASUS_STD(cout) << '\n';
1016
1017 for (Uint32 i = 0; i < attributeCount; i++)
1018 {
|
1019 david.dillard 1.32 PEGASUS_STD(cout) << " " << attributes[i].name << "=\"";
1020 _printValue(attributes[i].value);
1021 PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
|
1022 mike 1.13 }
1023 }
1024
1025 const XmlAttribute* XmlEntry::findAttribute(
1026 const char* name) const
1027 {
1028 for (Uint32 i = 0; i < attributeCount; i++)
1029 {
|
1030 david.dillard 1.32 if (strcmp(attributes[i].name, name) == 0)
1031 return &attributes[i];
|
1032 mike 1.13 }
1033
1034 return 0;
1035 }
1036
1037 // Find first non-whitespace character (set first) and last non-whitespace
1038 // character (set last one past this). For example, consider this string:
1039 //
|
1040 david.dillard 1.32 // " 87 "
|
1041 mike 1.13 //
1042 // The first pointer would point to '8' and the last pointer woudl point one
1043 // beyond '7'.
1044
1045 static void _findEnds(
|
1046 david.dillard 1.32 const char* str,
1047 const char*& first,
|
1048 mike 1.13 const char*& last)
1049 {
1050 first = str;
1051
|
1052 chuck 1.26 while (_isspace(*first))
|
1053 david.dillard 1.32 first++;
|
1054 mike 1.13
1055 if (!*first)
1056 {
|
1057 david.dillard 1.32 last = first;
1058 return;
|
1059 mike 1.13 }
1060
1061 last = first + strlen(first);
1062
|
1063 chuck 1.26 while (last != first && _isspace(last[-1]))
|
1064 david.dillard 1.32 last--;
|
1065 mike 1.13 }
1066
1067 Boolean XmlEntry::getAttributeValue(
|
1068 david.dillard 1.32 const char* name,
|
1069 mike 1.13 Uint32& value) const
1070 {
1071 const XmlAttribute* attr = findAttribute(name);
1072
1073 if (!attr)
|
1074 david.dillard 1.32 return false;
|
1075 mike 1.13
1076 const char* first;
1077 const char* last;
1078 _findEnds(attr->value, first, last);
1079
1080 char* end = 0;
1081 long tmp = strtol(first, &end, 10);
1082
1083 if (!end || end != last)
|
1084 david.dillard 1.32 return false;
|
1085 mike 1.13
1086 value = Uint32(tmp);
1087 return true;
1088 }
1089
1090 Boolean XmlEntry::getAttributeValue(
|
1091 david.dillard 1.32 const char* name,
|
1092 mike 1.13 Real32& value) const
1093 {
1094 const XmlAttribute* attr = findAttribute(name);
1095
1096 if (!attr)
|
1097 david.dillard 1.32 return false;
|
1098 mike 1.13
1099 const char* first;
1100 const char* last;
1101 _findEnds(attr->value, first, last);
1102
1103 char* end = 0;
1104 double tmp = strtod(first, &end);
1105
1106 if (!end || end != last)
|
1107 david.dillard 1.32 return false;
|
1108 mike 1.13
|
1109 david.dillard 1.32 value = static_cast<Real32>(tmp);
|
1110 mike 1.13 return true;
1111 }
1112
1113 Boolean XmlEntry::getAttributeValue(
|
1114 david.dillard 1.32 const char* name,
|
1115 mike 1.13 const char*& value) const
1116 {
1117 const XmlAttribute* attr = findAttribute(name);
1118
1119 if (!attr)
|
1120 david.dillard 1.32 return false;
|
1121 mike 1.13
1122 value = attr->value;
1123 return true;
1124 }
1125
1126 Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1127 {
1128 const char* tmp;
1129
1130 if (!getAttributeValue(name, tmp))
|
1131 david.dillard 1.32 return false;
|
1132 mike 1.13
|
1133 chuck 1.28 value = String(tmp);
|
1134 mike 1.13 return true;
1135 }
1136
|
1137 mike 1.33 void XmlAppendCString(Buffer& out, const char* str)
|
1138 mike 1.13 {
|
1139 david.dillard 1.32 out.append(str, static_cast<Uint32>(strlen(str)));
|
1140 mike 1.13 }
1141
1142 PEGASUS_NAMESPACE_END
|