1 martin 1.51 //%LICENSE////////////////////////////////////////////////////////////////
|
2 martin 1.52 //
|
3 martin 1.51 // Licensed to The Open Group (TOG) under one or more contributor license
4 // agreements. Refer to the OpenPegasusNOTICE.txt file distributed with
5 // this work for additional information regarding copyright ownership.
6 // Each contributor licenses this file to you under the OpenPegasus Open
7 // Source License; you may not use this file except in compliance with the
8 // License.
|
9 martin 1.52 //
|
10 martin 1.51 // Permission is hereby granted, free of charge, to any person obtaining a
11 // copy of this software and associated documentation files (the "Software"),
12 // to deal in the Software without restriction, including without limitation
13 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
14 // and/or sell copies of the Software, and to permit persons to whom the
15 // Software is furnished to do so, subject to the following conditions:
|
16 martin 1.52 //
|
17 martin 1.51 // The above copyright notice and this permission notice shall be included
18 // in all copies or substantial portions of the Software.
|
19 martin 1.52 //
|
20 martin 1.51 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
21 martin 1.52 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22 martin 1.51 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24 // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25 // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26 // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27 martin 1.52 //
|
28 martin 1.51 //////////////////////////////////////////////////////////////////////////
|
29 mike 1.13 //
30 //%/////////////////////////////////////////////////////////////////////////////
31
32 ////////////////////////////////////////////////////////////////////////////////
33 //
34 // XmlParser
35 //
|
// This file contains a simple non-validating XML parser. Here are
// several rules for well-formed XML:
|
38 mike 1.13 //
|
39 david.dillard 1.32 // 1. Documents must begin with an XML declaration:
|
40 mike 1.13 //
|
41 david.dillard 1.32 // <?xml version="1.0" standalone="yes"?>
|
42 mike 1.13 //
|
43 david.dillard 1.32 // 2. Comments have the form:
|
44 mike 1.13 //
|
45 david.dillard 1.32 // <!-- blah blah blah -->
|
46 mike 1.13 //
|
47 david.dillard 1.32 // 3. The following entity references are supported:
|
48 mike 1.13 //
|
//        &amp; - ampersand
//        &lt; - less-than
//        &gt; - greater-than
//        &quot; - full quote
//        &apos; - apostrophe
|
54 mike 1.13 //
|
55 kumpf 1.18 // as well as character (numeric) references:
|
56 mike 1.35 //
|
//        &#49; - decimal reference for character '1'
//        &#x31; - hexadecimal reference for character '1'
59 //
|
60 david.dillard 1.32 // 4. Element names and attribute names take the following form:
|
61 mike 1.13 //
|
//        [A-Za-z_][A-Za-z_0-9-.:]*
|
63 mike 1.13 //
|
64 david.dillard 1.32 // 5. Arbitrary data (CDATA) can be enclosed like this:
|
65 mike 1.13 //
|
66 david.dillard 1.32 // <![CDATA[
67 // ...
68 // ]]>
|
69 mike 1.13 //
|
70 david.dillard 1.32 // 6. Element names and attributes names are case-sensitive.
|
71 mike 1.13 //
|
72 david.dillard 1.32 // 7. XmlAttribute values must be delimited by full or half quotes.
73 // XmlAttribute values must be delimited.
|
74 mike 1.13 //
|
75 david.dillard 1.32 // 8. <!DOCTYPE...>
|
76 mike 1.13 //
77 // TODO:
78 //
|
79 kumpf 1.40 // ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is
|
80 mike 1.35 // work. Handle <!DOCTYPE...> sections which are complicated (containing
|
81 mike 1.13 // rules rather than references to files).
82 //
|
83 david.dillard 1.32 // Remove newlines from string literals:
|
84 mike 1.13 //
85 // Example: <xyz x="hello
|
86 david.dillard 1.32 // world">
|
87 mike 1.13 //
88 ////////////////////////////////////////////////////////////////////////////////
89
|
90 sage 1.14 #include <Pegasus/Common/Config.h>
|
91 mike 1.13 #include <cctype>
92 #include <cstdio>
93 #include <cstdlib>
94 #include <cstring>
95 #include "XmlParser.h"
96 #include "Logger.h"
|
97 chuck 1.19 #include "ExceptionRep.h"
|
98 mike 1.34 #include "CharSet.h"
|
99 mike 1.13
100 PEGASUS_NAMESPACE_BEGIN
101
102 ////////////////////////////////////////////////////////////////////////////////
103 //
104 // Static helper functions
105 //
106 ////////////////////////////////////////////////////////////////////////////////
107
108 static void _printValue(const char* p)
109 {
110 for (; *p; p++)
111 {
|
112 david.dillard 1.32 if (*p == '\n')
113 PEGASUS_STD(cout) << "\\n";
114 else if (*p == '\r')
115 PEGASUS_STD(cout) << "\\r";
116 else if (*p == '\t')
117 PEGASUS_STD(cout) << "\\t";
118 else
119 PEGASUS_STD(cout) << *p;
|
120 mike 1.13 }
121 }
122
123 struct EntityReference
124 {
125 const char* match;
126 Uint32 length;
127 char replacement;
128 };
129
|
130 kumpf 1.18 // ATTN: Add support for more entity references
|
131 mike 1.13 static EntityReference _references[] =
132 {
133 { "&", 5, '&' },
134 { "<", 4, '<' },
135 { ">", 4, '>' },
136 { """, 6, '"' },
137 { "'", 6, '\'' }
138 };
139
|
140 chuck 1.26
141 // Implements a check for a whitespace character, without calling
142 // isspace( ). The isspace( ) function is locale-sensitive,
143 // and incorrectly flags some chars above 0x7f as whitespace. This
144 // causes the XmlParser to incorrectly parse UTF-8 data.
145 //
146 // Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml)
147 // defines white space as:
|
148 david.dillard 1.32 // S ::= (#x20 | #x9 | #xD | #xA)+
|
149 mike 1.34 static inline int _isspace(char c)
|
150 chuck 1.26 {
|
151 kumpf 1.36 return CharSet::isXmlWhiteSpace((Uint8)c);
|
152 chuck 1.26 }
153
|
154 mike 1.13 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
155
156 ////////////////////////////////////////////////////////////////////////////////
157 //
158 // XmlException
159 //
160 ////////////////////////////////////////////////////////////////////////////////
161
// Default (untranslated) message text for each XmlException code. The
// message helpers index this table with (code - 1), so the order must stay
// in step with _xmlKeys.
static const char* _xmlMessages[] =
{
    "Bad opening element",
    "Bad closing element",
    "Bad attribute name",
    "Expected equal sign",
    "Bad attribute value",
    "A \"--\" sequence found within comment",
    "Unterminated comment",
    "Unterminated CDATA block",
    "Unterminated DOCTYPE",
    "Malformed reference",
    "Expected a comment or CDATA following \"<!\" sequence",
    "Closing element does not match opening element",
    "One or more tags are still open",
    "More than one root element was encountered",
    "Validation error",
    "Semantic error",
    "Namespace not declared"
};
182
|
// Message keys, one per XmlException code, passed to MessageLoaderParms by
// the message helpers. Indexed with (code - 1); the order must stay in step
// with _xmlMessages.
static const char* _xmlKeys[] =
{
    "Common.XmlParser.BAD_START_TAG",
    "Common.XmlParser.BAD_END_TAG",
    "Common.XmlParser.BAD_ATTRIBUTE_NAME",
    "Common.XmlParser.EXPECTED_EQUAL_SIGN",
    "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
    "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
    "Common.XmlParser.UNTERMINATED_COMMENT",
    "Common.XmlParser.UNTERMINATED_CDATA",
    "Common.XmlParser.UNTERMINATED_DOCTYPE",
    "Common.XmlParser.MALFORMED_REFERENCE",
    "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
    "Common.XmlParser.START_END_MISMATCH",
    "Common.XmlParser.UNCLOSED_TAGS",
    "Common.XmlParser.MULTIPLE_ROOTS",
    "Common.XmlParser.VALIDATION_ERROR",
    "Common.XmlParser.SEMANTIC_ERROR",
    "Common.XmlParser.UNDECLARED_NAMESPACE"
};
203
204
|
205 kumpf 1.40 static MessageLoaderParms _formMessage(
206 Uint32 code,
207 Uint32 line,
208 const String& message)
|
209 chuck 1.19 {
210 String dftMsg = _xmlMessages[Uint32(code) - 1];
|
211 kumpf 1.48 const char* key = _xmlKeys[Uint32(code) - 1];
|
212 kumpf 1.45 String msg = message;
|
213 chuck 1.19
214 dftMsg.append(": on line $0");
215 if (message.size())
216 {
|
217 david.dillard 1.32 msg = ": " + msg;
218 dftMsg.append("$1");
219 }
|
220 chuck 1.19
|
221 kumpf 1.48 return MessageLoaderParms(key, dftMsg.getCString(), line ,msg);
|
222 chuck 1.19 }
223
224 static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
225 {
226 String dftMsg = _xmlMessages[Uint32(code) - 1];
|
227 kumpf 1.48 const char* key = _xmlKeys[Uint32(code) - 1];
|
228 chuck 1.19
229 dftMsg.append(": on line $0");
|
230 david.dillard 1.32
|
231 kumpf 1.48 return MessageLoaderParms(key, dftMsg.getCString(), line);
|
232 chuck 1.19 }
233
|
234 mike 1.13
235 XmlException::XmlException(
|
236 david.dillard 1.32 XmlException::Code code,
|
237 mike 1.13 Uint32 lineNumber,
|
238 david.dillard 1.32 const String& message)
|
239 mike 1.13 : Exception(_formMessage(code, lineNumber, message))
240 {
241
242 }
243
|
244 chuck 1.19
245 XmlException::XmlException(
|
246 david.dillard 1.32 XmlException::Code code,
|
247 chuck 1.19 Uint32 lineNumber,
|
248 david.dillard 1.32 MessageLoaderParms& msgParms)
|
249 chuck 1.19 : Exception(_formPartialMessage(code, lineNumber))
250 {
|
251 david.dillard 1.32 if (msgParms.default_msg.size())
|
252 humberto 1.21 {
|
253 david.dillard 1.32 msgParms.default_msg = ": " + msgParms.default_msg;
254 }
255 _rep->message.append(MessageLoader::getMessage(msgParms));
|
256 chuck 1.19 }
257
258
|
259 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
260 //
261 // XmlValidationError
262 //
263 ////////////////////////////////////////////////////////////////////////////////
264
265 XmlValidationError::XmlValidationError(
266 Uint32 lineNumber,
267 const String& message)
268 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
269 {
270 }
271
|
272 chuck 1.19
273 XmlValidationError::XmlValidationError(
274 Uint32 lineNumber,
275 MessageLoaderParms& msgParms)
276 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
277 {
278 }
279
280
|
281 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
282 //
283 // XmlSemanticError
284 //
285 ////////////////////////////////////////////////////////////////////////////////
286
287 XmlSemanticError::XmlSemanticError(
288 Uint32 lineNumber,
289 const String& message)
290 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
291 {
292 }
|
293 chuck 1.19
294
295 XmlSemanticError::XmlSemanticError(
296 Uint32 lineNumber,
297 MessageLoaderParms& msgParms)
298 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
299 {
300 }
301
|
302 mike 1.13
303 ////////////////////////////////////////////////////////////////////////////////
304 //
305 // XmlParser
306 //
307 ////////////////////////////////////////////////////////////////////////////////
308
|
309 mike 1.55 XmlParser::XmlParser(char* text, XmlNamespace* ns, Boolean hideEmptyTags)
|
310 kumpf 1.40 : _line(1),
311 _current(text),
312 _restoreChar('\0'),
|
313 kumpf 1.45 _foundRoot(false),
314 _supportedNamespaces(ns),
315 // Start valid indexes with -2. -1 is reserved for not found.
|
316 mike 1.55 _currentUnsupportedNSType(-2),
317 _hideEmptyTags(hideEmptyTags)
|
318 mike 1.13 {
319 }
320
|
321 mike 1.34 inline void _skipWhitespace(Uint32& line, char*& p)
322 {
323 while (*p && _isspace(*p))
324 {
325 if (*p == '\n')
326 line++;
327
328 p++;
329 }
330 }
331
|
#if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \
    defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC)
#pragma optimize( "", off )
#endif
// Recognizes one of the five named entity references. On entry p points just
// past the '&'. On a match, p is advanced past the terminating ';' and the
// replacement character is returned. If nothing matches, p is left unchanged
// and -1 is returned.
//
// The comparisons short-circuit on the first mismatch, so a null terminator
// inside the candidate text stops the test before any byte beyond it is
// read.
static int _getEntityRef(char*& p)
{
    // "gt;"
    if ((p[0] == 'g') && (p[1] == 't') && (p[2] == ';'))
    {
        p += 3;
        return '>';
    }

    // "lt;"
    if ((p[0] == 'l') && (p[1] == 't') && (p[2] == ';'))
    {
        p += 3;
        return '<';
    }

    // "apos;"
    if ((p[0] == 'a') && (p[1] == 'p') && (p[2] == 'o') && (p[3] == 's') &&
        (p[4] == ';'))
    {
        p += 5;
        return '\'';
    }

    // "quot;"
    if ((p[0] == 'q') && (p[1] == 'u') && (p[2] == 'o') && (p[3] == 't') &&
        (p[4] == ';'))
    {
        p += 5;
        return '"';
    }

    // "amp;"
    if ((p[0] == 'a') && (p[1] == 'm') && (p[2] == 'p') && (p[3] == ';'))
    {
        p += 4;
        return '&';
    }

    return -1;
}
#if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \
    defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC)
#pragma optimize( "", on )
#endif
|
376 kumpf 1.37
|
// Parses a numeric character reference. On entry p points just past "&#".
// Accepted forms are one to five decimal digits, or 'x' followed by one to
// four hexadecimal digits, terminated by ';', with a value no greater than
// 255. On success p is advanced past the ';' and the character value is
// returned. On failure -1 is returned (p may already have been advanced
// past a leading 'x').
//
// The digits are scanned by hand rather than with strtoul(): strtoul() also
// accepts leading whitespace, a '+' or '-' sign and, for base 16, a "0x"
// prefix, none of which is permitted in an XML character reference.
static inline int _getCharRef(char*& p)
{
    const bool hex = (*p == 'x');

    if (hex)
    {
        p++;
    }

    const unsigned long base = hex ? 16 : 10;
    const int maxDigits = hex ? 4 : 5;
    unsigned long ch = 0;
    char* end = p;

    for (;;)
    {
        const unsigned char c = static_cast<unsigned char>(*end);
        unsigned long digit;

        if ((c >= '0') && (c <= '9'))
        {
            digit = c - '0';
        }
        else if (hex && (c >= 'a') && (c <= 'f'))
        {
            digit = 10 + (c - 'a');
        }
        else if (hex && (c >= 'A') && (c <= 'F'))
        {
            digit = 10 + (c - 'A');
        }
        else
        {
            break;
        }

        // Too many digits. Checking before accumulating also keeps ch from
        // overflowing.
        if (end - p == maxDigits)
        {
            return -1;
        }

        ch = ch * base + digit;
        end++;
    }

    if ((end == p) || (*end != ';') || (ch > 255))
    {
        return -1;
    }

    p = end + 1;

    return static_cast<int>(ch);
}
407
|
408 kumpf 1.50 // Parse an entity reference or a character reference
409 static inline int _getRef(Uint32 line, char*& p)
410 {
411 int ch;
412
413 if (*p == '#')
414 {
415 ch = _getCharRef(++p);
416 }
417 else
418 {
419 ch = _getEntityRef(p);
420 }
421
422 if (ch == -1)
423 {
424 throw XmlException(XmlException::MALFORMED_REFERENCE, line);
425 }
426
427 return ch;
428 }
429 kumpf 1.50
430 static inline void _normalizeElementValue(
431 Uint32& line,
|
432 venkat.puvvada 1.54 char*& p,
433 Uint32 &textLen)
|
434 kumpf 1.50 {
435 // Process one character at a time:
436
437 char* q = p;
|
438 venkat.puvvada 1.54 char *start = p;
|
439 kumpf 1.50
440 while (*p && (*p != '<'))
441 {
442 if (_isspace(*p))
443 {
444 // Trim whitespace from the end of the value, but do not compress
445 // whitespace within the value.
446
447 const char* start = p;
448
449 if (*p++ == '\n')
450 {
451 line++;
452 }
453
454 _skipWhitespace(line, p);
455
456 if (*p && (*p != '<'))
457 {
458 // Transfer internal whitespace to q without compressing it.
459 const char* i = start;
460 kumpf 1.50 while (i < p)
461 {
462 *q++ = *i++;
463 }
464 }
465 else
466 {
467 // Do not transfer trailing whitespace to q.
468 break;
469 }
470 }
471 else if (*p == '&')
472 {
473 // Process an entity reference or a character reference.
474
475 *q++ = _getRef(line, ++p);
476 }
477 else
478 {
479 *q++ = *p++;
480 }
481 kumpf 1.50 }
482
483 // If q got behind p, it is safe and necessary to null-terminate q
484
485 if (q != p)
486 {
487 *q = '\0';
488 }
|
489 venkat.puvvada 1.54 textLen = (Uint32)(q - start);
|
490 kumpf 1.50 }
491
492 static inline void _normalizeAttributeValue(
493 Uint32& line,
494 char*& p,
495 char end_char,
496 char*& start)
|
497 kumpf 1.37 {
498 // Skip over leading whitespace:
499
500 _skipWhitespace(line, p);
501 start = p;
502
503 // Process one character at a time:
504
505 char* q = p;
506
507 while (*p && (*p != end_char))
508 {
509 if (_isspace(*p))
510 {
511 // Compress sequences of whitespace characters to a single space
512 // character. Update line number when newlines encountered.
513
514 if (*p++ == '\n')
515 {
516 line++;
517 }
518 kumpf 1.37
519 *q++ = ' ';
520
521 _skipWhitespace(line, p);
522 }
523 else if (*p == '&')
524 {
|
525 kumpf 1.50 // Process an entity reference or a character reference.
|
526 kumpf 1.37
|
527 kumpf 1.50 *q++ = _getRef(line, ++p);
|
528 kumpf 1.37 }
529 else
530 {
531 *q++ = *p++;
532 }
533 }
534
535 // Remove single trailing whitespace (consecutive whitespaces already
536 // compressed above). Since p >= q, we can tell if we need to strip a
537 // trailing space from q by looking at the end of p. We must not look at
538 // the last character of p, though, if p is an empty string.
|
539 dmitry.mikulin 1.44 Boolean adjust_q = (p != start) && _isspace(p[-1]);
540
541 // We encountered a the end_char or a zero-terminator.
542
543 *q = *p;
|
544 kumpf 1.37
|
545 dmitry.mikulin 1.44 if (adjust_q)
|
546 kumpf 1.37 {
547 q--;
548 }
549
550 // If q got behind p, it is safe and necessary to null-terminate q
551
552 if (q != p)
553 {
554 *q = '\0';
555 }
556 }
557
|
558 mike 1.55 Boolean XmlParser::_next(
|
559 kumpf 1.45 XmlEntry& entry,
560 Boolean includeComment)
|
561 mike 1.13 {
562 if (!_putBackStack.isEmpty())
563 {
|
564 david.dillard 1.32 entry = _putBackStack.top();
565 _putBackStack.pop();
566 return true;
|
567 mike 1.13 }
568
569 // If a character was overwritten with a null-terminator the last
570 // time this routine was called, then put back that character. Before
571 // exiting of course, restore the null-terminator.
572
573 char* nullTerminator = 0;
574
575 if (_restoreChar && !*_current)
576 {
|
577 david.dillard 1.32 nullTerminator = _current;
578 *_current = _restoreChar;
579 _restoreChar = '\0';
|
580 mike 1.13 }
581
|
582 kumpf 1.45 entry.attributes.clear();
583
584 if (_supportedNamespaces)
585 {
586 // Remove namespaces of a deeper scope level from the stack.
587 while (!_nameSpaces.isEmpty() &&
588 _nameSpaces.top().scopeLevel > _stack.size())
589 {
590 _nameSpaces.pop();
591 }
592 }
593
|
594 venkat.puvvada 1.41 // Loop until we are done with comments if includeComment is false.
595 do
596 {
597 // Skip over any whitespace:
598 _skipWhitespace(_line, _current);
599
600 if (!*_current)
601 {
602 if (nullTerminator)
603 *nullTerminator = '\0';
|
604 mike 1.13
|
605 venkat.puvvada 1.41 if (!_stack.isEmpty())
606 throw XmlException(XmlException::UNCLOSED_TAGS, _line);
|
607 mike 1.13
|
608 venkat.puvvada 1.41 return false;
609 }
|
610 mike 1.13
|
611 venkat.puvvada 1.41 // Either a "<...>" or content begins next:
|
612 mike 1.13
|
613 venkat.puvvada 1.41 if (*_current == '<')
614 {
615 _current++;
616 _getElement(_current, entry);
|
617 mike 1.13
|
618 venkat.puvvada 1.41 if (nullTerminator)
619 *nullTerminator = '\0';
|
620 mike 1.13
|
621 venkat.puvvada 1.41 if (entry.type == XmlEntry::START_TAG)
622 {
623 if (_stack.isEmpty() && _foundRoot)
624 throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
|
625 mike 1.13
|
626 venkat.puvvada 1.41 _foundRoot = true;
627 _stack.push((char*)entry.text);
628 }
629 else if (entry.type == XmlEntry::END_TAG)
630 {
631 if (_stack.isEmpty())
632 throw XmlException(XmlException::START_END_MISMATCH, _line);
|
633 mike 1.13
|
634 venkat.puvvada 1.41 if (strcmp(_stack.top(), entry.text) != 0)
635 throw XmlException(XmlException::START_END_MISMATCH, _line);
|
636 david.dillard 1.32
|
637 venkat.puvvada 1.41 _stack.pop();
638 }
|
639 david.dillard 1.32 }
|
640 venkat.puvvada 1.41 else
|
641 david.dillard 1.32 {
|
642 venkat.puvvada 1.41 // Normalize the content:
|
643 mike 1.13
|
644 kumpf 1.50 char* start = _current;
|
645 venkat.puvvada 1.54 Uint32 textLen;
646 _normalizeElementValue(_line, _current, textLen);
|
647 mike 1.13
|
648 venkat.puvvada 1.41 // Get the content:
|
649 mike 1.13
|
650 venkat.puvvada 1.41 entry.type = XmlEntry::CONTENT;
651 entry.text = start;
|
652 venkat.puvvada 1.54 entry.textLen = textLen;
|
653 kumpf 1.37
|
654 venkat.puvvada 1.41 // Overwrite '<' with a null character (temporarily).
|
655 kumpf 1.37
|
656 venkat.puvvada 1.41 _restoreChar = *_current;
657 *_current = '\0';
|
658 kumpf 1.37
|
659 venkat.puvvada 1.41 if (nullTerminator)
660 *nullTerminator = '\0';
661 }
|
662 kumpf 1.45 } while (!includeComment && entry.type == XmlEntry::COMMENT);
663
664 if (_supportedNamespaces &&
665 (entry.type == XmlEntry::START_TAG ||
666 entry.type == XmlEntry::EMPTY_TAG ||
667 entry.type == XmlEntry::END_TAG))
668 {
669 // Determine the namespace type for this entry
670
671 if (entry.type == XmlEntry::START_TAG ||
672 entry.type == XmlEntry::EMPTY_TAG)
673 {
674 // Process namespace declarations and determine the namespace type
675 // for the attributes.
676
677 Uint32 scopeLevel = _stack.size();
678 if (entry.type == XmlEntry::EMPTY_TAG)
679 {
680 // Empty tags are deeper scope, but not pushed onto the stack
681 scopeLevel++;
682 }
683 kumpf 1.45
684 for (Uint32 i = 0, n = entry.attributes.size(); i < n; i++)
685 {
686 XmlAttribute& attr = entry.attributes[i];
687 if ((strncmp(attr.name, "xmlns:", 6) == 0) ||
688 (strcmp(attr.name, "xmlns") == 0))
689 {
690 // Process a namespace declaration
691 XmlNamespace ns;
692 if (attr.name[5] == ':')
693 {
694 ns.localName = attr.localName;
695 }
696 else
697 {
698 // Default name space has no local name
699 ns.localName = 0;
700 }
701 ns.extendedName = attr.value;
702 ns.scopeLevel = scopeLevel;
703 ns.type = _getSupportedNamespaceType(ns.extendedName);
704 kumpf 1.45
705 // If the namespace is not supported, assign it a unique
706 // negative identifier.
707 if (ns.type == -1)
708 {
709 ns.type = _currentUnsupportedNSType--;
710 }
711
712 _nameSpaces.push(ns);
713 }
714 else
715 {
716 // Get the namespace type for this attribute.
717 attr.nsType = _getNamespaceType(attr.name);
718 }
719 }
720 }
721
722 entry.nsType = _getNamespaceType(entry.text);
723 }
724 else
725 kumpf 1.45 {
726 entry.nsType = -1;
727 }
|
728 kumpf 1.37
|
729 venkat.puvvada 1.41 return true;
|
730 mike 1.13 }
731
|
732 mike 1.55 Boolean XmlParser::next(XmlEntry& entry, Boolean includeComment)
733 {
734 if (_hideEmptyTags)
735 {
736 // Get the next tag.
737
738 if (!_next(entry, includeComment))
739 return false;
740
741 // If an EMPTY_TAG is encountered, then convert it to a START_TAG and
742 // push a matching END_TAG on the put-back stack. This hides every
743 // EMPTY_TAG from the caller.
744
745 if (entry.type == XmlEntry::EMPTY_TAG)
746 {
747 entry.type = XmlEntry::START_TAG;
748
749 XmlEntry tmp;
750 tmp.type = XmlEntry::END_TAG;
751 tmp.text = entry.text;
752 tmp.nsType = entry.nsType;
753 mike 1.55 tmp.localName = entry.localName;
754
755 _putBackStack.push(tmp);
756 }
757
758 return true;
759 }
760 else
761 return _next(entry, includeComment);
762 }
763
|
764 kumpf 1.45 // Get the namespace type of the given tag
765 int XmlParser::_getNamespaceType(const char* tag)
766 {
767 const char* pos = strchr(tag, ':');
768
769 // If ':' is not found, the tag is not namespace qualified and we
770 // need to look for the default name space.
771
772 // Search the namespace stack from the top
773 for (Sint32 i = _nameSpaces.size() - 1; i >=0; i--)
774 {
775 // If ':' is found, look for the name space with the matching
776 // local name...
777 if ((pos && _nameSpaces[i].localName &&
778 !strncmp(_nameSpaces[i].localName, tag, pos - tag)) ||
779 // ... otherwise look for the default name space. It's the
780 // one with localName set to NULL
781 (!pos && !_nameSpaces[i].localName))
782 {
783 return _nameSpaces[i].type;
784 }
785 kumpf 1.45 }
786
787 // If the tag is namespace qualified, but the name space has not been
788 // declared, it's malformed XML and we must throw an exception.
789 // Note: The "xml" namespace is specifically defined by the W3C as a
790 // reserved prefix ("http://www.w3.org/XML/1998/namespace").
791 if (pos && (strncmp(tag, "xml:", 4) != 0))
792 {
793 throw XmlException(XmlException::UNDECLARED_NAMESPACE, _line);
794 }
795
796 // Otherwise it's OK not to have a name space.
797 return -1;
798 }
799
800 // Given the extended namespace name, find it in the table of supported
801 // namespaces and return its type.
802 int XmlParser::_getSupportedNamespaceType(const char* extendedName)
803 {
804 for (Sint32 i = 0;
805 _supportedNamespaces[i].localName != 0;
806 kumpf 1.45 i++)
807 {
808 PEGASUS_ASSERT(_supportedNamespaces[i].type == i);
809 if (!strcmp(_supportedNamespaces[i].extendedName, extendedName))
810 {
811 return _supportedNamespaces[i].type;
812 }
813 }
814 return -1;
815 }
816
817 XmlNamespace* XmlParser::getNamespace(int nsType)
818 {
819 for (Sint32 i = _nameSpaces.size() - 1; i >=0; i--)
820 {
821 if (_nameSpaces[i].type == nsType)
822 {
823 return &_nameSpaces[i];
824 }
825 }
826 return 0;
827 kumpf 1.45 }
828
|
829 mike 1.13 void XmlParser::putBack(XmlEntry& entry)
830 {
831 _putBackStack.push(entry);
832 }
833
834 XmlParser::~XmlParser()
835 {
836 // Nothing to do!
837 }
838
|
// A-Za-z0-9_-. (Note that ':' is not included and must be checked separately)
//
// Lookup table with one entry per byte value (256 entries). An entry is 1 if
// the character may appear after the first character of a name and 0
// otherwise. Entry 0 is 0, so a scan over the table stops at a null
// terminator.
static unsigned char _isInnerElementChar[] =
{
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
850
|
851 kumpf 1.45 inline Boolean _getQName(char*& p, const char*& localName)
|
852 mike 1.13 {
|
853 kumpf 1.45 localName = p;
854
|
855 mike 1.35 if (!CharSet::isAlNumUnder(Uint8(*p)))
|
856 kumpf 1.45 return false;
|
857 mike 1.35
|
858 kumpf 1.24 p++;
|
859 mike 1.13
|
860 kumpf 1.53 // No explicit test for NULL termination is needed.
861 // On position 0 of the array false is returned.
|
862 thilo.boehm 1.49 while (_isInnerElementChar[Uint8(*p)])
|
863 david.dillard 1.32 p++;
|
864 mike 1.13
|
865 kumpf 1.45 // We've validated the prefix, now validate the local name
866 if (*p == ':')
867 {
868 localName = ++p;
869
870 if (!CharSet::isAlNumUnder(Uint8(*p)))
871 return false;
872
873 p++;
|
874 kumpf 1.53 // No explicit test for NULL termination is needed.
875 // On position 0 of the array false is returned.
|
876 thilo.boehm 1.49 while (_isInnerElementChar[Uint8(*p)])
|
877 kumpf 1.45 p++;
878 }
879
880 return true;
881 }
882
883 Boolean XmlParser::_getElementName(char*& p, const char*& localName)
884 {
885 if (!_getQName(p, localName))
886 throw XmlException(XmlException::BAD_START_TAG, _line);
887
|
888 mike 1.13 // The next character must be a space:
889
|
890 chuck 1.26 if (_isspace(*p))
|
891 mike 1.13 {
|
892 david.dillard 1.32 *p++ = '\0';
|
893 mike 1.34 _skipWhitespace(_line, p);
|
894 mike 1.13 }
895
896 if (*p == '>')
897 {
|
898 david.dillard 1.32 *p++ = '\0';
899 return true;
|
900 mike 1.13 }
901
902 return false;
903 }
904
|
905 kumpf 1.45 Boolean XmlParser::_getOpenElementName(
906 char*& p,
907 const char*& localName,
908 Boolean& openCloseElement)
|
909 mike 1.13 {
910 openCloseElement = false;
911
|
912 kumpf 1.45 if (!_getQName(p, localName))
|
913 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
914 mike 1.35
|
915 mike 1.13 // The next character must be a space:
916
|
917 chuck 1.26 if (_isspace(*p))
|
918 mike 1.13 {
|
919 david.dillard 1.32 *p++ = '\0';
|
920 mike 1.34 _skipWhitespace(_line, p);
|
921 mike 1.13 }
922
923 if (*p == '>')
924 {
|
925 david.dillard 1.32 *p++ = '\0';
926 return true;
|
927 mike 1.13 }
928
929 if (p[0] == '/' && p[1] == '>')
930 {
|
931 david.dillard 1.32 openCloseElement = true;
932 *p = '\0';
933 p += 2;
934 return true;
|
935 mike 1.13 }
936
937 return false;
938 }
939
|
940 kumpf 1.45 void XmlParser::_getAttributeNameAndEqual(char*& p, const char*& localName)
|
941 mike 1.13 {
|
942 kumpf 1.45 if (!_getQName(p, localName))
|
943 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
944 mike 1.35
|
945 mike 1.13 char* term = p;
946
|
947 mike 1.34 _skipWhitespace(_line, p);
|
948 mike 1.13
949 if (*p != '=')
|
950 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
951 mike 1.13
952 p++;
953
|
954 mike 1.34 _skipWhitespace(_line, p);
|
955 mike 1.13
956 *term = '\0';
957 }
958
959 void XmlParser::_getComment(char*& p)
960 {
961 // Now p points to first non-whitespace character beyond "<--" sequence:
962
963 for (; *p; p++)
964 {
|
965 david.dillard 1.32 if (p[0] == '-' && p[1] == '-')
966 {
967 if (p[2] != '>')
968 {
969 throw XmlException(
970 XmlException::MINUS_MINUS_IN_COMMENT, _line);
971 }
972
973 // Find end of comment (excluding whitespace):
974
975 *p = '\0';
976 p += 3;
977 return;
978 }
|
979 mike 1.13 }
980
981 // If it got this far, then the comment is unterminated:
982
983 throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
984 }
985
986 void XmlParser::_getCData(char*& p)
987 {
988 // At this point p points one past "<![CDATA[" sequence:
989
990 for (; *p; p++)
991 {
|
992 david.dillard 1.32 if (p[0] == ']' && p[1] == ']' && p[2] == '>')
993 {
994 *p = '\0';
995 p += 3;
996 return;
997 }
998 else if (*p == '\n')
999 _line++;
|
1000 mike 1.13 }
1001
1002 // If it got this far, then the comment is unterminated:
1003
1004 throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
1005 }
1006
1007 void XmlParser::_getDocType(char*& p)
1008 {
1009 // Just ignore the DOCTYPE command for now:
1010
1011 for (; *p && *p != '>'; p++)
1012 {
|
1013 david.dillard 1.32 if (*p == '\n')
1014 _line++;
|
1015 mike 1.13 }
1016
1017 if (*p != '>')
|
1018 david.dillard 1.32 throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
|
1019 mike 1.13
1020 p++;
1021 }
1022
1023 void XmlParser::_getElement(char*& p, XmlEntry& entry)
1024 {
1025 //--------------------------------------------------------------------------
1026 // Get the element name (expect one of these: '?', '!', [A-Za-z_])
1027 //--------------------------------------------------------------------------
1028
1029 if (*p == '?')
1030 {
|
1031 david.dillard 1.32 entry.type = XmlEntry::XML_DECLARATION;
1032 entry.text = ++p;
|
1033 mike 1.13
|
1034 kumpf 1.45 if (_getElementName(p, entry.localName))
|
1035 david.dillard 1.32 return;
|
1036 mike 1.13 }
1037 else if (*p == '!')
1038 {
|
1039 david.dillard 1.32 p++;
|
1040 mike 1.13
|
1041 david.dillard 1.32 // Expect a comment or CDATA:
|
1042 mike 1.13
|
1043 david.dillard 1.32 if (p[0] == '-' && p[1] == '-')
1044 {
1045 p += 2;
1046 entry.type = XmlEntry::COMMENT;
1047 entry.text = p;
1048 _getComment(p);
1049 return;
1050 }
1051 else if (memcmp(p, "[CDATA[", 7) == 0)
1052 {
1053 p += 7;
1054 entry.type = XmlEntry::CDATA;
1055 entry.text = p;
1056 _getCData(p);
|
1057 venkat.puvvada 1.54 entry.textLen = strlen(entry.text);
|
1058 david.dillard 1.32 return;
1059 }
1060 else if (memcmp(p, "DOCTYPE", 7) == 0)
1061 {
1062 entry.type = XmlEntry::DOCTYPE;
|
1063 kumpf 1.37 entry.text = "";
|
1064 david.dillard 1.32 _getDocType(p);
1065 return;
1066 }
1067 throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
|
1068 mike 1.13 }
1069 else if (*p == '/')
1070 {
|
1071 david.dillard 1.32 entry.type = XmlEntry::END_TAG;
1072 entry.text = ++p;
|
1073 mike 1.13
|
1074 kumpf 1.45 if (!_getElementName(p, entry.localName))
|
1075 david.dillard 1.32 throw(XmlException(XmlException::BAD_END_TAG, _line));
|
1076 mike 1.13
|
1077 david.dillard 1.32 return;
|
1078 mike 1.13 }
|
1079 thilo.boehm 1.49 else if (CharSet::isAlphaUnder(Uint8(*p)))
|
1080 mike 1.13 {
|
1081 david.dillard 1.32 entry.type = XmlEntry::START_TAG;
1082 entry.text = p;
|
1083 mike 1.13
|
1084 david.dillard 1.32 Boolean openCloseElement = false;
|
1085 mike 1.13
|
1086 kumpf 1.45 if (_getOpenElementName(p, entry.localName, openCloseElement))
|
1087 david.dillard 1.32 {
1088 if (openCloseElement)
1089 entry.type = XmlEntry::EMPTY_TAG;
1090 return;
1091 }
|
1092 mike 1.13 }
1093 else
|
1094 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
1095 mike 1.13
1096 //--------------------------------------------------------------------------
1097 // Grab all the attributes:
1098 //--------------------------------------------------------------------------
1099
1100 for (;;)
1101 {
|
1102 david.dillard 1.32 if (entry.type == XmlEntry::XML_DECLARATION)
1103 {
1104 if (p[0] == '?' && p[1] == '>')
1105 {
1106 p += 2;
1107 return;
1108 }
1109 }
1110 else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
1111 {
1112 entry.type = XmlEntry::EMPTY_TAG;
1113 p += 2;
1114 return;
1115 }
1116 else if (*p == '>')
1117 {
1118 p++;
1119 return;
1120 }
1121
1122 XmlAttribute attr;
|
1123 kumpf 1.45 attr.nsType = -1;
|
1124 david.dillard 1.32 attr.name = p;
|
1125 kumpf 1.45 _getAttributeNameAndEqual(p, attr.localName);
|
1126 david.dillard 1.32
|
1127 kumpf 1.37 // Get the attribute value (e.g., "some value")
1128 {
1129 if ((*p != '"') && (*p != '\''))
1130 {
1131 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
1132 }
1133
1134 char quote = *p++;
1135
1136 char* start;
|
1137 kumpf 1.50 _normalizeAttributeValue(_line, p, quote, start);
|
1138 kumpf 1.37 attr.value = start;
1139
1140 if (*p != quote)
1141 {
1142 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
1143 }
1144
1145 // Overwrite the closing quote with a null-terminator:
|
1146 david.dillard 1.32
|
1147 kumpf 1.37 *p++ = '\0';
1148 }
|
1149 david.dillard 1.32
1150 if (entry.type == XmlEntry::XML_DECLARATION)
1151 {
1152 // The next thing must be a space or a "?>":
|
1153 mike 1.13
|
1154 david.dillard 1.32 if (!(p[0] == '?' && p[1] == '>') && !_isspace(*p))
1155 {
1156 throw XmlException(
1157 XmlException::BAD_ATTRIBUTE_VALUE, _line);
1158 }
1159 }
1160 else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || _isspace(*p)))
1161 {
1162 // The next thing must be a space or a '>':
|
1163 mike 1.13
|
1164 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
1165 }
|
1166 mike 1.13
|
1167 mike 1.34 _skipWhitespace(_line, p);
|
1168 david.dillard 1.32
|
1169 kumpf 1.45 entry.attributes.append(attr);
|
1170 mike 1.13 }
1171 }
1172
// Printable names for XmlEntry types. XmlEntry::print() indexes this table
// directly with the entry's type value, so the order of the strings here
// presumably mirrors the order of the XmlEntry type enumeration (declared
// elsewhere -- confirm before reordering or adding entries).
//
// Both the characters and the pointers are const: this is a read-only
// lookup table and nothing should be able to re-point an element.
static const char* const _typeStrings[] =
{
    "XML_DECLARATION",
    "START_TAG",
    "EMPTY_TAG",
    "END_TAG",
    "COMMENT",
    "CDATA",
    "DOCTYPE",
    "CONTENT"
};
1184
1185 void XmlEntry::print() const
1186 {
1187 PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
1188
1189 Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
1190
1191 if (needQuotes)
|
1192 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
1193
|
1194 mike 1.13 _printValue(text);
1195
1196 if (needQuotes)
|
1197 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
|
1198 mike 1.13
1199 PEGASUS_STD(cout) << '\n';
1200
|
1201 kumpf 1.45 for (Uint32 i = 0, n = attributes.size(); i < n; i++)
|
1202 mike 1.13 {
|
1203 david.dillard 1.32 PEGASUS_STD(cout) << " " << attributes[i].name << "=\"";
1204 _printValue(attributes[i].value);
1205 PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
|
1206 mike 1.13 }
1207 }
1208
1209 const XmlAttribute* XmlEntry::findAttribute(
1210 const char* name) const
1211 {
|
1212 kumpf 1.45 for (Uint32 i = 0, n = attributes.size(); i < n; i++)
|
1213 mike 1.13 {
|
1214 david.dillard 1.32 if (strcmp(attributes[i].name, name) == 0)
1215 return &attributes[i];
|
1216 mike 1.13 }
1217
1218 return 0;
1219 }
1220
|
1221 kumpf 1.45 const XmlAttribute* XmlEntry::findAttribute(
|
1222 kumpf 1.47 int attrNsType,
|
1223 kumpf 1.45 const char* name) const
1224 {
1225 for (Uint32 i = 0, n = attributes.size(); i < n; i++)
1226 {
|
1227 kumpf 1.47 if ((attributes[i].nsType == attrNsType) &&
|
1228 kumpf 1.45 (strcmp(attributes[i].localName, name) == 0))
1229 {
1230 return &attributes[i];
1231 }
1232 }
1233
1234 return 0;
1235 }
1236
|
1237 mike 1.13 // Find first non-whitespace character (set first) and last non-whitespace
1238 // character (set last one past this). For example, consider this string:
1239 //
|
1240 david.dillard 1.32 // " 87 "
|
1241 mike 1.13 //
1242 // The first pointer would point to '8' and the last pointer woudl point one
1243 // beyond '7'.
1244
1245 static void _findEnds(
|
1246 david.dillard 1.32 const char* str,
1247 const char*& first,
|
1248 mike 1.13 const char*& last)
1249 {
1250 first = str;
1251
|
1252 chuck 1.26 while (_isspace(*first))
|
1253 david.dillard 1.32 first++;
|
1254 mike 1.13
1255 if (!*first)
1256 {
|
1257 david.dillard 1.32 last = first;
1258 return;
|
1259 mike 1.13 }
1260
1261 last = first + strlen(first);
1262
|
1263 chuck 1.26 while (last != first && _isspace(last[-1]))
|
1264 david.dillard 1.32 last--;
|
1265 mike 1.13 }
1266
1267 Boolean XmlEntry::getAttributeValue(
|
1268 david.dillard 1.32 const char* name,
|
1269 mike 1.13 Uint32& value) const
1270 {
1271 const XmlAttribute* attr = findAttribute(name);
1272
1273 if (!attr)
|
1274 david.dillard 1.32 return false;
|
1275 mike 1.13
1276 const char* first;
1277 const char* last;
1278 _findEnds(attr->value, first, last);
1279
1280 char* end = 0;
1281 long tmp = strtol(first, &end, 10);
1282
1283 if (!end || end != last)
|
1284 david.dillard 1.32 return false;
|
1285 mike 1.13
1286 value = Uint32(tmp);
1287 return true;
1288 }
1289
1290 Boolean XmlEntry::getAttributeValue(
|
1291 david.dillard 1.32 const char* name,
|
1292 mike 1.13 Real32& value) const
1293 {
1294 const XmlAttribute* attr = findAttribute(name);
1295
1296 if (!attr)
|
1297 david.dillard 1.32 return false;
|
1298 mike 1.13
1299 const char* first;
1300 const char* last;
1301 _findEnds(attr->value, first, last);
1302
1303 char* end = 0;
1304 double tmp = strtod(first, &end);
1305
1306 if (!end || end != last)
|
1307 david.dillard 1.32 return false;
|
1308 mike 1.13
|
1309 david.dillard 1.32 value = static_cast<Real32>(tmp);
|
1310 mike 1.13 return true;
1311 }
1312
1313 Boolean XmlEntry::getAttributeValue(
|
1314 david.dillard 1.32 const char* name,
|
1315 mike 1.13 const char*& value) const
1316 {
1317 const XmlAttribute* attr = findAttribute(name);
1318
1319 if (!attr)
|
1320 david.dillard 1.32 return false;
|
1321 mike 1.13
1322 value = attr->value;
1323 return true;
1324 }
1325
1326 Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1327 {
1328 const char* tmp;
1329
1330 if (!getAttributeValue(name, tmp))
|
1331 david.dillard 1.32 return false;
|
1332 mike 1.13
|
1333 chuck 1.28 value = String(tmp);
|
1334 mike 1.13 return true;
1335 }
1336
|
1337 mike 1.33 void XmlAppendCString(Buffer& out, const char* str)
|
1338 mike 1.13 {
|
1339 david.dillard 1.32 out.append(str, static_cast<Uint32>(strlen(str)));
|
1340 mike 1.13 }
1341
1342 PEGASUS_NAMESPACE_END
|