1 martin 1.51 //%LICENSE////////////////////////////////////////////////////////////////
|
2 martin 1.52 //
|
3 martin 1.51 // Licensed to The Open Group (TOG) under one or more contributor license
4 // agreements. Refer to the OpenPegasusNOTICE.txt file distributed with
5 // this work for additional information regarding copyright ownership.
6 // Each contributor licenses this file to you under the OpenPegasus Open
7 // Source License; you may not use this file except in compliance with the
8 // License.
|
9 martin 1.52 //
|
10 martin 1.51 // Permission is hereby granted, free of charge, to any person obtaining a
11 // copy of this software and associated documentation files (the "Software"),
12 // to deal in the Software without restriction, including without limitation
13 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
14 // and/or sell copies of the Software, and to permit persons to whom the
15 // Software is furnished to do so, subject to the following conditions:
|
16 martin 1.52 //
|
17 martin 1.51 // The above copyright notice and this permission notice shall be included
18 // in all copies or substantial portions of the Software.
|
19 martin 1.52 //
|
20 martin 1.51 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
21 martin 1.52 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22 martin 1.51 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24 // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25 // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26 // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27 martin 1.52 //
|
28 martin 1.51 //////////////////////////////////////////////////////////////////////////
|
29 mike 1.13 //
30 //%/////////////////////////////////////////////////////////////////////////////
31
32 ////////////////////////////////////////////////////////////////////////////////
33 //
34 // XmlParser
35 //
|
36 david.dillard 1.32 // This file contains a simple non-validating XML parser. Here are
37 // several rules for well-formed XML:
|
38 mike 1.13 //
|
39 david.dillard 1.32 // 1. Documents must begin with an XML declaration:
|
40 mike 1.13 //
|
41 david.dillard 1.32 // <?xml version="1.0" standalone="yes"?>
|
42 mike 1.13 //
|
43 david.dillard 1.32 // 2. Comments have the form:
|
44 mike 1.13 //
|
45 david.dillard 1.32 // <!-- blah blah blah -->
|
46 mike 1.13 //
|
47 david.dillard 1.32 // 3. The following entity references are supported:
|
48 mike 1.13 //
|
49 david.dillard 1.32 // &amp; - ampersand
50 // &lt; - less-than
51 // &gt; - greater-than
52 // &quot; - full quote
53 // &apos; - apostrophe
|
54 mike 1.13 //
|
55 kumpf 1.18 // as well as character (numeric) references:
|
56 mike 1.35 //
|
57 kumpf 1.18 // &#49; - decimal reference for character '1'
58 // &#x31; - hexadecimal reference for character '1'
59 //
|
60 david.dillard 1.32 // 4. Element names and attribute names take the following form:
|
61 mike 1.13 //
|
62 david.dillard 1.32 // [A-Za-z_][A-Za-z_0-9-.:]*
|
63 mike 1.13 //
|
64 david.dillard 1.32 // 5. Arbitrary data (CDATA) can be enclosed like this:
|
65 mike 1.13 //
|
66 david.dillard 1.32 // <![CDATA[
67 // ...
68 // ]]>
|
69 mike 1.13 //
|
70 david.dillard 1.32 // 6. Element names and attributes names are case-sensitive.
|
71 mike 1.13 //
|
72 david.dillard 1.32 // 7. XmlAttribute values must be delimited by full or half quotes.
73 // XmlAttribute values must be delimited.
|
74 mike 1.13 //
|
75 david.dillard 1.32 // 8. <!DOCTYPE...>
|
76 mike 1.13 //
77 // TODO:
78 //
|
79 kumpf 1.40 // ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is
|
80 mike 1.35 // work. Handle <!DOCTYPE...> sections which are complicated (containing
|
81 mike 1.13 // rules rather than references to files).
82 //
|
83 david.dillard 1.32 // Remove newlines from string literals:
|
84 mike 1.13 //
85 // Example: <xyz x="hello
|
86 david.dillard 1.32 // world">
|
87 mike 1.13 //
88 ////////////////////////////////////////////////////////////////////////////////
89
|
90 sage 1.14 #include <Pegasus/Common/Config.h>
|
91 mike 1.13 #include <cctype>
92 #include <cstdio>
93 #include <cstdlib>
94 #include <cstring>
95 #include "XmlParser.h"
96 #include "Logger.h"
|
97 chuck 1.19 #include "ExceptionRep.h"
|
98 mike 1.34 #include "CharSet.h"
|
99 mike 1.13
100 PEGASUS_NAMESPACE_BEGIN
101
102 ////////////////////////////////////////////////////////////////////////////////
103 //
104 // Static helper functions
105 //
106 ////////////////////////////////////////////////////////////////////////////////
107
108 static void _printValue(const char* p)
109 {
110 for (; *p; p++)
111 {
|
112 david.dillard 1.32 if (*p == '\n')
113 PEGASUS_STD(cout) << "\\n";
114 else if (*p == '\r')
115 PEGASUS_STD(cout) << "\\r";
116 else if (*p == '\t')
117 PEGASUS_STD(cout) << "\\t";
118 else
119 PEGASUS_STD(cout) << *p;
|
120 mike 1.13 }
121 }
122
123 struct EntityReference
124 {
125 const char* match;
126 Uint32 length;
127 char replacement;
128 };
129
|
130 chuck 1.26 // Implements a check for a whitespace character, without calling
131 // isspace( ). The isspace( ) function is locale-sensitive,
132 // and incorrectly flags some chars above 0x7f as whitespace. This
133 // causes the XmlParser to incorrectly parse UTF-8 data.
134 //
135 // Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml)
136 // defines white space as:
|
137 david.dillard 1.32 // S ::= (#x20 | #x9 | #xD | #xA)+
|
138 mike 1.34 static inline int _isspace(char c)
|
139 chuck 1.26 {
|
140 kumpf 1.36 return CharSet::isXmlWhiteSpace((Uint8)c);
|
141 chuck 1.26 }
142
|
143 mike 1.13
144 ////////////////////////////////////////////////////////////////////////////////
145 //
146 // XmlException
147 //
148 ////////////////////////////////////////////////////////////////////////////////
149
// Default message text for each XmlException code. _formMessage() and
// _formPartialMessage() index this table with (code - 1), so the entries
// must stay in the same order as the codes and as the _xmlKeys table.
static const char* _xmlMessages[] =
{
    "Bad opening element",
    "Bad closing element",
    "Bad attribute name",
    "Expected equal sign",
    "Bad attribute value",
    "A \"--\" sequence found within comment",
    "Unterminated comment",
    "Unterminated CDATA block",
    "Unterminated DOCTYPE",
    "Malformed reference",
    "Expected a comment or CDATA following \"<!\" sequence",
    "Closing element does not match opening element",
    "One or more tags are still open",
    "More than one root element was encountered",
    "Validation error",
    "Semantic error",
    "Namespace not declared"
};
170
|
// Message keys, one per XmlException code. _formMessage() and
// _formPartialMessage() index this table with (code - 1) and pass the key to
// MessageLoaderParms, so the entries must stay in the same order as the
// _xmlMessages table.
static const char* _xmlKeys[] =
{
    "Common.XmlParser.BAD_START_TAG",
    "Common.XmlParser.BAD_END_TAG",
    "Common.XmlParser.BAD_ATTRIBUTE_NAME",
    "Common.XmlParser.EXPECTED_EQUAL_SIGN",
    "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
    "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
    "Common.XmlParser.UNTERMINATED_COMMENT",
    "Common.XmlParser.UNTERMINATED_CDATA",
    "Common.XmlParser.UNTERMINATED_DOCTYPE",
    "Common.XmlParser.MALFORMED_REFERENCE",
    "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
    "Common.XmlParser.START_END_MISMATCH",
    "Common.XmlParser.UNCLOSED_TAGS",
    "Common.XmlParser.MULTIPLE_ROOTS",
    "Common.XmlParser.VALIDATION_ERROR",
    "Common.XmlParser.SEMANTIC_ERROR",
    "Common.XmlParser.UNDECLARED_NAMESPACE"
};
191
192
|
193 kumpf 1.40 static MessageLoaderParms _formMessage(
194 Uint32 code,
195 Uint32 line,
196 const String& message)
|
197 chuck 1.19 {
198 String dftMsg = _xmlMessages[Uint32(code) - 1];
|
199 kumpf 1.48 const char* key = _xmlKeys[Uint32(code) - 1];
|
200 kumpf 1.45 String msg = message;
|
201 chuck 1.19
202 dftMsg.append(": on line $0");
203 if (message.size())
204 {
|
205 david.dillard 1.32 msg = ": " + msg;
206 dftMsg.append("$1");
207 }
|
208 chuck 1.19
|
209 kumpf 1.48 return MessageLoaderParms(key, dftMsg.getCString(), line ,msg);
|
210 chuck 1.19 }
211
212 static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
213 {
214 String dftMsg = _xmlMessages[Uint32(code) - 1];
|
215 kumpf 1.48 const char* key = _xmlKeys[Uint32(code) - 1];
|
216 chuck 1.19
217 dftMsg.append(": on line $0");
|
218 david.dillard 1.32
|
219 kumpf 1.48 return MessageLoaderParms(key, dftMsg.getCString(), line);
|
220 chuck 1.19 }
221
|
222 mike 1.13
223 XmlException::XmlException(
|
224 david.dillard 1.32 XmlException::Code code,
|
225 mike 1.13 Uint32 lineNumber,
|
226 david.dillard 1.32 const String& message)
|
227 mike 1.13 : Exception(_formMessage(code, lineNumber, message))
228 {
229
230 }
231
|
232 chuck 1.19
233 XmlException::XmlException(
|
234 david.dillard 1.32 XmlException::Code code,
|
235 chuck 1.19 Uint32 lineNumber,
|
236 david.dillard 1.32 MessageLoaderParms& msgParms)
|
237 chuck 1.19 : Exception(_formPartialMessage(code, lineNumber))
238 {
|
239 david.dillard 1.32 if (msgParms.default_msg.size())
|
240 humberto 1.21 {
|
241 david.dillard 1.32 msgParms.default_msg = ": " + msgParms.default_msg;
242 }
243 _rep->message.append(MessageLoader::getMessage(msgParms));
|
244 chuck 1.19 }
245
246
|
247 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
248 //
249 // XmlValidationError
250 //
251 ////////////////////////////////////////////////////////////////////////////////
252
253 XmlValidationError::XmlValidationError(
254 Uint32 lineNumber,
255 const String& message)
256 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
257 {
258 }
259
|
260 chuck 1.19
261 XmlValidationError::XmlValidationError(
262 Uint32 lineNumber,
263 MessageLoaderParms& msgParms)
264 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
265 {
266 }
267
268
|
269 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
270 //
271 // XmlSemanticError
272 //
273 ////////////////////////////////////////////////////////////////////////////////
274
275 XmlSemanticError::XmlSemanticError(
276 Uint32 lineNumber,
277 const String& message)
278 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
279 {
280 }
|
281 chuck 1.19
282
283 XmlSemanticError::XmlSemanticError(
284 Uint32 lineNumber,
285 MessageLoaderParms& msgParms)
286 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
287 {
288 }
289
|
290 mike 1.13
291 ////////////////////////////////////////////////////////////////////////////////
292 //
293 // XmlParser
294 //
295 ////////////////////////////////////////////////////////////////////////////////
296
|
297 mike 1.55 XmlParser::XmlParser(char* text, XmlNamespace* ns, Boolean hideEmptyTags)
|
298 kumpf 1.40 : _line(1),
299 _current(text),
300 _restoreChar('\0'),
|
301 kumpf 1.45 _foundRoot(false),
302 _supportedNamespaces(ns),
303 // Start valid indexes with -2. -1 is reserved for not found.
|
304 mike 1.55 _currentUnsupportedNSType(-2),
305 _hideEmptyTags(hideEmptyTags)
|
306 mike 1.13 {
307 }
308
|
309 mike 1.34 inline void _skipWhitespace(Uint32& line, char*& p)
310 {
311 while (*p && _isspace(*p))
312 {
313 if (*p == '\n')
314 line++;
315
316 p++;
317 }
318 }
319
|
#if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \
    defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC)
#pragma optimize( "", off )
#endif
// Recognizes one of the five predefined entity names. On entry p points just
// past the '&'. If the text at p is "gt;", "lt;", "apos;", "quot;" or "amp;",
// p is advanced past the ';' and the character the entity stands for is
// returned. Otherwise p is left unchanged and -1 is returned.
static int _getEntityRef(char*& p)
{
    struct NamedEntity
    {
        const char* name;   // entity name including the terminating ';'
        size_t length;      // number of characters in name
        char value;         // character the entity stands for
    };

    static const NamedEntity entities[] =
    {
        { "gt;",   3, '>'  },
        { "lt;",   3, '<'  },
        { "apos;", 5, '\'' },
        { "quot;", 5, '"'  },
        { "amp;",  4, '&'  }
    };

    const size_t count = sizeof(entities) / sizeof(entities[0]);

    for (size_t i = 0; i < count; i++)
    {
        // strncmp stops at a NUL terminator, so this never reads past the
        // end of the buffer.
        if (strncmp(p, entities[i].name, entities[i].length) == 0)
        {
            p += entities[i].length;
            return entities[i].value;
        }
    }

    return -1;
}
#if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \
    defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC)
#pragma optimize( "", on )
#endif
|
364 kumpf 1.37
|
// Parses the body of a numeric character reference. On entry p points just
// past the "&#" sequence. Accepted forms are
//
//     [0-9]+ ';'            (decimal, at most 5 digits)
//     'x' [0-9a-fA-F]+ ';'  (hexadecimal, at most 4 digits)
//
// with a value no greater than 255. On success p is advanced past the ';'
// and the character value is returned. On failure -1 is returned; p is then
// unchanged, except that a leading 'x' has already been skipped.
//
// The digits are scanned by hand rather than with strtoul(), because
// strtoul() also accepts leading whitespace, a sign and (in base 16) a "0x"
// prefix, none of which are permitted in a character reference.
static inline int _getCharRef(char*& p)
{
    const bool hex = (*p == 'x');

    if (hex)
    {
        ++p;
    }

    const unsigned long base = hex ? 16 : 10;
    const int maxDigits = hex ? 4 : 5;

    unsigned long value = 0;
    int digitCount = 0;

    for (;; ++digitCount)
    {
        const char c = p[digitCount];
        unsigned long digit;

        if (c >= '0' && c <= '9')
        {
            digit = (unsigned long)(c - '0');
        }
        else if (hex && c >= 'a' && c <= 'f')
        {
            digit = (unsigned long)(c - 'a') + 10;
        }
        else if (hex && c >= 'A' && c <= 'F')
        {
            digit = (unsigned long)(c - 'A') + 10;
        }
        else
        {
            break;
        }

        // Too many digits. Bailing out here also keeps value from
        // overflowing.
        if (digitCount == maxDigits)
        {
            return -1;
        }

        value = value * base + digit;
    }

    if ((digitCount == 0) || (p[digitCount] != ';') || (value > 255))
    {
        return -1;
    }

    p += digitCount + 1;

    return (int)value;
}
395
|
396 kumpf 1.50 // Parse an entity reference or a character reference
397 static inline int _getRef(Uint32 line, char*& p)
398 {
399 int ch;
400
401 if (*p == '#')
402 {
403 ch = _getCharRef(++p);
404 }
405 else
406 {
407 ch = _getEntityRef(p);
408 }
409
410 if (ch == -1)
411 {
412 throw XmlException(XmlException::MALFORMED_REFERENCE, line);
413 }
414
415 return ch;
416 }
417 kumpf 1.50
418 static inline void _normalizeElementValue(
419 Uint32& line,
|
420 venkat.puvvada 1.54 char*& p,
421 Uint32 &textLen)
|
422 kumpf 1.50 {
423 // Process one character at a time:
424
425 char* q = p;
|
426 venkat.puvvada 1.54 char *start = p;
|
427 kumpf 1.50
428 while (*p && (*p != '<'))
429 {
430 if (_isspace(*p))
431 {
432 // Trim whitespace from the end of the value, but do not compress
433 // whitespace within the value.
434
435 const char* start = p;
436
437 if (*p++ == '\n')
438 {
439 line++;
440 }
441
442 _skipWhitespace(line, p);
443
444 if (*p && (*p != '<'))
445 {
446 // Transfer internal whitespace to q without compressing it.
447 const char* i = start;
448 kumpf 1.50 while (i < p)
449 {
450 *q++ = *i++;
451 }
452 }
453 else
454 {
455 // Do not transfer trailing whitespace to q.
456 break;
457 }
458 }
459 else if (*p == '&')
460 {
461 // Process an entity reference or a character reference.
462
463 *q++ = _getRef(line, ++p);
464 }
465 else
466 {
467 *q++ = *p++;
468 }
469 kumpf 1.50 }
470
471 // If q got behind p, it is safe and necessary to null-terminate q
472
473 if (q != p)
474 {
475 *q = '\0';
476 }
|
477 venkat.puvvada 1.54 textLen = (Uint32)(q - start);
|
478 kumpf 1.50 }
479
480 static inline void _normalizeAttributeValue(
481 Uint32& line,
482 char*& p,
483 char end_char,
484 char*& start)
|
485 kumpf 1.37 {
486 // Skip over leading whitespace:
487
488 _skipWhitespace(line, p);
489 start = p;
490
491 // Process one character at a time:
492
493 char* q = p;
494
495 while (*p && (*p != end_char))
496 {
497 if (_isspace(*p))
498 {
499 // Compress sequences of whitespace characters to a single space
500 // character. Update line number when newlines encountered.
501
502 if (*p++ == '\n')
503 {
504 line++;
505 }
506 kumpf 1.37
507 *q++ = ' ';
508
509 _skipWhitespace(line, p);
510 }
511 else if (*p == '&')
512 {
|
513 kumpf 1.50 // Process an entity reference or a character reference.
|
514 kumpf 1.37
|
515 kumpf 1.50 *q++ = _getRef(line, ++p);
|
516 kumpf 1.37 }
517 else
518 {
519 *q++ = *p++;
520 }
521 }
522
523 // Remove single trailing whitespace (consecutive whitespaces already
524 // compressed above). Since p >= q, we can tell if we need to strip a
525 // trailing space from q by looking at the end of p. We must not look at
526 // the last character of p, though, if p is an empty string.
|
527 dmitry.mikulin 1.44 Boolean adjust_q = (p != start) && _isspace(p[-1]);
528
529 // We encountered a the end_char or a zero-terminator.
530
531 *q = *p;
|
532 kumpf 1.37
|
533 dmitry.mikulin 1.44 if (adjust_q)
|
534 kumpf 1.37 {
535 q--;
536 }
537
538 // If q got behind p, it is safe and necessary to null-terminate q
539
540 if (q != p)
541 {
542 *q = '\0';
543 }
544 }
545
|
546 mike 1.55 Boolean XmlParser::_next(
|
547 kumpf 1.45 XmlEntry& entry,
548 Boolean includeComment)
|
549 mike 1.13 {
550 if (!_putBackStack.isEmpty())
551 {
|
552 david.dillard 1.32 entry = _putBackStack.top();
553 _putBackStack.pop();
554 return true;
|
555 mike 1.13 }
556
557 // If a character was overwritten with a null-terminator the last
558 // time this routine was called, then put back that character. Before
559 // exiting of course, restore the null-terminator.
560
561 char* nullTerminator = 0;
562
563 if (_restoreChar && !*_current)
564 {
|
565 david.dillard 1.32 nullTerminator = _current;
566 *_current = _restoreChar;
567 _restoreChar = '\0';
|
568 mike 1.13 }
569
|
570 kumpf 1.45 entry.attributes.clear();
571
572 if (_supportedNamespaces)
573 {
574 // Remove namespaces of a deeper scope level from the stack.
575 while (!_nameSpaces.isEmpty() &&
576 _nameSpaces.top().scopeLevel > _stack.size())
577 {
578 _nameSpaces.pop();
579 }
580 }
581
|
582 venkat.puvvada 1.41 // Loop until we are done with comments if includeComment is false.
583 do
584 {
585 // Skip over any whitespace:
586 _skipWhitespace(_line, _current);
587
588 if (!*_current)
589 {
590 if (nullTerminator)
591 *nullTerminator = '\0';
|
592 mike 1.13
|
593 venkat.puvvada 1.41 if (!_stack.isEmpty())
594 throw XmlException(XmlException::UNCLOSED_TAGS, _line);
|
595 mike 1.13
|
596 venkat.puvvada 1.41 return false;
597 }
|
598 mike 1.13
|
599 venkat.puvvada 1.41 // Either a "<...>" or content begins next:
|
600 mike 1.13
|
601 venkat.puvvada 1.41 if (*_current == '<')
602 {
603 _current++;
604 _getElement(_current, entry);
|
605 mike 1.13
|
606 venkat.puvvada 1.41 if (nullTerminator)
607 *nullTerminator = '\0';
|
608 mike 1.13
|
609 venkat.puvvada 1.41 if (entry.type == XmlEntry::START_TAG)
610 {
611 if (_stack.isEmpty() && _foundRoot)
612 throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
|
613 mike 1.13
|
614 venkat.puvvada 1.41 _foundRoot = true;
615 _stack.push((char*)entry.text);
616 }
617 else if (entry.type == XmlEntry::END_TAG)
618 {
619 if (_stack.isEmpty())
620 throw XmlException(XmlException::START_END_MISMATCH, _line);
|
621 mike 1.13
|
622 venkat.puvvada 1.41 if (strcmp(_stack.top(), entry.text) != 0)
623 throw XmlException(XmlException::START_END_MISMATCH, _line);
|
624 david.dillard 1.32
|
625 venkat.puvvada 1.41 _stack.pop();
626 }
|
627 david.dillard 1.32 }
|
628 venkat.puvvada 1.41 else
|
629 david.dillard 1.32 {
|
630 venkat.puvvada 1.41 // Normalize the content:
|
631 mike 1.13
|
632 kumpf 1.50 char* start = _current;
|
633 venkat.puvvada 1.54 Uint32 textLen;
634 _normalizeElementValue(_line, _current, textLen);
|
635 mike 1.13
|
636 venkat.puvvada 1.41 // Get the content:
|
637 mike 1.13
|
638 venkat.puvvada 1.41 entry.type = XmlEntry::CONTENT;
639 entry.text = start;
|
640 venkat.puvvada 1.54 entry.textLen = textLen;
|
641 kumpf 1.37
|
642 venkat.puvvada 1.41 // Overwrite '<' with a null character (temporarily).
|
643 kumpf 1.37
|
644 venkat.puvvada 1.41 _restoreChar = *_current;
645 *_current = '\0';
|
646 kumpf 1.37
|
647 venkat.puvvada 1.41 if (nullTerminator)
648 *nullTerminator = '\0';
649 }
|
650 kumpf 1.45 } while (!includeComment && entry.type == XmlEntry::COMMENT);
651
652 if (_supportedNamespaces &&
653 (entry.type == XmlEntry::START_TAG ||
654 entry.type == XmlEntry::EMPTY_TAG ||
655 entry.type == XmlEntry::END_TAG))
656 {
657 // Determine the namespace type for this entry
658
659 if (entry.type == XmlEntry::START_TAG ||
660 entry.type == XmlEntry::EMPTY_TAG)
661 {
662 // Process namespace declarations and determine the namespace type
663 // for the attributes.
664
665 Uint32 scopeLevel = _stack.size();
666 if (entry.type == XmlEntry::EMPTY_TAG)
667 {
668 // Empty tags are deeper scope, but not pushed onto the stack
669 scopeLevel++;
670 }
671 kumpf 1.45
672 for (Uint32 i = 0, n = entry.attributes.size(); i < n; i++)
673 {
674 XmlAttribute& attr = entry.attributes[i];
675 if ((strncmp(attr.name, "xmlns:", 6) == 0) ||
676 (strcmp(attr.name, "xmlns") == 0))
677 {
678 // Process a namespace declaration
679 XmlNamespace ns;
680 if (attr.name[5] == ':')
681 {
682 ns.localName = attr.localName;
683 }
684 else
685 {
686 // Default name space has no local name
687 ns.localName = 0;
688 }
689 ns.extendedName = attr.value;
690 ns.scopeLevel = scopeLevel;
691 ns.type = _getSupportedNamespaceType(ns.extendedName);
692 kumpf 1.45
693 // If the namespace is not supported, assign it a unique
694 // negative identifier.
695 if (ns.type == -1)
696 {
697 ns.type = _currentUnsupportedNSType--;
698 }
699
700 _nameSpaces.push(ns);
701 }
702 else
703 {
704 // Get the namespace type for this attribute.
705 attr.nsType = _getNamespaceType(attr.name);
706 }
707 }
708 }
709
710 entry.nsType = _getNamespaceType(entry.text);
711 }
712 else
713 kumpf 1.45 {
714 entry.nsType = -1;
715 }
|
716 kumpf 1.37
|
717 venkat.puvvada 1.41 return true;
|
718 mike 1.13 }
719
|
720 mike 1.55 Boolean XmlParser::next(XmlEntry& entry, Boolean includeComment)
721 {
722 if (_hideEmptyTags)
723 {
724 // Get the next tag.
725
726 if (!_next(entry, includeComment))
727 return false;
728
729 // If an EMPTY_TAG is encountered, then convert it to a START_TAG and
730 // push a matching END_TAG on the put-back stack. This hides every
731 // EMPTY_TAG from the caller.
732
733 if (entry.type == XmlEntry::EMPTY_TAG)
734 {
735 entry.type = XmlEntry::START_TAG;
736
737 XmlEntry tmp;
738 tmp.type = XmlEntry::END_TAG;
739 tmp.text = entry.text;
740 tmp.nsType = entry.nsType;
741 mike 1.55 tmp.localName = entry.localName;
742
743 _putBackStack.push(tmp);
744 }
745
746 return true;
747 }
748 else
749 return _next(entry, includeComment);
750 }
751
|
752 kumpf 1.45 // Get the namespace type of the given tag
753 int XmlParser::_getNamespaceType(const char* tag)
754 {
755 const char* pos = strchr(tag, ':');
756
757 // If ':' is not found, the tag is not namespace qualified and we
758 // need to look for the default name space.
759
760 // Search the namespace stack from the top
761 for (Sint32 i = _nameSpaces.size() - 1; i >=0; i--)
762 {
763 // If ':' is found, look for the name space with the matching
764 // local name...
765 if ((pos && _nameSpaces[i].localName &&
766 !strncmp(_nameSpaces[i].localName, tag, pos - tag)) ||
767 // ... otherwise look for the default name space. It's the
768 // one with localName set to NULL
769 (!pos && !_nameSpaces[i].localName))
770 {
771 return _nameSpaces[i].type;
772 }
773 kumpf 1.45 }
774
775 // If the tag is namespace qualified, but the name space has not been
776 // declared, it's malformed XML and we must throw an exception.
777 // Note: The "xml" namespace is specifically defined by the W3C as a
778 // reserved prefix ("http://www.w3.org/XML/1998/namespace").
779 if (pos && (strncmp(tag, "xml:", 4) != 0))
780 {
781 throw XmlException(XmlException::UNDECLARED_NAMESPACE, _line);
782 }
783
784 // Otherwise it's OK not to have a name space.
785 return -1;
786 }
787
788 // Given the extended namespace name, find it in the table of supported
789 // namespaces and return its type.
790 int XmlParser::_getSupportedNamespaceType(const char* extendedName)
791 {
792 for (Sint32 i = 0;
793 _supportedNamespaces[i].localName != 0;
794 kumpf 1.45 i++)
795 {
796 PEGASUS_ASSERT(_supportedNamespaces[i].type == i);
797 if (!strcmp(_supportedNamespaces[i].extendedName, extendedName))
798 {
799 return _supportedNamespaces[i].type;
800 }
801 }
802 return -1;
803 }
804
805 XmlNamespace* XmlParser::getNamespace(int nsType)
806 {
807 for (Sint32 i = _nameSpaces.size() - 1; i >=0; i--)
808 {
809 if (_nameSpaces[i].type == nsType)
810 {
811 return &_nameSpaces[i];
812 }
813 }
814 return 0;
815 kumpf 1.45 }
816
|
817 mike 1.13 void XmlParser::putBack(XmlEntry& entry)
818 {
819 _putBackStack.push(entry);
820 }
821
822 XmlParser::~XmlParser()
823 {
824 // Nothing to do!
825 }
826
|
// Lookup table, indexed by character code, of the characters allowed after
// the first character of a name: A-Z a-z 0-9 '_' '-' '.'
// (Note that ':' is not included and must be checked separately.)
//
// Entry 0 is 0, so a scan driven by this table stops at a NUL terminator.
// The table is only ever read, hence const. It is laid out 16 entries per
// row; the comment on each row gives the range of codes it covers.
static const unsigned char _isInnerElementChar[] =
{
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x00 - 0x0F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x10 - 0x1F
    0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,  // 0x20 - 0x2F   '-' '.'
    1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,  // 0x30 - 0x3F   '0' - '9'
    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  // 0x40 - 0x4F   'A' - 'O'
    1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,  // 0x50 - 0x5F   'P' - 'Z' '_'
    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  // 0x60 - 0x6F   'a' - 'o'
    1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,  // 0x70 - 0x7F   'p' - 'z'
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x80 - 0x8F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0x90 - 0x9F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0xA0 - 0xAF
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0xB0 - 0xBF
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0xC0 - 0xCF
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0xD0 - 0xDF
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 0xE0 - 0xEF
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0   // 0xF0 - 0xFF
};
838
|
839 kumpf 1.45 inline Boolean _getQName(char*& p, const char*& localName)
|
840 mike 1.13 {
|
841 kumpf 1.45 localName = p;
842
|
843 mike 1.35 if (!CharSet::isAlNumUnder(Uint8(*p)))
|
844 kumpf 1.45 return false;
|
845 mike 1.35
|
846 kumpf 1.24 p++;
|
847 mike 1.13
|
848 kumpf 1.53 // No explicit test for NULL termination is needed.
849 // On position 0 of the array false is returned.
|
850 thilo.boehm 1.49 while (_isInnerElementChar[Uint8(*p)])
|
851 david.dillard 1.32 p++;
|
852 mike 1.13
|
853 kumpf 1.45 // We've validated the prefix, now validate the local name
854 if (*p == ':')
855 {
856 localName = ++p;
857
858 if (!CharSet::isAlNumUnder(Uint8(*p)))
859 return false;
860
861 p++;
|
862 kumpf 1.53 // No explicit test for NULL termination is needed.
863 // On position 0 of the array false is returned.
|
864 thilo.boehm 1.49 while (_isInnerElementChar[Uint8(*p)])
|
865 kumpf 1.45 p++;
866 }
867
868 return true;
869 }
870
871 Boolean XmlParser::_getElementName(char*& p, const char*& localName)
872 {
873 if (!_getQName(p, localName))
874 throw XmlException(XmlException::BAD_START_TAG, _line);
875
|
876 mike 1.13 // The next character must be a space:
877
|
878 chuck 1.26 if (_isspace(*p))
|
879 mike 1.13 {
|
880 david.dillard 1.32 *p++ = '\0';
|
881 mike 1.34 _skipWhitespace(_line, p);
|
882 mike 1.13 }
883
884 if (*p == '>')
885 {
|
886 david.dillard 1.32 *p++ = '\0';
887 return true;
|
888 mike 1.13 }
889
890 return false;
891 }
892
|
893 kumpf 1.45 Boolean XmlParser::_getOpenElementName(
894 char*& p,
895 const char*& localName,
896 Boolean& openCloseElement)
|
897 mike 1.13 {
898 openCloseElement = false;
899
|
900 kumpf 1.45 if (!_getQName(p, localName))
|
901 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
902 mike 1.35
|
903 mike 1.13 // The next character must be a space:
904
|
905 chuck 1.26 if (_isspace(*p))
|
906 mike 1.13 {
|
907 david.dillard 1.32 *p++ = '\0';
|
908 mike 1.34 _skipWhitespace(_line, p);
|
909 mike 1.13 }
910
911 if (*p == '>')
912 {
|
913 david.dillard 1.32 *p++ = '\0';
914 return true;
|
915 mike 1.13 }
916
917 if (p[0] == '/' && p[1] == '>')
918 {
|
919 david.dillard 1.32 openCloseElement = true;
920 *p = '\0';
921 p += 2;
922 return true;
|
923 mike 1.13 }
924
925 return false;
926 }
927
|
928 kumpf 1.45 void XmlParser::_getAttributeNameAndEqual(char*& p, const char*& localName)
|
929 mike 1.13 {
|
930 kumpf 1.45 if (!_getQName(p, localName))
|
931 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
932 mike 1.35
|
933 mike 1.13 char* term = p;
934
|
935 mike 1.34 _skipWhitespace(_line, p);
|
936 mike 1.13
937 if (*p != '=')
|
938 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
939 mike 1.13
940 p++;
941
|
942 mike 1.34 _skipWhitespace(_line, p);
|
943 mike 1.13
944 *term = '\0';
945 }
946
947 void XmlParser::_getComment(char*& p)
948 {
949 // Now p points to first non-whitespace character beyond "<--" sequence:
950
951 for (; *p; p++)
952 {
|
953 david.dillard 1.32 if (p[0] == '-' && p[1] == '-')
954 {
955 if (p[2] != '>')
956 {
957 throw XmlException(
958 XmlException::MINUS_MINUS_IN_COMMENT, _line);
959 }
960
961 // Find end of comment (excluding whitespace):
962
963 *p = '\0';
964 p += 3;
965 return;
966 }
|
967 mike 1.13 }
968
969 // If it got this far, then the comment is unterminated:
970
971 throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
972 }
973
974 void XmlParser::_getCData(char*& p)
975 {
976 // At this point p points one past "<![CDATA[" sequence:
977
978 for (; *p; p++)
979 {
|
980 david.dillard 1.32 if (p[0] == ']' && p[1] == ']' && p[2] == '>')
981 {
982 *p = '\0';
983 p += 3;
984 return;
985 }
986 else if (*p == '\n')
987 _line++;
|
988 mike 1.13 }
989
990 // If it got this far, then the comment is unterminated:
991
992 throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
993 }
994
995 void XmlParser::_getDocType(char*& p)
996 {
997 // Just ignore the DOCTYPE command for now:
998
999 for (; *p && *p != '>'; p++)
1000 {
|
1001 david.dillard 1.32 if (*p == '\n')
1002 _line++;
|
1003 mike 1.13 }
1004
1005 if (*p != '>')
|
1006 david.dillard 1.32 throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
|
1007 mike 1.13
1008 p++;
1009 }
1010
1011 void XmlParser::_getElement(char*& p, XmlEntry& entry)
1012 {
1013 //--------------------------------------------------------------------------
1014 // Get the element name (expect one of these: '?', '!', [A-Za-z_])
1015 //--------------------------------------------------------------------------
1016
1017 if (*p == '?')
1018 {
|
1019 david.dillard 1.32 entry.type = XmlEntry::XML_DECLARATION;
1020 entry.text = ++p;
|
1021 mike 1.13
|
1022 kumpf 1.45 if (_getElementName(p, entry.localName))
|
1023 david.dillard 1.32 return;
|
1024 mike 1.13 }
1025 else if (*p == '!')
1026 {
|
1027 david.dillard 1.32 p++;
|
1028 mike 1.13
|
1029 david.dillard 1.32 // Expect a comment or CDATA:
|
1030 mike 1.13
|
1031 david.dillard 1.32 if (p[0] == '-' && p[1] == '-')
1032 {
1033 p += 2;
1034 entry.type = XmlEntry::COMMENT;
1035 entry.text = p;
1036 _getComment(p);
1037 return;
1038 }
1039 else if (memcmp(p, "[CDATA[", 7) == 0)
1040 {
1041 p += 7;
1042 entry.type = XmlEntry::CDATA;
1043 entry.text = p;
1044 _getCData(p);
|
1045 venkat.puvvada 1.54 entry.textLen = strlen(entry.text);
|
1046 david.dillard 1.32 return;
1047 }
1048 else if (memcmp(p, "DOCTYPE", 7) == 0)
1049 {
1050 entry.type = XmlEntry::DOCTYPE;
|
1051 kumpf 1.37 entry.text = "";
|
1052 david.dillard 1.32 _getDocType(p);
1053 return;
1054 }
1055 throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
|
1056 mike 1.13 }
1057 else if (*p == '/')
1058 {
|
1059 david.dillard 1.32 entry.type = XmlEntry::END_TAG;
1060 entry.text = ++p;
|
1061 mike 1.13
|
1062 kumpf 1.45 if (!_getElementName(p, entry.localName))
|
1063 david.dillard 1.32 throw(XmlException(XmlException::BAD_END_TAG, _line));
|
1064 mike 1.13
|
1065 david.dillard 1.32 return;
|
1066 mike 1.13 }
|
1067 thilo.boehm 1.49 else if (CharSet::isAlphaUnder(Uint8(*p)))
|
1068 mike 1.13 {
|
1069 david.dillard 1.32 entry.type = XmlEntry::START_TAG;
1070 entry.text = p;
|
1071 mike 1.13
|
1072 david.dillard 1.32 Boolean openCloseElement = false;
|
1073 mike 1.13
|
1074 kumpf 1.45 if (_getOpenElementName(p, entry.localName, openCloseElement))
|
1075 david.dillard 1.32 {
1076 if (openCloseElement)
1077 entry.type = XmlEntry::EMPTY_TAG;
1078 return;
1079 }
|
1080 mike 1.13 }
1081 else
|
1082 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
1083 mike 1.13
1084 //--------------------------------------------------------------------------
1085 // Grab all the attributes:
1086 //--------------------------------------------------------------------------
1087
1088 for (;;)
1089 {
|
1090 david.dillard 1.32 if (entry.type == XmlEntry::XML_DECLARATION)
1091 {
1092 if (p[0] == '?' && p[1] == '>')
1093 {
1094 p += 2;
1095 return;
1096 }
1097 }
1098 else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
1099 {
1100 entry.type = XmlEntry::EMPTY_TAG;
1101 p += 2;
1102 return;
1103 }
1104 else if (*p == '>')
1105 {
1106 p++;
1107 return;
1108 }
1109
1110 XmlAttribute attr;
|
1111 kumpf 1.45 attr.nsType = -1;
|
1112 david.dillard 1.32 attr.name = p;
|
1113 kumpf 1.45 _getAttributeNameAndEqual(p, attr.localName);
|
1114 david.dillard 1.32
|
1115 kumpf 1.37 // Get the attribute value (e.g., "some value")
1116 {
1117 if ((*p != '"') && (*p != '\''))
1118 {
1119 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
1120 }
1121
1122 char quote = *p++;
1123
1124 char* start;
|
1125 kumpf 1.50 _normalizeAttributeValue(_line, p, quote, start);
|
1126 kumpf 1.37 attr.value = start;
1127
1128 if (*p != quote)
1129 {
1130 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
1131 }
1132
1133 // Overwrite the closing quote with a null-terminator:
|
1134 david.dillard 1.32
|
1135 kumpf 1.37 *p++ = '\0';
1136 }
|
1137 david.dillard 1.32
1138 if (entry.type == XmlEntry::XML_DECLARATION)
1139 {
1140 // The next thing must a space or a "?>":
|
1141 mike 1.13
|
1142 david.dillard 1.32 if (!(p[0] == '?' && p[1] == '>') && !_isspace(*p))
1143 {
1144 throw XmlException(
1145 XmlException::BAD_ATTRIBUTE_VALUE, _line);
1146 }
1147 }
1148 else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || _isspace(*p)))
1149 {
1150 // The next thing must be a space or a '>':
|
1151 mike 1.13
|
1152 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
1153 }
|
1154 mike 1.13
|
1155 mike 1.34 _skipWhitespace(_line, p);
|
1156 david.dillard 1.32
|
1157 kumpf 1.45 entry.attributes.append(attr);
|
1158 mike 1.13 }
1159 }
1160
// Printable names of the XML entry types, looked up by entry type value in
// XmlEntry::print(). Both the characters and the pointers are const so the
// table cannot be modified at run time.
static const char* const _typeStrings[] =
{
    "XML_DECLARATION",
    "START_TAG",
    "EMPTY_TAG",
    "END_TAG",
    "COMMENT",
    "CDATA",
    "DOCTYPE",
    "CONTENT"
};
1172
1173 void XmlEntry::print() const
1174 {
1175 PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
1176
1177 Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
1178
1179 if (needQuotes)
|
1180 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
1181
|
1182 mike 1.13 _printValue(text);
1183
1184 if (needQuotes)
|
1185 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
|
1186 mike 1.13
1187 PEGASUS_STD(cout) << '\n';
1188
|
1189 kumpf 1.45 for (Uint32 i = 0, n = attributes.size(); i < n; i++)
|
1190 mike 1.13 {
|
1191 david.dillard 1.32 PEGASUS_STD(cout) << " " << attributes[i].name << "=\"";
1192 _printValue(attributes[i].value);
1193 PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
|
1194 mike 1.13 }
1195 }
1196
1197 const XmlAttribute* XmlEntry::findAttribute(
1198 const char* name) const
1199 {
|
1200 kumpf 1.45 for (Uint32 i = 0, n = attributes.size(); i < n; i++)
|
1201 mike 1.13 {
|
1202 david.dillard 1.32 if (strcmp(attributes[i].name, name) == 0)
1203 return &attributes[i];
|
1204 mike 1.13 }
1205
1206 return 0;
1207 }
1208
|
1209 kumpf 1.45 const XmlAttribute* XmlEntry::findAttribute(
|
1210 kumpf 1.47 int attrNsType,
|
1211 kumpf 1.45 const char* name) const
1212 {
1213 for (Uint32 i = 0, n = attributes.size(); i < n; i++)
1214 {
|
1215 kumpf 1.47 if ((attributes[i].nsType == attrNsType) &&
|
1216 kumpf 1.45 (strcmp(attributes[i].localName, name) == 0))
1217 {
1218 return &attributes[i];
1219 }
1220 }
1221
1222 return 0;
1223 }
1224
|
1225 mike 1.13 // Find first non-whitespace character (set first) and last non-whitespace
1226 // character (set last one past this). For example, consider this string:
1227 //
|
1228 david.dillard 1.32 // " 87 "
|
1229 mike 1.13 //
1230 // The first pointer would point to '8' and the last pointer woudl point one
1231 // beyond '7'.
1232
1233 static void _findEnds(
|
1234 david.dillard 1.32 const char* str,
1235 const char*& first,
|
1236 mike 1.13 const char*& last)
1237 {
1238 first = str;
1239
|
1240 chuck 1.26 while (_isspace(*first))
|
1241 david.dillard 1.32 first++;
|
1242 mike 1.13
1243 if (!*first)
1244 {
|
1245 david.dillard 1.32 last = first;
1246 return;
|
1247 mike 1.13 }
1248
1249 last = first + strlen(first);
1250
|
1251 chuck 1.26 while (last != first && _isspace(last[-1]))
|
1252 david.dillard 1.32 last--;
|
1253 mike 1.13 }
1254
1255 Boolean XmlEntry::getAttributeValue(
|
1256 david.dillard 1.32 const char* name,
|
1257 mike 1.13 Uint32& value) const
1258 {
1259 const XmlAttribute* attr = findAttribute(name);
1260
1261 if (!attr)
|
1262 david.dillard 1.32 return false;
|
1263 mike 1.13
1264 const char* first;
1265 const char* last;
1266 _findEnds(attr->value, first, last);
1267
1268 char* end = 0;
1269 long tmp = strtol(first, &end, 10);
1270
1271 if (!end || end != last)
|
1272 david.dillard 1.32 return false;
|
1273 mike 1.13
1274 value = Uint32(tmp);
1275 return true;
1276 }
1277
1278 Boolean XmlEntry::getAttributeValue(
|
1279 david.dillard 1.32 const char* name,
|
1280 mike 1.13 Real32& value) const
1281 {
1282 const XmlAttribute* attr = findAttribute(name);
1283
1284 if (!attr)
|
1285 david.dillard 1.32 return false;
|
1286 mike 1.13
1287 const char* first;
1288 const char* last;
1289 _findEnds(attr->value, first, last);
1290
1291 char* end = 0;
1292 double tmp = strtod(first, &end);
1293
1294 if (!end || end != last)
|
1295 david.dillard 1.32 return false;
|
1296 mike 1.13
|
1297 david.dillard 1.32 value = static_cast<Real32>(tmp);
|
1298 mike 1.13 return true;
1299 }
1300
1301 Boolean XmlEntry::getAttributeValue(
|
1302 david.dillard 1.32 const char* name,
|
1303 mike 1.13 const char*& value) const
1304 {
1305 const XmlAttribute* attr = findAttribute(name);
1306
1307 if (!attr)
|
1308 david.dillard 1.32 return false;
|
1309 mike 1.13
1310 value = attr->value;
1311 return true;
1312 }
1313
1314 Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1315 {
1316 const char* tmp;
1317
1318 if (!getAttributeValue(name, tmp))
|
1319 david.dillard 1.32 return false;
|
1320 mike 1.13
|
1321 chuck 1.28 value = String(tmp);
|
1322 mike 1.13 return true;
1323 }
1324
|
1325 mike 1.33 void XmlAppendCString(Buffer& out, const char* str)
|
1326 mike 1.13 {
|
1327 david.dillard 1.32 out.append(str, static_cast<Uint32>(strlen(str)));
|
1328 mike 1.13 }
1329
1330 PEGASUS_NAMESPACE_END
|