1 martin 1.51 //%LICENSE////////////////////////////////////////////////////////////////
|
2 martin 1.52 //
|
3 martin 1.51 // Licensed to The Open Group (TOG) under one or more contributor license
4 // agreements. Refer to the OpenPegasusNOTICE.txt file distributed with
5 // this work for additional information regarding copyright ownership.
6 // Each contributor licenses this file to you under the OpenPegasus Open
7 // Source License; you may not use this file except in compliance with the
8 // License.
|
9 martin 1.52 //
|
10 martin 1.51 // Permission is hereby granted, free of charge, to any person obtaining a
11 // copy of this software and associated documentation files (the "Software"),
12 // to deal in the Software without restriction, including without limitation
13 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
14 // and/or sell copies of the Software, and to permit persons to whom the
15 // Software is furnished to do so, subject to the following conditions:
|
16 martin 1.52 //
|
17 martin 1.51 // The above copyright notice and this permission notice shall be included
18 // in all copies or substantial portions of the Software.
|
19 martin 1.52 //
|
20 martin 1.51 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
21 martin 1.52 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22 martin 1.51 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24 // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25 // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26 // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27 martin 1.52 //
|
28 martin 1.51 //////////////////////////////////////////////////////////////////////////
|
29 mike 1.13 //
30 //%/////////////////////////////////////////////////////////////////////////////
31
32 ////////////////////////////////////////////////////////////////////////////////
33 //
34 // XmlParser
35 //
|
36 david.dillard 1.32 // This file contains a simple non-validating XML parser. Here are
37 // several rules for well-formed XML:
|
38 mike 1.13 //
|
39 david.dillard 1.32 // 1. Documents must begin with an XML declaration:
|
40 mike 1.13 //
|
41 david.dillard 1.32 // <?xml version="1.0" standalone="yes"?>
|
42 mike 1.13 //
|
43 david.dillard 1.32 // 2. Comments have the form:
|
44 mike 1.13 //
|
45 david.dillard 1.32 // <!-- blah blah blah -->
|
46 mike 1.13 //
|
47 david.dillard 1.32 // 3. The following entity references are supported:
|
48 mike 1.13 //
|
49 david.dillard 1.32 // &amp; - ampersand
50 // &lt; - less-than
51 // &gt; - greater-than
52 // &quot; - full quote
53 // &apos; - apostrophe
|
54 mike 1.13 //
|
55 kumpf 1.18 // as well as character (numeric) references:
|
56 mike 1.35 //
|
57 kumpf 1.18 // &#49; - decimal reference for character '1'
58 // &#x31; - hexadecimal reference for character '1'
59 //
|
60 david.dillard 1.32 // 4. Element names and attribute names take the following form:
|
61 mike 1.13 //
|
62 david.dillard 1.32 // [A-Za-z_][A-Za-z_0-9-.:]*
|
63 mike 1.13 //
|
64 david.dillard 1.32 // 5. Arbitrary data (CDATA) can be enclosed like this:
|
65 mike 1.13 //
|
66 david.dillard 1.32 // <![CDATA[
67 // ...
68 // ]]>
|
69 mike 1.13 //
|
70 david.dillard 1.32 // 6. Element names and attributes names are case-sensitive.
|
71 mike 1.13 //
|
72 david.dillard 1.32 // 7. Attribute values must be delimited by full (double) or half
73 // (single) quotes; undelimited values are rejected.
|
74 mike 1.13 //
|
75 david.dillard 1.32 // 8. <!DOCTYPE...>
|
76 mike 1.13 //
77 // TODO:
78 //
|
79 kumpf 1.40 // ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is
|
80 mike 1.35 // work. Handle <!DOCTYPE...> sections which are complicated (containing
|
81 mike 1.13 // rules rather than references to files).
82 //
|
83 david.dillard 1.32 // Remove newlines from string literals:
|
84 mike 1.13 //
85 // Example: <xyz x="hello
|
86 david.dillard 1.32 // world">
|
87 mike 1.13 //
88 ////////////////////////////////////////////////////////////////////////////////
89
|
90 sage 1.14 #include <Pegasus/Common/Config.h>
|
91 mike 1.13 #include <cctype>
92 #include <cstdio>
93 #include <cstdlib>
94 #include <cstring>
95 #include "XmlParser.h"
96 #include "Logger.h"
|
97 chuck 1.19 #include "ExceptionRep.h"
|
98 mike 1.34 #include "CharSet.h"
|
99 mike 1.13
100 PEGASUS_NAMESPACE_BEGIN
101
102 ////////////////////////////////////////////////////////////////////////////////
103 //
104 // Static helper functions
105 //
106 ////////////////////////////////////////////////////////////////////////////////
107
108 static void _printValue(const char* p)
109 {
110 for (; *p; p++)
111 {
|
112 david.dillard 1.32 if (*p == '\n')
113 PEGASUS_STD(cout) << "\\n";
114 else if (*p == '\r')
115 PEGASUS_STD(cout) << "\\r";
116 else if (*p == '\t')
117 PEGASUS_STD(cout) << "\\t";
118 else
119 PEGASUS_STD(cout) << *p;
|
120 mike 1.13 }
121 }
122
123 struct EntityReference
124 {
125 const char* match;
126 Uint32 length;
127 char replacement;
128 };
129
|
130 kumpf 1.18 // ATTN: Add support for more entity references
|
131 mike 1.13 static EntityReference _references[] =
132 {
133 { "&", 5, '&' },
134 { "<", 4, '<' },
135 { ">", 4, '>' },
136 { """, 6, '"' },
137 { "'", 6, '\'' }
138 };
139
|
140 chuck 1.26
141 // Implements a check for a whitespace character, without calling
142 // isspace( ). The isspace( ) function is locale-sensitive,
143 // and incorrectly flags some chars above 0x7f as whitespace. This
144 // causes the XmlParser to incorrectly parse UTF-8 data.
145 //
146 // Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml)
147 // defines white space as:
|
148 david.dillard 1.32 // S ::= (#x20 | #x9 | #xD | #xA)+
|
149 mike 1.34 static inline int _isspace(char c)
|
150 chuck 1.26 {
|
151 kumpf 1.36 return CharSet::isXmlWhiteSpace((Uint8)c);
|
152 chuck 1.26 }
153
|
154 mike 1.13 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
155
156 ////////////////////////////////////////////////////////////////////////////////
157 //
158 // XmlException
159 //
160 ////////////////////////////////////////////////////////////////////////////////
161
// Default (English) message text for each XmlException code.  The table is
// indexed with (code - 1) by _formMessage() and _formPartialMessage(), and
// must stay in the same order as _xmlKeys.
static const char* _xmlMessages[] =
{
    "Bad opening element",
    "Bad closing element",
    "Bad attribute name",
    "Expected equal sign",
    "Bad attribute value",
    "A \"--\" sequence found within comment",
    "Unterminated comment",
    "Unterminated CDATA block",
    "Unterminated DOCTYPE",
    "Malformed reference",
    "Expected a comment or CDATA following \"<!\" sequence",
    "Closing element does not match opening element",
    "One or more tags are still open",
    "More than one root element was encountered",
    "Validation error",
    "Semantic error",
    "Namespace not declared"
};
182
|
// Message keys passed to MessageLoaderParms for each XmlException code.
// Indexed with (code - 1), in step with the _xmlMessages table.
static const char* _xmlKeys[] =
{
    // Element and attribute syntax
    "Common.XmlParser.BAD_START_TAG",
    "Common.XmlParser.BAD_END_TAG",
    "Common.XmlParser.BAD_ATTRIBUTE_NAME",
    "Common.XmlParser.EXPECTED_EQUAL_SIGN",
    "Common.XmlParser.BAD_ATTRIBUTE_VALUE",

    // Comments, CDATA, DOCTYPE and references
    "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
    "Common.XmlParser.UNTERMINATED_COMMENT",
    "Common.XmlParser.UNTERMINATED_CDATA",
    "Common.XmlParser.UNTERMINATED_DOCTYPE",
    "Common.XmlParser.MALFORMED_REFERENCE",
    "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",

    // Document structure
    "Common.XmlParser.START_END_MISMATCH",
    "Common.XmlParser.UNCLOSED_TAGS",
    "Common.XmlParser.MULTIPLE_ROOTS",

    // Validation, semantics and namespaces
    "Common.XmlParser.VALIDATION_ERROR",
    "Common.XmlParser.SEMANTIC_ERROR",
    "Common.XmlParser.UNDECLARED_NAMESPACE"
};
203
204
|
205 kumpf 1.40 static MessageLoaderParms _formMessage(
206 Uint32 code,
207 Uint32 line,
208 const String& message)
|
209 chuck 1.19 {
210 String dftMsg = _xmlMessages[Uint32(code) - 1];
|
211 kumpf 1.48 const char* key = _xmlKeys[Uint32(code) - 1];
|
212 kumpf 1.45 String msg = message;
|
213 chuck 1.19
214 dftMsg.append(": on line $0");
215 if (message.size())
216 {
|
217 david.dillard 1.32 msg = ": " + msg;
218 dftMsg.append("$1");
219 }
|
220 chuck 1.19
|
221 kumpf 1.48 return MessageLoaderParms(key, dftMsg.getCString(), line ,msg);
|
222 chuck 1.19 }
223
224 static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
225 {
226 String dftMsg = _xmlMessages[Uint32(code) - 1];
|
227 kumpf 1.48 const char* key = _xmlKeys[Uint32(code) - 1];
|
228 chuck 1.19
229 dftMsg.append(": on line $0");
|
230 david.dillard 1.32
|
231 kumpf 1.48 return MessageLoaderParms(key, dftMsg.getCString(), line);
|
232 chuck 1.19 }
233
|
234 mike 1.13
235 XmlException::XmlException(
|
236 david.dillard 1.32 XmlException::Code code,
|
237 mike 1.13 Uint32 lineNumber,
|
238 david.dillard 1.32 const String& message)
|
239 mike 1.13 : Exception(_formMessage(code, lineNumber, message))
240 {
241
242 }
243
|
244 chuck 1.19
245 XmlException::XmlException(
|
246 david.dillard 1.32 XmlException::Code code,
|
247 chuck 1.19 Uint32 lineNumber,
|
248 david.dillard 1.32 MessageLoaderParms& msgParms)
|
249 chuck 1.19 : Exception(_formPartialMessage(code, lineNumber))
250 {
|
251 david.dillard 1.32 if (msgParms.default_msg.size())
|
252 humberto 1.21 {
|
253 david.dillard 1.32 msgParms.default_msg = ": " + msgParms.default_msg;
254 }
255 _rep->message.append(MessageLoader::getMessage(msgParms));
|
256 chuck 1.19 }
257
258
|
259 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
260 //
261 // XmlValidationError
262 //
263 ////////////////////////////////////////////////////////////////////////////////
264
265 XmlValidationError::XmlValidationError(
266 Uint32 lineNumber,
267 const String& message)
268 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
269 {
270 }
271
|
272 chuck 1.19
273 XmlValidationError::XmlValidationError(
274 Uint32 lineNumber,
275 MessageLoaderParms& msgParms)
276 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
277 {
278 }
279
280
|
281 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
282 //
283 // XmlSemanticError
284 //
285 ////////////////////////////////////////////////////////////////////////////////
286
287 XmlSemanticError::XmlSemanticError(
288 Uint32 lineNumber,
289 const String& message)
290 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
291 {
292 }
|
293 chuck 1.19
294
295 XmlSemanticError::XmlSemanticError(
296 Uint32 lineNumber,
297 MessageLoaderParms& msgParms)
298 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
299 {
300 }
301
|
302 mike 1.13
303 ////////////////////////////////////////////////////////////////////////////////
304 //
305 // XmlParser
306 //
307 ////////////////////////////////////////////////////////////////////////////////
308
|
309 kumpf 1.45 XmlParser::XmlParser(char* text, XmlNamespace* ns)
|
310 kumpf 1.40 : _line(1),
311 _current(text),
312 _restoreChar('\0'),
|
313 kumpf 1.45 _foundRoot(false),
314 _supportedNamespaces(ns),
315 // Start valid indexes with -2. -1 is reserved for not found.
316 _currentUnsupportedNSType(-2)
|
317 mike 1.13 {
318 }
319
|
320 mike 1.34 inline void _skipWhitespace(Uint32& line, char*& p)
321 {
322 while (*p && _isspace(*p))
323 {
324 if (*p == '\n')
325 line++;
326
327 p++;
328 }
329 }
330
|
331 s.manicka 1.43 #if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \
332 defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC)
333 #pragma optimize( "", off )
334 #endif
|
// Recognizes one of the five predefined entity references.  On entry p
// points at the character following the '&'.
//
// When the text at p is "gt;", "lt;", "apos;", "quot;" or "amp;", p is
// advanced past the ';' and the referenced character is returned.
// Otherwise p is left untouched and -1 is returned.
static int _getEntityRef(char*& p)
{
    static const struct
    {
        const char* name;   // entity name including the terminating ';'
        int length;         // number of characters in name
        int value;          // character the entity stands for
    }
    entities[] =
    {
        { "gt;", 3, '>' },
        { "lt;", 3, '<' },
        { "apos;", 5, '\'' },
        { "quot;", 5, '"' },
        { "amp;", 4, '&' }
    };

    const size_t count = sizeof(entities) / sizeof(entities[0]);

    for (size_t i = 0; i < count; i++)
    {
        // strncmp() stops at the first difference or null-terminator, so it
        // never looks beyond the end of a short input.
        if (strncmp(p, entities[i].name, entities[i].length) == 0)
        {
            p += entities[i].length;
            return entities[i].value;
        }
    }

    return -1;
}
|
371 s.manicka 1.43 #if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \
372 defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC)
|
373 kumpf 1.45 #pragma optimize( "", on )
|
374 s.manicka 1.43 #endif
|
375 kumpf 1.37
|
// Parses the body of a numeric character reference.  On entry p points at
// the character following "&#": either decimal digits, or 'x' followed by
// hexadecimal digits, in both cases terminated by ';'.
//
// Returns the character value (0..255) and advances p past the ';' on
// success.  Returns -1, leaving p unchanged, when the reference is
// malformed: no digits, more than 5 decimal or 4 hexadecimal digits, a
// missing ';', or a value above 255.
//
// The digits are scanned by hand rather than with strtoul(), because
// strtoul() also accepts leading whitespace, a sign and (for base 16) a
// "0x" prefix, none of which is allowed in a character reference.
static inline int _getCharRef(char*& p)
{
    const bool hex = (*p == 'x');
    const unsigned long base = hex ? 16 : 10;
    const int maxDigits = hex ? 4 : 5;

    char* const first = hex ? p + 1 : p;
    char* end = first;
    unsigned long ch = 0;

    for (;; end++)
    {
        const char c = *end;
        unsigned long digit;

        if (c >= '0' && c <= '9')
            digit = c - '0';
        else if (hex && c >= 'a' && c <= 'f')
            digit = c - 'a' + 10;
        else if (hex && c >= 'A' && c <= 'F')
            digit = c - 'A' + 10;
        else
            break;

        // Too many digits; also keeps ch from overflowing.
        if (end - first >= maxDigits)
            return -1;

        ch = ch * base + digit;
    }

    if ((end == first) || (*end != ';') || (ch > 255))
    {
        return -1;
    }

    p = end + 1;

    return int(ch);
}
406
|
407 kumpf 1.50 // Parse an entity reference or a character reference
408 static inline int _getRef(Uint32 line, char*& p)
409 {
410 int ch;
411
412 if (*p == '#')
413 {
414 ch = _getCharRef(++p);
415 }
416 else
417 {
418 ch = _getEntityRef(p);
419 }
420
421 if (ch == -1)
422 {
423 throw XmlException(XmlException::MALFORMED_REFERENCE, line);
424 }
425
426 return ch;
427 }
428 kumpf 1.50
429 static inline void _normalizeElementValue(
430 Uint32& line,
431 char*& p)
432 {
433 // Process one character at a time:
434
435 char* q = p;
436
437 while (*p && (*p != '<'))
438 {
439 if (_isspace(*p))
440 {
441 // Trim whitespace from the end of the value, but do not compress
442 // whitespace within the value.
443
444 const char* start = p;
445
446 if (*p++ == '\n')
447 {
448 line++;
449 kumpf 1.50 }
450
451 _skipWhitespace(line, p);
452
453 if (*p && (*p != '<'))
454 {
455 // Transfer internal whitespace to q without compressing it.
456 const char* i = start;
457 while (i < p)
458 {
459 *q++ = *i++;
460 }
461 }
462 else
463 {
464 // Do not transfer trailing whitespace to q.
465 break;
466 }
467 }
468 else if (*p == '&')
469 {
470 kumpf 1.50 // Process an entity reference or a character reference.
471
472 *q++ = _getRef(line, ++p);
473 }
474 else
475 {
476 *q++ = *p++;
477 }
478 }
479
480 // If q got behind p, it is safe and necessary to null-terminate q
481
482 if (q != p)
483 {
484 *q = '\0';
485 }
486 }
487
488 static inline void _normalizeAttributeValue(
489 Uint32& line,
490 char*& p,
491 kumpf 1.50 char end_char,
492 char*& start)
|
493 kumpf 1.37 {
494 // Skip over leading whitespace:
495
496 _skipWhitespace(line, p);
497 start = p;
498
499 // Process one character at a time:
500
501 char* q = p;
502
503 while (*p && (*p != end_char))
504 {
505 if (_isspace(*p))
506 {
507 // Compress sequences of whitespace characters to a single space
508 // character. Update line number when newlines encountered.
509
510 if (*p++ == '\n')
511 {
512 line++;
513 }
514 kumpf 1.37
515 *q++ = ' ';
516
517 _skipWhitespace(line, p);
518 }
519 else if (*p == '&')
520 {
|
521 kumpf 1.50 // Process an entity reference or a character reference.
|
522 kumpf 1.37
|
523 kumpf 1.50 *q++ = _getRef(line, ++p);
|
524 kumpf 1.37 }
525 else
526 {
527 *q++ = *p++;
528 }
529 }
530
531 // Remove single trailing whitespace (consecutive whitespaces already
532 // compressed above). Since p >= q, we can tell if we need to strip a
533 // trailing space from q by looking at the end of p. We must not look at
534 // the last character of p, though, if p is an empty string.
|
535 dmitry.mikulin 1.44 Boolean adjust_q = (p != start) && _isspace(p[-1]);
536
537 // We encountered a the end_char or a zero-terminator.
538
539 *q = *p;
|
540 kumpf 1.37
|
541 dmitry.mikulin 1.44 if (adjust_q)
|
542 kumpf 1.37 {
543 q--;
544 }
545
546 // If q got behind p, it is safe and necessary to null-terminate q
547
548 if (q != p)
549 {
550 *q = '\0';
551 }
552 }
553
|
554 kumpf 1.45 Boolean XmlParser::next(
555 XmlEntry& entry,
556 Boolean includeComment)
|
557 mike 1.13 {
558 if (!_putBackStack.isEmpty())
559 {
|
560 david.dillard 1.32 entry = _putBackStack.top();
561 _putBackStack.pop();
562 return true;
|
563 mike 1.13 }
564
565 // If a character was overwritten with a null-terminator the last
566 // time this routine was called, then put back that character. Before
567 // exiting of course, restore the null-terminator.
568
569 char* nullTerminator = 0;
570
571 if (_restoreChar && !*_current)
572 {
|
573 david.dillard 1.32 nullTerminator = _current;
574 *_current = _restoreChar;
575 _restoreChar = '\0';
|
576 mike 1.13 }
577
|
578 kumpf 1.45 entry.attributes.clear();
579
580 if (_supportedNamespaces)
581 {
582 // Remove namespaces of a deeper scope level from the stack.
583 while (!_nameSpaces.isEmpty() &&
584 _nameSpaces.top().scopeLevel > _stack.size())
585 {
586 _nameSpaces.pop();
587 }
588 }
589
|
590 venkat.puvvada 1.41 // Loop until we are done with comments if includeComment is false.
591 do
592 {
593 // Skip over any whitespace:
594 _skipWhitespace(_line, _current);
595
596 if (!*_current)
597 {
598 if (nullTerminator)
599 *nullTerminator = '\0';
|
600 mike 1.13
|
601 venkat.puvvada 1.41 if (!_stack.isEmpty())
602 throw XmlException(XmlException::UNCLOSED_TAGS, _line);
|
603 mike 1.13
|
604 venkat.puvvada 1.41 return false;
605 }
|
606 mike 1.13
|
607 venkat.puvvada 1.41 // Either a "<...>" or content begins next:
|
608 mike 1.13
|
609 venkat.puvvada 1.41 if (*_current == '<')
610 {
611 _current++;
612 _getElement(_current, entry);
|
613 mike 1.13
|
614 venkat.puvvada 1.41 if (nullTerminator)
615 *nullTerminator = '\0';
|
616 mike 1.13
|
617 venkat.puvvada 1.41 if (entry.type == XmlEntry::START_TAG)
618 {
619 if (_stack.isEmpty() && _foundRoot)
620 throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
|
621 mike 1.13
|
622 venkat.puvvada 1.41 _foundRoot = true;
623 _stack.push((char*)entry.text);
624 }
625 else if (entry.type == XmlEntry::END_TAG)
626 {
627 if (_stack.isEmpty())
628 throw XmlException(XmlException::START_END_MISMATCH, _line);
|
629 mike 1.13
|
630 venkat.puvvada 1.41 if (strcmp(_stack.top(), entry.text) != 0)
631 throw XmlException(XmlException::START_END_MISMATCH, _line);
|
632 david.dillard 1.32
|
633 venkat.puvvada 1.41 _stack.pop();
634 }
|
635 david.dillard 1.32 }
|
636 venkat.puvvada 1.41 else
|
637 david.dillard 1.32 {
|
638 venkat.puvvada 1.41 // Normalize the content:
|
639 mike 1.13
|
640 kumpf 1.50 char* start = _current;
641 _normalizeElementValue(_line, _current);
|
642 mike 1.13
|
643 venkat.puvvada 1.41 // Get the content:
|
644 mike 1.13
|
645 venkat.puvvada 1.41 entry.type = XmlEntry::CONTENT;
646 entry.text = start;
|
647 kumpf 1.37
|
648 venkat.puvvada 1.41 // Overwrite '<' with a null character (temporarily).
|
649 kumpf 1.37
|
650 venkat.puvvada 1.41 _restoreChar = *_current;
651 *_current = '\0';
|
652 kumpf 1.37
|
653 venkat.puvvada 1.41 if (nullTerminator)
654 *nullTerminator = '\0';
655 }
|
656 kumpf 1.45 } while (!includeComment && entry.type == XmlEntry::COMMENT);
657
658 if (_supportedNamespaces &&
659 (entry.type == XmlEntry::START_TAG ||
660 entry.type == XmlEntry::EMPTY_TAG ||
661 entry.type == XmlEntry::END_TAG))
662 {
663 // Determine the namespace type for this entry
664
665 if (entry.type == XmlEntry::START_TAG ||
666 entry.type == XmlEntry::EMPTY_TAG)
667 {
668 // Process namespace declarations and determine the namespace type
669 // for the attributes.
670
671 Uint32 scopeLevel = _stack.size();
672 if (entry.type == XmlEntry::EMPTY_TAG)
673 {
674 // Empty tags are deeper scope, but not pushed onto the stack
675 scopeLevel++;
676 }
677 kumpf 1.45
678 for (Uint32 i = 0, n = entry.attributes.size(); i < n; i++)
679 {
680 XmlAttribute& attr = entry.attributes[i];
681 if ((strncmp(attr.name, "xmlns:", 6) == 0) ||
682 (strcmp(attr.name, "xmlns") == 0))
683 {
684 // Process a namespace declaration
685 XmlNamespace ns;
686 if (attr.name[5] == ':')
687 {
688 ns.localName = attr.localName;
689 }
690 else
691 {
692 // Default name space has no local name
693 ns.localName = 0;
694 }
695 ns.extendedName = attr.value;
696 ns.scopeLevel = scopeLevel;
697 ns.type = _getSupportedNamespaceType(ns.extendedName);
698 kumpf 1.45
699 // If the namespace is not supported, assign it a unique
700 // negative identifier.
701 if (ns.type == -1)
702 {
703 ns.type = _currentUnsupportedNSType--;
704 }
705
706 _nameSpaces.push(ns);
707 }
708 else
709 {
710 // Get the namespace type for this attribute.
711 attr.nsType = _getNamespaceType(attr.name);
712 }
713 }
714 }
715
716 entry.nsType = _getNamespaceType(entry.text);
717 }
718 else
719 kumpf 1.45 {
720 entry.nsType = -1;
721 }
|
722 kumpf 1.37
|
723 venkat.puvvada 1.41 return true;
|
724 mike 1.13 }
725
|
726 kumpf 1.45 // Get the namespace type of the given tag
727 int XmlParser::_getNamespaceType(const char* tag)
728 {
729 const char* pos = strchr(tag, ':');
730
731 // If ':' is not found, the tag is not namespace qualified and we
732 // need to look for the default name space.
733
734 // Search the namespace stack from the top
735 for (Sint32 i = _nameSpaces.size() - 1; i >=0; i--)
736 {
737 // If ':' is found, look for the name space with the matching
738 // local name...
739 if ((pos && _nameSpaces[i].localName &&
740 !strncmp(_nameSpaces[i].localName, tag, pos - tag)) ||
741 // ... otherwise look for the default name space. It's the
742 // one with localName set to NULL
743 (!pos && !_nameSpaces[i].localName))
744 {
745 return _nameSpaces[i].type;
746 }
747 kumpf 1.45 }
748
749 // If the tag is namespace qualified, but the name space has not been
750 // declared, it's malformed XML and we must throw an exception.
751 // Note: The "xml" namespace is specifically defined by the W3C as a
752 // reserved prefix ("http://www.w3.org/XML/1998/namespace").
753 if (pos && (strncmp(tag, "xml:", 4) != 0))
754 {
755 throw XmlException(XmlException::UNDECLARED_NAMESPACE, _line);
756 }
757
758 // Otherwise it's OK not to have a name space.
759 return -1;
760 }
761
762 // Given the extended namespace name, find it in the table of supported
763 // namespaces and return its type.
764 int XmlParser::_getSupportedNamespaceType(const char* extendedName)
765 {
766 for (Sint32 i = 0;
767 _supportedNamespaces[i].localName != 0;
768 kumpf 1.45 i++)
769 {
770 PEGASUS_ASSERT(_supportedNamespaces[i].type == i);
771 if (!strcmp(_supportedNamespaces[i].extendedName, extendedName))
772 {
773 return _supportedNamespaces[i].type;
774 }
775 }
776 return -1;
777 }
778
779 XmlNamespace* XmlParser::getNamespace(int nsType)
780 {
781 for (Sint32 i = _nameSpaces.size() - 1; i >=0; i--)
782 {
783 if (_nameSpaces[i].type == nsType)
784 {
785 return &_nameSpaces[i];
786 }
787 }
788 return 0;
789 kumpf 1.45 }
790
|
791 mike 1.13 void XmlParser::putBack(XmlEntry& entry)
792 {
793 _putBackStack.push(entry);
794 }
795
796 XmlParser::~XmlParser()
797 {
798 // Nothing to do!
799 }
800
|
// Lookup table, indexed by character code, of the characters allowed after
// the first character of a name: A-Z a-z 0-9 '_' '-' '.'
// (Note that ':' is not included and must be checked separately.)
// Entry 0 is 0, so a scan over a NUL-terminated string stops at its end.
static unsigned char _isInnerElementChar[] =
{
    /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x10 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x20 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,
    /* 0x30 */ 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
    /* 0x40 */ 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0x50 */ 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,
    /* 0x60 */ 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0x70 */ 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
    /* 0x80 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x90 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xA0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xB0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xC0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xD0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xE0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xF0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
812
|
813 kumpf 1.45 inline Boolean _getQName(char*& p, const char*& localName)
|
814 mike 1.13 {
|
815 kumpf 1.45 localName = p;
816
|
817 mike 1.35 if (!CharSet::isAlNumUnder(Uint8(*p)))
|
818 kumpf 1.45 return false;
|
819 mike 1.35
|
820 kumpf 1.24 p++;
|
821 mike 1.13
|
822 thilo.boehm 1.49 // No explicit test for NULL termination is needed.
823 // On position 0 of the array false is returned.
824 while (_isInnerElementChar[Uint8(*p)])
|
825 david.dillard 1.32 p++;
|
826 mike 1.13
|
827 kumpf 1.45 // We've validated the prefix, now validate the local name
828 if (*p == ':')
829 {
830 localName = ++p;
831
832 if (!CharSet::isAlNumUnder(Uint8(*p)))
833 return false;
834
835 p++;
|
836 thilo.boehm 1.49 // No explicit test for NULL termination is needed.
837 // On position 0 of the array false is returned.
838 while (_isInnerElementChar[Uint8(*p)])
|
839 kumpf 1.45 p++;
840 }
841
842 return true;
843 }
844
845 Boolean XmlParser::_getElementName(char*& p, const char*& localName)
846 {
847 if (!_getQName(p, localName))
848 throw XmlException(XmlException::BAD_START_TAG, _line);
849
|
850 mike 1.13 // The next character must be a space:
851
|
852 chuck 1.26 if (_isspace(*p))
|
853 mike 1.13 {
|
854 david.dillard 1.32 *p++ = '\0';
|
855 mike 1.34 _skipWhitespace(_line, p);
|
856 mike 1.13 }
857
858 if (*p == '>')
859 {
|
860 david.dillard 1.32 *p++ = '\0';
861 return true;
|
862 mike 1.13 }
863
864 return false;
865 }
866
|
867 kumpf 1.45 Boolean XmlParser::_getOpenElementName(
868 char*& p,
869 const char*& localName,
870 Boolean& openCloseElement)
|
871 mike 1.13 {
872 openCloseElement = false;
873
|
874 kumpf 1.45 if (!_getQName(p, localName))
|
875 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
876 mike 1.35
|
877 mike 1.13 // The next character must be a space:
878
|
879 chuck 1.26 if (_isspace(*p))
|
880 mike 1.13 {
|
881 david.dillard 1.32 *p++ = '\0';
|
882 mike 1.34 _skipWhitespace(_line, p);
|
883 mike 1.13 }
884
885 if (*p == '>')
886 {
|
887 david.dillard 1.32 *p++ = '\0';
888 return true;
|
889 mike 1.13 }
890
891 if (p[0] == '/' && p[1] == '>')
892 {
|
893 david.dillard 1.32 openCloseElement = true;
894 *p = '\0';
895 p += 2;
896 return true;
|
897 mike 1.13 }
898
899 return false;
900 }
901
|
902 kumpf 1.45 void XmlParser::_getAttributeNameAndEqual(char*& p, const char*& localName)
|
903 mike 1.13 {
|
904 kumpf 1.45 if (!_getQName(p, localName))
|
905 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
906 mike 1.35
|
907 mike 1.13 char* term = p;
908
|
909 mike 1.34 _skipWhitespace(_line, p);
|
910 mike 1.13
911 if (*p != '=')
|
912 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
913 mike 1.13
914 p++;
915
|
916 mike 1.34 _skipWhitespace(_line, p);
|
917 mike 1.13
918 *term = '\0';
919 }
920
921 void XmlParser::_getComment(char*& p)
922 {
923 // Now p points to first non-whitespace character beyond "<--" sequence:
924
925 for (; *p; p++)
926 {
|
927 david.dillard 1.32 if (p[0] == '-' && p[1] == '-')
928 {
929 if (p[2] != '>')
930 {
931 throw XmlException(
932 XmlException::MINUS_MINUS_IN_COMMENT, _line);
933 }
934
935 // Find end of comment (excluding whitespace):
936
937 *p = '\0';
938 p += 3;
939 return;
940 }
|
941 mike 1.13 }
942
943 // If it got this far, then the comment is unterminated:
944
945 throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
946 }
947
948 void XmlParser::_getCData(char*& p)
949 {
950 // At this point p points one past "<![CDATA[" sequence:
951
952 for (; *p; p++)
953 {
|
954 david.dillard 1.32 if (p[0] == ']' && p[1] == ']' && p[2] == '>')
955 {
956 *p = '\0';
957 p += 3;
958 return;
959 }
960 else if (*p == '\n')
961 _line++;
|
962 mike 1.13 }
963
964 // If it got this far, then the comment is unterminated:
965
966 throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
967 }
968
969 void XmlParser::_getDocType(char*& p)
970 {
971 // Just ignore the DOCTYPE command for now:
972
973 for (; *p && *p != '>'; p++)
974 {
|
975 david.dillard 1.32 if (*p == '\n')
976 _line++;
|
977 mike 1.13 }
978
979 if (*p != '>')
|
980 david.dillard 1.32 throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
|
981 mike 1.13
982 p++;
983 }
984
985 void XmlParser::_getElement(char*& p, XmlEntry& entry)
986 {
987 //--------------------------------------------------------------------------
988 // Get the element name (expect one of these: '?', '!', [A-Za-z_])
989 //--------------------------------------------------------------------------
990
991 if (*p == '?')
992 {
|
993 david.dillard 1.32 entry.type = XmlEntry::XML_DECLARATION;
994 entry.text = ++p;
|
995 mike 1.13
|
996 kumpf 1.45 if (_getElementName(p, entry.localName))
|
997 david.dillard 1.32 return;
|
998 mike 1.13 }
999 else if (*p == '!')
1000 {
|
1001 david.dillard 1.32 p++;
|
1002 mike 1.13
|
1003 david.dillard 1.32 // Expect a comment or CDATA:
|
1004 mike 1.13
|
1005 david.dillard 1.32 if (p[0] == '-' && p[1] == '-')
1006 {
1007 p += 2;
1008 entry.type = XmlEntry::COMMENT;
1009 entry.text = p;
1010 _getComment(p);
1011 return;
1012 }
1013 else if (memcmp(p, "[CDATA[", 7) == 0)
1014 {
1015 p += 7;
1016 entry.type = XmlEntry::CDATA;
1017 entry.text = p;
1018 _getCData(p);
1019 return;
1020 }
1021 else if (memcmp(p, "DOCTYPE", 7) == 0)
1022 {
1023 entry.type = XmlEntry::DOCTYPE;
|
1024 kumpf 1.37 entry.text = "";
|
1025 david.dillard 1.32 _getDocType(p);
1026 return;
1027 }
1028 throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
|
1029 mike 1.13 }
1030 else if (*p == '/')
1031 {
|
1032 david.dillard 1.32 entry.type = XmlEntry::END_TAG;
1033 entry.text = ++p;
|
1034 mike 1.13
|
1035 kumpf 1.45 if (!_getElementName(p, entry.localName))
|
1036 david.dillard 1.32 throw(XmlException(XmlException::BAD_END_TAG, _line));
|
1037 mike 1.13
|
1038 david.dillard 1.32 return;
|
1039 mike 1.13 }
|
1040 thilo.boehm 1.49 else if (CharSet::isAlphaUnder(Uint8(*p)))
|
1041 mike 1.13 {
|
1042 david.dillard 1.32 entry.type = XmlEntry::START_TAG;
1043 entry.text = p;
|
1044 mike 1.13
|
1045 david.dillard 1.32 Boolean openCloseElement = false;
|
1046 mike 1.13
|
1047 kumpf 1.45 if (_getOpenElementName(p, entry.localName, openCloseElement))
|
1048 david.dillard 1.32 {
1049 if (openCloseElement)
1050 entry.type = XmlEntry::EMPTY_TAG;
1051 return;
1052 }
|
1053 mike 1.13 }
1054 else
|
1055 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
1056 mike 1.13
1057 //--------------------------------------------------------------------------
1058 // Grab all the attributes:
1059 //--------------------------------------------------------------------------
1060
1061 for (;;)
1062 {
|
1063 david.dillard 1.32 if (entry.type == XmlEntry::XML_DECLARATION)
1064 {
1065 if (p[0] == '?' && p[1] == '>')
1066 {
1067 p += 2;
1068 return;
1069 }
1070 }
1071 else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
1072 {
1073 entry.type = XmlEntry::EMPTY_TAG;
1074 p += 2;
1075 return;
1076 }
1077 else if (*p == '>')
1078 {
1079 p++;
1080 return;
1081 }
1082
1083 XmlAttribute attr;
|
1084 kumpf 1.45 attr.nsType = -1;
|
1085 david.dillard 1.32 attr.name = p;
|
1086 kumpf 1.45 _getAttributeNameAndEqual(p, attr.localName);
|
1087 david.dillard 1.32
|
1088 kumpf 1.37 // Get the attribute value (e.g., "some value")
1089 {
1090 if ((*p != '"') && (*p != '\''))
1091 {
1092 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
1093 }
1094
1095 char quote = *p++;
1096
1097 char* start;
|
1098 kumpf 1.50 _normalizeAttributeValue(_line, p, quote, start);
|
1099 kumpf 1.37 attr.value = start;
1100
1101 if (*p != quote)
1102 {
1103 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
1104 }
1105
1106 // Overwrite the closing quote with a null-terminator:
|
1107 david.dillard 1.32
|
1108 kumpf 1.37 *p++ = '\0';
1109 }
|
1110 david.dillard 1.32
1111 if (entry.type == XmlEntry::XML_DECLARATION)
1112 {
1113 // The next thing must a space or a "?>":
|
1114 mike 1.13
|
1115 david.dillard 1.32 if (!(p[0] == '?' && p[1] == '>') && !_isspace(*p))
1116 {
1117 throw XmlException(
1118 XmlException::BAD_ATTRIBUTE_VALUE, _line);
1119 }
1120 }
1121 else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || _isspace(*p)))
1122 {
1123 // The next thing must be a space or a '>':
|
1124 mike 1.13
|
1125 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
1126 }
|
1127 mike 1.13
|
1128 mike 1.34 _skipWhitespace(_line, p);
|
1129 david.dillard 1.32
|
1130 kumpf 1.45 entry.attributes.append(attr);
|
1131 mike 1.13 }
1132 }
1133
// Printable names for XML entry types. XmlEntry::print() indexes this table
// directly with the entry's type value, so the order of the strings must
// match the numeric values of the entry type enumeration
// (NOTE(review): presumably the declaration order in the header -- confirm
// whenever an enumerator is added or reordered).
static const char* _typeStrings[] =
{
    "XML_DECLARATION",  // index 0
    "START_TAG",        // index 1
    "EMPTY_TAG",        // index 2
    "END_TAG",          // index 3
    "COMMENT",          // index 4
    "CDATA",            // index 5
    "DOCTYPE",          // index 6
    "CONTENT"           // index 7
};
1145
1146 void XmlEntry::print() const
1147 {
1148 PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
1149
1150 Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
1151
1152 if (needQuotes)
|
1153 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
1154
|
1155 mike 1.13 _printValue(text);
1156
1157 if (needQuotes)
|
1158 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
|
1159 mike 1.13
1160 PEGASUS_STD(cout) << '\n';
1161
|
1162 kumpf 1.45 for (Uint32 i = 0, n = attributes.size(); i < n; i++)
|
1163 mike 1.13 {
|
1164 david.dillard 1.32 PEGASUS_STD(cout) << " " << attributes[i].name << "=\"";
1165 _printValue(attributes[i].value);
1166 PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
|
1167 mike 1.13 }
1168 }
1169
1170 const XmlAttribute* XmlEntry::findAttribute(
1171 const char* name) const
1172 {
|
1173 kumpf 1.45 for (Uint32 i = 0, n = attributes.size(); i < n; i++)
|
1174 mike 1.13 {
|
1175 david.dillard 1.32 if (strcmp(attributes[i].name, name) == 0)
1176 return &attributes[i];
|
1177 mike 1.13 }
1178
1179 return 0;
1180 }
1181
|
1182 kumpf 1.45 const XmlAttribute* XmlEntry::findAttribute(
|
1183 kumpf 1.47 int attrNsType,
|
1184 kumpf 1.45 const char* name) const
1185 {
1186 for (Uint32 i = 0, n = attributes.size(); i < n; i++)
1187 {
|
1188 kumpf 1.47 if ((attributes[i].nsType == attrNsType) &&
|
1189 kumpf 1.45 (strcmp(attributes[i].localName, name) == 0))
1190 {
1191 return &attributes[i];
1192 }
1193 }
1194
1195 return 0;
1196 }
1197
|
1198 mike 1.13 // Find first non-whitespace character (set first) and last non-whitespace
1199 // character (set last one past this). For example, consider this string:
1200 //
|
1201 david.dillard 1.32 // " 87 "
|
1202 mike 1.13 //
1203 // The first pointer would point to '8' and the last pointer woudl point one
1204 // beyond '7'.
1205
1206 static void _findEnds(
|
1207 david.dillard 1.32 const char* str,
1208 const char*& first,
|
1209 mike 1.13 const char*& last)
1210 {
1211 first = str;
1212
|
1213 chuck 1.26 while (_isspace(*first))
|
1214 david.dillard 1.32 first++;
|
1215 mike 1.13
1216 if (!*first)
1217 {
|
1218 david.dillard 1.32 last = first;
1219 return;
|
1220 mike 1.13 }
1221
1222 last = first + strlen(first);
1223
|
1224 chuck 1.26 while (last != first && _isspace(last[-1]))
|
1225 david.dillard 1.32 last--;
|
1226 mike 1.13 }
1227
1228 Boolean XmlEntry::getAttributeValue(
|
1229 david.dillard 1.32 const char* name,
|
1230 mike 1.13 Uint32& value) const
1231 {
1232 const XmlAttribute* attr = findAttribute(name);
1233
1234 if (!attr)
|
1235 david.dillard 1.32 return false;
|
1236 mike 1.13
1237 const char* first;
1238 const char* last;
1239 _findEnds(attr->value, first, last);
1240
1241 char* end = 0;
1242 long tmp = strtol(first, &end, 10);
1243
1244 if (!end || end != last)
|
1245 david.dillard 1.32 return false;
|
1246 mike 1.13
1247 value = Uint32(tmp);
1248 return true;
1249 }
1250
1251 Boolean XmlEntry::getAttributeValue(
|
1252 david.dillard 1.32 const char* name,
|
1253 mike 1.13 Real32& value) const
1254 {
1255 const XmlAttribute* attr = findAttribute(name);
1256
1257 if (!attr)
|
1258 david.dillard 1.32 return false;
|
1259 mike 1.13
1260 const char* first;
1261 const char* last;
1262 _findEnds(attr->value, first, last);
1263
1264 char* end = 0;
1265 double tmp = strtod(first, &end);
1266
1267 if (!end || end != last)
|
1268 david.dillard 1.32 return false;
|
1269 mike 1.13
|
1270 david.dillard 1.32 value = static_cast<Real32>(tmp);
|
1271 mike 1.13 return true;
1272 }
1273
1274 Boolean XmlEntry::getAttributeValue(
|
1275 david.dillard 1.32 const char* name,
|
1276 mike 1.13 const char*& value) const
1277 {
1278 const XmlAttribute* attr = findAttribute(name);
1279
1280 if (!attr)
|
1281 david.dillard 1.32 return false;
|
1282 mike 1.13
1283 value = attr->value;
1284 return true;
1285 }
1286
1287 Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1288 {
1289 const char* tmp;
1290
1291 if (!getAttributeValue(name, tmp))
|
1292 david.dillard 1.32 return false;
|
1293 mike 1.13
|
1294 chuck 1.28 value = String(tmp);
|
1295 mike 1.13 return true;
1296 }
1297
|
1298 mike 1.33 void XmlAppendCString(Buffer& out, const char* str)
|
1299 mike 1.13 {
|
1300 david.dillard 1.32 out.append(str, static_cast<Uint32>(strlen(str)));
|
1301 mike 1.13 }
1302
1303 PEGASUS_NAMESPACE_END
|