1 karl 1.38 //%2006////////////////////////////////////////////////////////////////////////
|
2 mike 1.13 //
|
3 karl 1.29 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
|
6 karl 1.27 // IBM Corp.; EMC Corporation, The Open Group.
|
7 karl 1.29 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
|
9 karl 1.31 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
|
11 karl 1.38 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
12 // EMC Corporation; Symantec Corporation; The Open Group.
|
13 mike 1.13 //
14 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
15 kumpf 1.16 // of this software and associated documentation files (the "Software"), to
16 // deal in the Software without restriction, including without limitation the
17 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
18 mike 1.13 // sell copies of the Software, and to permit persons to whom the Software is
19 // furnished to do so, subject to the following conditions:
|
20 karl 1.38 //
|
21 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
22 mike 1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
23 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
24 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
25 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
26 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
27 mike 1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 //
30 //==============================================================================
31 //
32 //%/////////////////////////////////////////////////////////////////////////////
33
34 ////////////////////////////////////////////////////////////////////////////////
35 //
36 // XmlParser
37 //
|
38 david.dillard 1.32 // This file contains a simple non-validating XML parser. Here are
39 // several rules for well-formed XML:
|
40 mike 1.13 //
|
41 david.dillard 1.32 // 1. Documents must begin with an XML declaration:
|
42 mike 1.13 //
|
43 david.dillard 1.32 // <?xml version="1.0" standalone="yes"?>
|
44 mike 1.13 //
|
45 david.dillard 1.32 // 2. Comments have the form:
|
46 mike 1.13 //
|
47 david.dillard 1.32 // <!-- blah blah blah -->
|
48 mike 1.13 //
|
49 david.dillard 1.32 // 3. The following entity references are supported:
|
50 mike 1.13 //
|
51 david.dillard 1.32 // &amp; - ampersand
52 // &lt; - less-than
53 // &gt; - greater-than
54 // &quot; - full quote
55 // &apos; - apostrophe
|
56 mike 1.13 //
|
57 kumpf 1.18 // as well as character (numeric) references:
|
58 mike 1.35 //
|
59 kumpf 1.18 // &#49; - decimal reference for character '1'
60 // &#x31; - hexadecimal reference for character '1'
61 //
|
62 david.dillard 1.32 // 4. Element names and attribute names take the following form:
|
63 mike 1.13 //
|
64 david.dillard 1.32 // [A-Za-z_][A-Za-z_0-9-.:]*
|
65 mike 1.13 //
|
66 david.dillard 1.32 // 5. Arbitrary data (CDATA) can be enclosed like this:
|
67 mike 1.13 //
|
68 david.dillard 1.32 // <![CDATA[
69 // ...
70 // ]]>
|
71 mike 1.13 //
|
72 david.dillard 1.32 // 6. Element names and attributes names are case-sensitive.
|
73 mike 1.13 //
|
74 david.dillard 1.32 // 7. XmlAttribute values must be delimited by full or half quotes.
75 // XmlAttribute values must be delimited.
|
76 mike 1.13 //
|
77 david.dillard 1.32 // 8. <!DOCTYPE...>
|
78 mike 1.13 //
79 // TODO:
80 //
|
81 kumpf 1.40 // ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is
|
82 mike 1.35 // work. Handle <!DOCTYPE...> sections which are complicated (containing
|
83 mike 1.13 // rules rather than references to files).
84 //
|
85 david.dillard 1.32 // Remove newlines from string literals:
|
86 mike 1.13 //
87 // Example: <xyz x="hello
|
88 david.dillard 1.32 // world">
|
89 mike 1.13 //
90 ////////////////////////////////////////////////////////////////////////////////
91
|
92 sage 1.14 #include <Pegasus/Common/Config.h>
|
93 mike 1.13 #include <cctype>
94 #include <cstdio>
95 #include <cstdlib>
96 #include <cstring>
97 #include "XmlParser.h"
98 #include "Logger.h"
|
99 chuck 1.19 #include "ExceptionRep.h"
|
100 mike 1.34 #include "CharSet.h"
|
101 mike 1.13
102 PEGASUS_NAMESPACE_BEGIN
103
104 ////////////////////////////////////////////////////////////////////////////////
105 //
106 // Static helper functions
107 //
108 ////////////////////////////////////////////////////////////////////////////////
109
110 static void _printValue(const char* p)
111 {
112 for (; *p; p++)
113 {
|
114 david.dillard 1.32 if (*p == '\n')
115 PEGASUS_STD(cout) << "\\n";
116 else if (*p == '\r')
117 PEGASUS_STD(cout) << "\\r";
118 else if (*p == '\t')
119 PEGASUS_STD(cout) << "\\t";
120 else
121 PEGASUS_STD(cout) << *p;
|
122 mike 1.13 }
123 }
124
125 struct EntityReference
126 {
127 const char* match;
128 Uint32 length;
129 char replacement;
130 };
131
|
132 kumpf 1.18 // ATTN: Add support for more entity references
|
133 mike 1.13 static EntityReference _references[] =
134 {
135 { "&", 5, '&' },
136 { "<", 4, '<' },
137 { ">", 4, '>' },
138 { """, 6, '"' },
139 { "'", 6, '\'' }
140 };
141
|
142 chuck 1.26
143 // Implements a check for a whitespace character, without calling
144 // isspace( ). The isspace( ) function is locale-sensitive,
145 // and incorrectly flags some chars above 0x7f as whitespace. This
146 // causes the XmlParser to incorrectly parse UTF-8 data.
147 //
148 // Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml)
149 // defines white space as:
|
150 david.dillard 1.32 // S ::= (#x20 | #x9 | #xD | #xA)+
|
151 mike 1.34 static inline int _isspace(char c)
|
152 chuck 1.26 {
|
153 kumpf 1.36 return CharSet::isXmlWhiteSpace((Uint8)c);
|
154 chuck 1.26 }
155
|
156 mike 1.13 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
157
158 ////////////////////////////////////////////////////////////////////////////////
159 //
160 // XmlException
161 //
162 ////////////////////////////////////////////////////////////////////////////////
163
// Default (untranslated) message text for each XmlException code. The entry
// for a code is stored at index (code - 1), in the same order as _xmlKeys.
// NOTE(review): "Exepected" is misspelled, but the text is emitted at run time
// and is therefore left untouched here.
static const char* _xmlMessages[] =
{
    "Bad opening element",
    "Bad closing element",
    "Bad attribute name",
    "Exepected equal sign",
    "Bad attribute value",
    "A \"--\" sequence found within comment",
    "Unterminated comment",
    "Unterminated CDATA block",
    "Unterminated DOCTYPE",
    "Malformed reference",
    "Expected a comment or CDATA following \"<!\" sequence",
    "Closing element does not match opening element",
    "One or more tags are still open",
    "More than one root element was encountered",
    "Validation error",
    "Semantic error",
    "Namespace not declared"
};
184
|
// Message-bundle keys for each XmlException code. The entry for a code is
// stored at index (code - 1), in the same order as _xmlMessages.
static const char* _xmlKeys[] =
{
    "Common.XmlParser.BAD_START_TAG",
    "Common.XmlParser.BAD_END_TAG",
    "Common.XmlParser.BAD_ATTRIBUTE_NAME",
    "Common.XmlParser.EXPECTED_EQUAL_SIGN",
    "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
    "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
    "Common.XmlParser.UNTERMINATED_COMMENT",
    "Common.XmlParser.UNTERMINATED_CDATA",
    "Common.XmlParser.UNTERMINATED_DOCTYPE",
    "Common.XmlParser.MALFORMED_REFERENCE",
    "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
    "Common.XmlParser.START_END_MISMATCH",
    "Common.XmlParser.UNCLOSED_TAGS",
    "Common.XmlParser.MULTIPLE_ROOTS",
    "Common.XmlParser.VALIDATION_ERROR",
    "Common.XmlParser.SEMANTIC_ERROR",
    "Common.XmlParser.UNDECLARED_NAMESPACE"
};
205
206
|
207 kumpf 1.40 static MessageLoaderParms _formMessage(
208 Uint32 code,
209 Uint32 line,
210 const String& message)
|
211 chuck 1.19 {
212 String dftMsg = _xmlMessages[Uint32(code) - 1];
213 String key = _xmlKeys[Uint32(code) - 1];
|
214 kumpf 1.45 String msg = message;
|
215 chuck 1.19
216 dftMsg.append(": on line $0");
217 if (message.size())
218 {
|
219 david.dillard 1.32 msg = ": " + msg;
220 dftMsg.append("$1");
221 }
|
222 chuck 1.19
223 return MessageLoaderParms(key, dftMsg, line ,msg);
224 }
225
226 static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
227 {
228 String dftMsg = _xmlMessages[Uint32(code) - 1];
229 String key = _xmlKeys[Uint32(code) - 1];
230
231 dftMsg.append(": on line $0");
|
232 david.dillard 1.32
|
233 chuck 1.19 return MessageLoaderParms(key, dftMsg, line);
234 }
235
|
236 mike 1.13
237 XmlException::XmlException(
|
238 david.dillard 1.32 XmlException::Code code,
|
239 mike 1.13 Uint32 lineNumber,
|
240 david.dillard 1.32 const String& message)
|
241 mike 1.13 : Exception(_formMessage(code, lineNumber, message))
242 {
243
244 }
245
|
246 chuck 1.19
247 XmlException::XmlException(
|
248 david.dillard 1.32 XmlException::Code code,
|
249 chuck 1.19 Uint32 lineNumber,
|
250 david.dillard 1.32 MessageLoaderParms& msgParms)
|
251 chuck 1.19 : Exception(_formPartialMessage(code, lineNumber))
252 {
|
253 david.dillard 1.32 if (msgParms.default_msg.size())
|
254 humberto 1.21 {
|
255 david.dillard 1.32 msgParms.default_msg = ": " + msgParms.default_msg;
256 }
257 _rep->message.append(MessageLoader::getMessage(msgParms));
|
258 chuck 1.19 }
259
260
|
261 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
262 //
263 // XmlValidationError
264 //
265 ////////////////////////////////////////////////////////////////////////////////
266
267 XmlValidationError::XmlValidationError(
268 Uint32 lineNumber,
269 const String& message)
270 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
271 {
272 }
273
|
274 chuck 1.19
275 XmlValidationError::XmlValidationError(
276 Uint32 lineNumber,
277 MessageLoaderParms& msgParms)
278 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
279 {
280 }
281
282
|
283 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
284 //
285 // XmlSemanticError
286 //
287 ////////////////////////////////////////////////////////////////////////////////
288
289 XmlSemanticError::XmlSemanticError(
290 Uint32 lineNumber,
291 const String& message)
292 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
293 {
294 }
|
295 chuck 1.19
296
297 XmlSemanticError::XmlSemanticError(
298 Uint32 lineNumber,
299 MessageLoaderParms& msgParms)
300 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
301 {
302 }
303
|
304 mike 1.13
305 ////////////////////////////////////////////////////////////////////////////////
306 //
307 // XmlParser
308 //
309 ////////////////////////////////////////////////////////////////////////////////
310
|
311 kumpf 1.45 XmlParser::XmlParser(char* text, XmlNamespace* ns)
|
312 kumpf 1.40 : _line(1),
313 _current(text),
314 _restoreChar('\0'),
|
315 kumpf 1.45 _foundRoot(false),
316 _supportedNamespaces(ns),
317 // Start valid indexes with -2. -1 is reserved for not found.
318 _currentUnsupportedNSType(-2)
|
319 mike 1.13 {
320 }
321
|
322 mike 1.34 inline void _skipWhitespace(Uint32& line, char*& p)
323 {
324 while (*p && _isspace(*p))
325 {
326 if (*p == '\n')
327 line++;
328
329 p++;
330 }
331 }
332
|
// NOTE(review): optimization is switched off around this function on the
// Win64 MSVC platforms; presumably a compiler workaround - confirm before
// removing.
#if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \
    defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC)
#pragma optimize( "", off )
#endif
// Recognizes a predefined entity name at p (the text just after '&',
// including the closing ';'). On success, advances p past the ';' and
// returns the character the entity stands for; otherwise leaves p unchanged
// and returns -1.
static int _getEntityRef(char*& p)
{
    static const struct
    {
        const char* name;
        size_t length;
        char value;
    }
    entities[] =
    {
        { "gt;", 3, '>' },
        { "lt;", 3, '<' },
        { "apos;", 5, '\'' },
        { "quot;", 5, '"' },
        { "amp;", 4, '&' }
    };

    const size_t count = sizeof(entities) / sizeof(entities[0]);

    for (size_t i = 0; i < count; i++)
    {
        // strncmp stops at the terminating null, so a short buffer is never
        // read past its end.
        if (strncmp(p, entities[i].name, entities[i].length) == 0)
        {
            p += entities[i].length;
            return entities[i].value;
        }
    }

    return -1;
}
#if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \
    defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC)
#pragma optimize( "", on )
#endif
|
377 kumpf 1.37
// Parses the numeric part of a character reference. On entry p points just
// past "&#" (decimal) or "&#x" (hexadecimal, hex == true).
//
// On success, advances p past the terminating ';' and returns the character
// code (0..255). Returns -1, leaving p unchanged, when the reference is
// malformed: no digits, a non-digit before ';', more than 4 hex or 5 decimal
// digits, a missing ';', or a value above 255.
//
// The digits are scanned explicitly rather than with strtoul(), which would
// also accept leading whitespace, a sign and a "0x" prefix.
static inline int _getCharRef(char*& p, bool hex)
{
    const unsigned long base = hex ? 16 : 10;
    const int maxDigits = hex ? 4 : 5;

    unsigned long ch = 0;
    char* end = p;

    for (;; end++)
    {
        unsigned long digit;
        const char c = *end;

        if (c >= '0' && c <= '9')
        {
            digit = (unsigned long)(c - '0');
        }
        else if (hex && c >= 'a' && c <= 'f')
        {
            digit = (unsigned long)(c - 'a') + 10;
        }
        else if (hex && c >= 'A' && c <= 'F')
        {
            digit = (unsigned long)(c - 'A') + 10;
        }
        else
        {
            break;
        }

        // The digit limit also keeps ch far below any overflow.
        if (end - p >= maxDigits)
        {
            return -1;
        }

        ch = ch * base + digit;
    }

    if ((end == p) || (*end != ';') || (ch > 255))
    {
        return -1;
    }

    p = end + 1;

    return int(ch);
}
406
407 static void _normalize(Uint32& line, char*& p, char end_char, char*& start)
408 {
409 // Skip over leading whitespace:
410
411 _skipWhitespace(line, p);
412 start = p;
413
414 // Process one character at a time:
415
416 char* q = p;
417
418 while (*p && (*p != end_char))
419 kumpf 1.37 {
420 if (_isspace(*p))
421 {
422 // Compress sequences of whitespace characters to a single space
423 // character. Update line number when newlines encountered.
424
425 if (*p++ == '\n')
426 {
427 line++;
428 }
429
430 *q++ = ' ';
431
432 _skipWhitespace(line, p);
433 }
434 else if (*p == '&')
435 {
436 // Process entity characters and entity references:
437
438 p++;
439 int ch;
440 kumpf 1.37
441 if (*p == '#')
442 {
443 *p++;
444
445 if (*p == 'x')
446 {
447 p++;
448 ch = _getCharRef(p, true);
449 }
450 else
451 {
452 ch = _getCharRef(p, false);
453 }
454 }
455 else
456 {
457 ch = _getEntityRef(p);
458 }
459
460 if (ch == -1)
461 kumpf 1.37 {
462 throw XmlException(XmlException::MALFORMED_REFERENCE, line);
463 }
464
465 *q++ = ch;
466 }
467 else
468 {
469 *q++ = *p++;
470 }
471 }
472
473 // Remove single trailing whitespace (consecutive whitespaces already
474 // compressed above). Since p >= q, we can tell if we need to strip a
475 // trailing space from q by looking at the end of p. We must not look at
476 // the last character of p, though, if p is an empty string.
|
477 dmitry.mikulin 1.44 Boolean adjust_q = (p != start) && _isspace(p[-1]);
478
479 // We encountered a the end_char or a zero-terminator.
480
481 *q = *p;
|
482 kumpf 1.37
|
483 dmitry.mikulin 1.44 if (adjust_q)
|
484 kumpf 1.37 {
485 q--;
486 }
487
488 // If q got behind p, it is safe and necessary to null-terminate q
489
490 if (q != p)
491 {
492 *q = '\0';
493 }
494 }
495
|
496 kumpf 1.45 Boolean XmlParser::next(
497 XmlEntry& entry,
498 Boolean includeComment)
|
499 mike 1.13 {
500 if (!_putBackStack.isEmpty())
501 {
|
502 david.dillard 1.32 entry = _putBackStack.top();
503 _putBackStack.pop();
504 return true;
|
505 mike 1.13 }
506
507 // If a character was overwritten with a null-terminator the last
508 // time this routine was called, then put back that character. Before
509 // exiting of course, restore the null-terminator.
510
511 char* nullTerminator = 0;
512
513 if (_restoreChar && !*_current)
514 {
|
515 david.dillard 1.32 nullTerminator = _current;
516 *_current = _restoreChar;
517 _restoreChar = '\0';
|
518 mike 1.13 }
519
|
520 kumpf 1.45 entry.attributes.clear();
521
522 if (_supportedNamespaces)
523 {
524 // Remove namespaces of a deeper scope level from the stack.
525 while (!_nameSpaces.isEmpty() &&
526 _nameSpaces.top().scopeLevel > _stack.size())
527 {
528 _nameSpaces.pop();
529 }
530 }
531
|
532 venkat.puvvada 1.41 // Loop until we are done with comments if includeComment is false.
533 do
534 {
535 // Skip over any whitespace:
536 _skipWhitespace(_line, _current);
537
538 if (!*_current)
539 {
540 if (nullTerminator)
541 *nullTerminator = '\0';
|
542 mike 1.13
|
543 venkat.puvvada 1.41 if (!_stack.isEmpty())
544 throw XmlException(XmlException::UNCLOSED_TAGS, _line);
|
545 mike 1.13
|
546 venkat.puvvada 1.41 return false;
547 }
|
548 mike 1.13
|
549 venkat.puvvada 1.41 // Either a "<...>" or content begins next:
|
550 mike 1.13
|
551 venkat.puvvada 1.41 if (*_current == '<')
552 {
553 _current++;
554 _getElement(_current, entry);
|
555 mike 1.13
|
556 venkat.puvvada 1.41 if (nullTerminator)
557 *nullTerminator = '\0';
|
558 mike 1.13
|
559 venkat.puvvada 1.41 if (entry.type == XmlEntry::START_TAG)
560 {
561 if (_stack.isEmpty() && _foundRoot)
562 throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
|
563 mike 1.13
|
564 venkat.puvvada 1.41 _foundRoot = true;
565 _stack.push((char*)entry.text);
566 }
567 else if (entry.type == XmlEntry::END_TAG)
568 {
569 if (_stack.isEmpty())
570 throw XmlException(XmlException::START_END_MISMATCH, _line);
|
571 mike 1.13
|
572 venkat.puvvada 1.41 if (strcmp(_stack.top(), entry.text) != 0)
573 throw XmlException(XmlException::START_END_MISMATCH, _line);
|
574 david.dillard 1.32
|
575 venkat.puvvada 1.41 _stack.pop();
576 }
|
577 david.dillard 1.32 }
|
578 venkat.puvvada 1.41 else
|
579 david.dillard 1.32 {
|
580 venkat.puvvada 1.41 // Normalize the content:
|
581 mike 1.13
|
582 venkat.puvvada 1.41 char* start;
583 _normalize(_line, _current, '<', start);
|
584 mike 1.13
|
585 venkat.puvvada 1.41 // Get the content:
|
586 mike 1.13
|
587 venkat.puvvada 1.41 entry.type = XmlEntry::CONTENT;
588 entry.text = start;
|
589 kumpf 1.37
|
590 venkat.puvvada 1.41 // Overwrite '<' with a null character (temporarily).
|
591 kumpf 1.37
|
592 venkat.puvvada 1.41 _restoreChar = *_current;
593 *_current = '\0';
|
594 kumpf 1.37
|
595 venkat.puvvada 1.41 if (nullTerminator)
596 *nullTerminator = '\0';
597 }
|
598 kumpf 1.45 } while (!includeComment && entry.type == XmlEntry::COMMENT);
599
600 if (_supportedNamespaces &&
601 (entry.type == XmlEntry::START_TAG ||
602 entry.type == XmlEntry::EMPTY_TAG ||
603 entry.type == XmlEntry::END_TAG))
604 {
605 // Determine the namespace type for this entry
606
607 if (entry.type == XmlEntry::START_TAG ||
608 entry.type == XmlEntry::EMPTY_TAG)
609 {
610 // Process namespace declarations and determine the namespace type
611 // for the attributes.
612
613 Uint32 scopeLevel = _stack.size();
614 if (entry.type == XmlEntry::EMPTY_TAG)
615 {
616 // Empty tags are deeper scope, but not pushed onto the stack
617 scopeLevel++;
618 }
619 kumpf 1.45
620 for (Uint32 i = 0, n = entry.attributes.size(); i < n; i++)
621 {
622 XmlAttribute& attr = entry.attributes[i];
623 if ((strncmp(attr.name, "xmlns:", 6) == 0) ||
624 (strcmp(attr.name, "xmlns") == 0))
625 {
626 // Process a namespace declaration
627 XmlNamespace ns;
628 if (attr.name[5] == ':')
629 {
630 ns.localName = attr.localName;
631 }
632 else
633 {
634 // Default name space has no local name
635 ns.localName = 0;
636 }
637 ns.extendedName = attr.value;
638 ns.scopeLevel = scopeLevel;
639 ns.type = _getSupportedNamespaceType(ns.extendedName);
640 kumpf 1.45
641 // If the namespace is not supported, assign it a unique
642 // negative identifier.
643 if (ns.type == -1)
644 {
645 ns.type = _currentUnsupportedNSType--;
646 }
647
648 _nameSpaces.push(ns);
649 }
650 else
651 {
652 // Get the namespace type for this attribute.
653 attr.nsType = _getNamespaceType(attr.name);
654 }
655 }
656 }
657
658 entry.nsType = _getNamespaceType(entry.text);
659 }
660 else
661 kumpf 1.45 {
662 entry.nsType = -1;
663 }
|
664 kumpf 1.37
|
665 venkat.puvvada 1.41 return true;
|
666 mike 1.13 }
667
|
668 kumpf 1.45 // Get the namespace type of the given tag
669 int XmlParser::_getNamespaceType(const char* tag)
670 {
671 const char* pos = strchr(tag, ':');
672
673 // If ':' is not found, the tag is not namespace qualified and we
674 // need to look for the default name space.
675
676 // Search the namespace stack from the top
677 for (Sint32 i = _nameSpaces.size() - 1; i >=0; i--)
678 {
679 // If ':' is found, look for the name space with the matching
680 // local name...
681 if ((pos && _nameSpaces[i].localName &&
682 !strncmp(_nameSpaces[i].localName, tag, pos - tag)) ||
683 // ... otherwise look for the default name space. It's the
684 // one with localName set to NULL
685 (!pos && !_nameSpaces[i].localName))
686 {
687 return _nameSpaces[i].type;
688 }
689 kumpf 1.45 }
690
691 // If the tag is namespace qualified, but the name space has not been
692 // declared, it's malformed XML and we must throw an exception.
693 // Note: The "xml" namespace is specifically defined by the W3C as a
694 // reserved prefix ("http://www.w3.org/XML/1998/namespace").
695 if (pos && (strncmp(tag, "xml:", 4) != 0))
696 {
697 throw XmlException(XmlException::UNDECLARED_NAMESPACE, _line);
698 }
699
700 // Otherwise it's OK not to have a name space.
701 return -1;
702 }
703
704 // Given the extended namespace name, find it in the table of supported
705 // namespaces and return its type.
706 int XmlParser::_getSupportedNamespaceType(const char* extendedName)
707 {
708 for (Sint32 i = 0;
709 _supportedNamespaces[i].localName != 0;
710 kumpf 1.45 i++)
711 {
712 PEGASUS_ASSERT(_supportedNamespaces[i].type == i);
713 if (!strcmp(_supportedNamespaces[i].extendedName, extendedName))
714 {
715 return _supportedNamespaces[i].type;
716 }
717 }
718 return -1;
719 }
720
721 XmlNamespace* XmlParser::getNamespace(int nsType)
722 {
723 for (Sint32 i = _nameSpaces.size() - 1; i >=0; i--)
724 {
725 if (_nameSpaces[i].type == nsType)
726 {
727 return &_nameSpaces[i];
728 }
729 }
730 return 0;
731 kumpf 1.45 }
732
|
733 mike 1.13 void XmlParser::putBack(XmlEntry& entry)
734 {
735 _putBackStack.push(entry);
736 }
737
738 XmlParser::~XmlParser()
739 {
740 // Nothing to do!
741 }
742
|
// A-Za-z0-9_-. (Note that ':' is not included and must be checked separately)
//
// Lookup table indexed by unsigned character value; a nonzero entry marks a
// character that may follow the first character of a name. The size is stated
// explicitly so that every Uint8 index is in bounds; entries 0x80-0xFF are
// not listed and are therefore zero-initialized.
static unsigned char _isInnerElementChar[256] =
{
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x00 - 0x0F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x10 - 0x1F
    0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0, // 0x20 - 0x2F: '-' '.'
    1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, // 0x30 - 0x3F: '0'-'9'
    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x40 - 0x4F: 'A'-'O'
    1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, // 0x50 - 0x5F: 'P'-'Z' '_'
    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x60 - 0x6F: 'a'-'o'
    1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0  // 0x70 - 0x7F: 'p'-'z'
};
754
|
755 kumpf 1.45 inline Boolean _getQName(char*& p, const char*& localName)
|
756 mike 1.13 {
|
757 kumpf 1.45 localName = p;
758
|
759 mike 1.35 if (!CharSet::isAlNumUnder(Uint8(*p)))
|
760 kumpf 1.45 return false;
|
761 mike 1.35
|
762 kumpf 1.24 p++;
|
763 mike 1.13
|
764 mike 1.35 while (*p && _isInnerElementChar[Uint8(*p)])
|
765 david.dillard 1.32 p++;
|
766 mike 1.13
|
767 kumpf 1.45 // We've validated the prefix, now validate the local name
768 if (*p == ':')
769 {
770 localName = ++p;
771
772 if (!CharSet::isAlNumUnder(Uint8(*p)))
773 return false;
774
775 p++;
776
777 while (*p && _isInnerElementChar[Uint8(*p)])
778 p++;
779 }
780
781 return true;
782 }
783
784 Boolean XmlParser::_getElementName(char*& p, const char*& localName)
785 {
786 if (!_getQName(p, localName))
787 throw XmlException(XmlException::BAD_START_TAG, _line);
788 kumpf 1.45
|
789 mike 1.13 // The next character must be a space:
790
|
791 chuck 1.26 if (_isspace(*p))
|
792 mike 1.13 {
|
793 david.dillard 1.32 *p++ = '\0';
|
794 mike 1.34 _skipWhitespace(_line, p);
|
795 mike 1.13 }
796
797 if (*p == '>')
798 {
|
799 david.dillard 1.32 *p++ = '\0';
800 return true;
|
801 mike 1.13 }
802
803 return false;
804 }
805
|
806 kumpf 1.45 Boolean XmlParser::_getOpenElementName(
807 char*& p,
808 const char*& localName,
809 Boolean& openCloseElement)
|
810 mike 1.13 {
811 openCloseElement = false;
812
|
813 kumpf 1.45 if (!_getQName(p, localName))
|
814 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
815 mike 1.35
|
816 mike 1.13 // The next character must be a space:
817
|
818 chuck 1.26 if (_isspace(*p))
|
819 mike 1.13 {
|
820 david.dillard 1.32 *p++ = '\0';
|
821 mike 1.34 _skipWhitespace(_line, p);
|
822 mike 1.13 }
823
824 if (*p == '>')
825 {
|
826 david.dillard 1.32 *p++ = '\0';
827 return true;
|
828 mike 1.13 }
829
830 if (p[0] == '/' && p[1] == '>')
831 {
|
832 david.dillard 1.32 openCloseElement = true;
833 *p = '\0';
834 p += 2;
835 return true;
|
836 mike 1.13 }
837
838 return false;
839 }
840
|
841 kumpf 1.45 void XmlParser::_getAttributeNameAndEqual(char*& p, const char*& localName)
|
842 mike 1.13 {
|
843 kumpf 1.45 if (!_getQName(p, localName))
|
844 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
845 mike 1.35
|
846 mike 1.13 char* term = p;
847
|
848 mike 1.34 _skipWhitespace(_line, p);
|
849 mike 1.13
850 if (*p != '=')
|
851 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
852 mike 1.13
853 p++;
854
|
855 mike 1.34 _skipWhitespace(_line, p);
|
856 mike 1.13
857 *term = '\0';
858 }
859
860 void XmlParser::_getComment(char*& p)
861 {
862 // Now p points to first non-whitespace character beyond "<--" sequence:
863
864 for (; *p; p++)
865 {
|
866 david.dillard 1.32 if (p[0] == '-' && p[1] == '-')
867 {
868 if (p[2] != '>')
869 {
870 throw XmlException(
871 XmlException::MINUS_MINUS_IN_COMMENT, _line);
872 }
873
874 // Find end of comment (excluding whitespace):
875
876 *p = '\0';
877 p += 3;
878 return;
879 }
|
880 mike 1.13 }
881
882 // If it got this far, then the comment is unterminated:
883
884 throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
885 }
886
887 void XmlParser::_getCData(char*& p)
888 {
889 // At this point p points one past "<![CDATA[" sequence:
890
891 for (; *p; p++)
892 {
|
893 david.dillard 1.32 if (p[0] == ']' && p[1] == ']' && p[2] == '>')
894 {
895 *p = '\0';
896 p += 3;
897 return;
898 }
899 else if (*p == '\n')
900 _line++;
|
901 mike 1.13 }
902
903 // If it got this far, then the comment is unterminated:
904
905 throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
906 }
907
908 void XmlParser::_getDocType(char*& p)
909 {
910 // Just ignore the DOCTYPE command for now:
911
912 for (; *p && *p != '>'; p++)
913 {
|
914 david.dillard 1.32 if (*p == '\n')
915 _line++;
|
916 mike 1.13 }
917
918 if (*p != '>')
|
919 david.dillard 1.32 throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
|
920 mike 1.13
921 p++;
922 }
923
924 void XmlParser::_getElement(char*& p, XmlEntry& entry)
925 {
926 //--------------------------------------------------------------------------
927 // Get the element name (expect one of these: '?', '!', [A-Za-z_])
928 //--------------------------------------------------------------------------
929
930 if (*p == '?')
931 {
|
932 david.dillard 1.32 entry.type = XmlEntry::XML_DECLARATION;
933 entry.text = ++p;
|
934 mike 1.13
|
935 kumpf 1.45 if (_getElementName(p, entry.localName))
|
936 david.dillard 1.32 return;
|
937 mike 1.13 }
938 else if (*p == '!')
939 {
|
940 david.dillard 1.32 p++;
|
941 mike 1.13
|
942 david.dillard 1.32 // Expect a comment or CDATA:
|
943 mike 1.13
|
944 david.dillard 1.32 if (p[0] == '-' && p[1] == '-')
945 {
946 p += 2;
947 entry.type = XmlEntry::COMMENT;
948 entry.text = p;
949 _getComment(p);
950 return;
951 }
952 else if (memcmp(p, "[CDATA[", 7) == 0)
953 {
954 p += 7;
955 entry.type = XmlEntry::CDATA;
956 entry.text = p;
957 _getCData(p);
958 return;
959 }
960 else if (memcmp(p, "DOCTYPE", 7) == 0)
961 {
962 entry.type = XmlEntry::DOCTYPE;
|
963 kumpf 1.37 entry.text = "";
|
964 david.dillard 1.32 _getDocType(p);
965 return;
966 }
967 throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
|
968 mike 1.13 }
969 else if (*p == '/')
970 {
|
971 david.dillard 1.32 entry.type = XmlEntry::END_TAG;
972 entry.text = ++p;
|
973 mike 1.13
|
974 kumpf 1.45 if (!_getElementName(p, entry.localName))
|
975 david.dillard 1.32 throw(XmlException(XmlException::BAD_END_TAG, _line));
|
976 mike 1.13
|
977 david.dillard 1.32 return;
|
978 mike 1.13 }
|
979 david 1.25 else if ((((*p >= 'A') && (*p <= 'Z')) ||
|
980 kumpf 1.24 ((*p >= 'a') && (*p <= 'z')) ||
981 (*p == '_')))
|
982 mike 1.13 {
|
983 david.dillard 1.32 entry.type = XmlEntry::START_TAG;
984 entry.text = p;
|
985 mike 1.13
|
986 david.dillard 1.32 Boolean openCloseElement = false;
|
987 mike 1.13
|
988 kumpf 1.45 if (_getOpenElementName(p, entry.localName, openCloseElement))
|
989 david.dillard 1.32 {
990 if (openCloseElement)
991 entry.type = XmlEntry::EMPTY_TAG;
992 return;
993 }
|
994 mike 1.13 }
995 else
|
996 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
997 mike 1.13
998 //--------------------------------------------------------------------------
999 // Grab all the attributes:
1000 //--------------------------------------------------------------------------
1001
1002 for (;;)
1003 {
|
1004 david.dillard 1.32 if (entry.type == XmlEntry::XML_DECLARATION)
1005 {
1006 if (p[0] == '?' && p[1] == '>')
1007 {
1008 p += 2;
1009 return;
1010 }
1011 }
1012 else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
1013 {
1014 entry.type = XmlEntry::EMPTY_TAG;
1015 p += 2;
1016 return;
1017 }
1018 else if (*p == '>')
1019 {
1020 p++;
1021 return;
1022 }
1023
1024 XmlAttribute attr;
|
1025 kumpf 1.45 attr.nsType = -1;
|
1026 david.dillard 1.32 attr.name = p;
|
1027 kumpf 1.45 _getAttributeNameAndEqual(p, attr.localName);
|
1028 david.dillard 1.32
|
1029 kumpf 1.37 // Get the attribute value (e.g., "some value")
1030 {
1031 if ((*p != '"') && (*p != '\''))
1032 {
1033 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
1034 }
1035
1036 char quote = *p++;
1037
1038 char* start;
1039 _normalize(_line, p, quote, start);
1040 attr.value = start;
1041
1042 if (*p != quote)
1043 {
1044 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
1045 }
1046
1047 // Overwrite the closing quote with a null-terminator:
|
1048 david.dillard 1.32
|
1049 kumpf 1.37 *p++ = '\0';
1050 }
|
1051 david.dillard 1.32
1052 if (entry.type == XmlEntry::XML_DECLARATION)
1053 {
1054 // The next thing must a space or a "?>":
|
1055 mike 1.13
|
1056 david.dillard 1.32 if (!(p[0] == '?' && p[1] == '>') && !_isspace(*p))
1057 {
1058 throw XmlException(
1059 XmlException::BAD_ATTRIBUTE_VALUE, _line);
1060 }
1061 }
1062 else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || _isspace(*p)))
1063 {
1064 // The next thing must be a space or a '>':
|
1065 mike 1.13
|
1066 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
1067 }
|
1068 mike 1.13
|
1069 mike 1.34 _skipWhitespace(_line, p);
|
1070 david.dillard 1.32
|
1071 kumpf 1.45 entry.attributes.append(attr);
|
1072 mike 1.13 }
1073 }
1074
// Printable names for the XmlEntry types; XmlEntry::print() indexes this
// table with the entry's type value.
static const char* _typeStrings[] =
{
    "XML_DECLARATION",
    "START_TAG",
    "EMPTY_TAG",
    "END_TAG",
    "COMMENT",
    "CDATA",
    "DOCTYPE",
    "CONTENT"
};
1086
1087 void XmlEntry::print() const
1088 {
1089 PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
1090
1091 Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
1092
1093 if (needQuotes)
|
1094 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
1095
|
1096 mike 1.13 _printValue(text);
1097
1098 if (needQuotes)
|
1099 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
|
1100 mike 1.13
1101 PEGASUS_STD(cout) << '\n';
1102
|
1103 kumpf 1.45 for (Uint32 i = 0, n = attributes.size(); i < n; i++)
|
1104 mike 1.13 {
|
1105 david.dillard 1.32 PEGASUS_STD(cout) << " " << attributes[i].name << "=\"";
1106 _printValue(attributes[i].value);
1107 PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
|
1108 mike 1.13 }
1109 }
1110
1111 const XmlAttribute* XmlEntry::findAttribute(
1112 const char* name) const
1113 {
|
1114 kumpf 1.45 for (Uint32 i = 0, n = attributes.size(); i < n; i++)
|
1115 mike 1.13 {
|
1116 david.dillard 1.32 if (strcmp(attributes[i].name, name) == 0)
1117 return &attributes[i];
|
1118 mike 1.13 }
1119
1120 return 0;
1121 }
1122
|
1123 kumpf 1.45 const XmlAttribute* XmlEntry::findAttribute(
1124 int nsType,
1125 const char* name) const
1126 {
1127 for (Uint32 i = 0, n = attributes.size(); i < n; i++)
1128 {
1129 if ((attributes[i].nsType == nsType) &&
1130 (strcmp(attributes[i].localName, name) == 0))
1131 {
1132 return &attributes[i];
1133 }
1134 }
1135
1136 return 0;
1137 }
1138
|
1139 mike 1.13 // Find first non-whitespace character (set first) and last non-whitespace
1140 // character (set last one past this). For example, consider this string:
1141 //
|
1142 david.dillard 1.32 // " 87 "
|
1143 mike 1.13 //
1144 // The first pointer would point to '8' and the last pointer woudl point one
1145 // beyond '7'.
1146
1147 static void _findEnds(
|
1148 david.dillard 1.32 const char* str,
1149 const char*& first,
|
1150 mike 1.13 const char*& last)
1151 {
1152 first = str;
1153
|
1154 chuck 1.26 while (_isspace(*first))
|
1155 david.dillard 1.32 first++;
|
1156 mike 1.13
1157 if (!*first)
1158 {
|
1159 david.dillard 1.32 last = first;
1160 return;
|
1161 mike 1.13 }
1162
1163 last = first + strlen(first);
1164
|
1165 chuck 1.26 while (last != first && _isspace(last[-1]))
|
1166 david.dillard 1.32 last--;
|
1167 mike 1.13 }
1168
1169 Boolean XmlEntry::getAttributeValue(
|
1170 david.dillard 1.32 const char* name,
|
1171 mike 1.13 Uint32& value) const
1172 {
1173 const XmlAttribute* attr = findAttribute(name);
1174
1175 if (!attr)
|
1176 david.dillard 1.32 return false;
|
1177 mike 1.13
1178 const char* first;
1179 const char* last;
1180 _findEnds(attr->value, first, last);
1181
1182 char* end = 0;
1183 long tmp = strtol(first, &end, 10);
1184
1185 if (!end || end != last)
|
1186 david.dillard 1.32 return false;
|
1187 mike 1.13
1188 value = Uint32(tmp);
1189 return true;
1190 }
1191
1192 Boolean XmlEntry::getAttributeValue(
|
1193 david.dillard 1.32 const char* name,
|
1194 mike 1.13 Real32& value) const
1195 {
1196 const XmlAttribute* attr = findAttribute(name);
1197
1198 if (!attr)
|
1199 david.dillard 1.32 return false;
|
1200 mike 1.13
1201 const char* first;
1202 const char* last;
1203 _findEnds(attr->value, first, last);
1204
1205 char* end = 0;
1206 double tmp = strtod(first, &end);
1207
1208 if (!end || end != last)
|
1209 david.dillard 1.32 return false;
|
1210 mike 1.13
|
1211 david.dillard 1.32 value = static_cast<Real32>(tmp);
|
1212 mike 1.13 return true;
1213 }
1214
1215 Boolean XmlEntry::getAttributeValue(
|
1216 david.dillard 1.32 const char* name,
|
1217 mike 1.13 const char*& value) const
1218 {
1219 const XmlAttribute* attr = findAttribute(name);
1220
1221 if (!attr)
|
1222 david.dillard 1.32 return false;
|
1223 mike 1.13
1224 value = attr->value;
1225 return true;
1226 }
1227
1228 Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1229 {
1230 const char* tmp;
1231
1232 if (!getAttributeValue(name, tmp))
|
1233 david.dillard 1.32 return false;
|
1234 mike 1.13
|
1235 chuck 1.28 value = String(tmp);
|
1236 mike 1.13 return true;
1237 }
1238
|
1239 mike 1.33 void XmlAppendCString(Buffer& out, const char* str)
|
1240 mike 1.13 {
|
1241 david.dillard 1.32 out.append(str, static_cast<Uint32>(strlen(str)));
|
1242 mike 1.13 }
1243
1244 PEGASUS_NAMESPACE_END
|