1 karl 1.38 //%2006////////////////////////////////////////////////////////////////////////
|
2 mike 1.13 //
|
3 karl 1.29 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
|
6 karl 1.27 // IBM Corp.; EMC Corporation, The Open Group.
|
7 karl 1.29 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
|
9 karl 1.31 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
|
11 karl 1.38 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
12 // EMC Corporation; Symantec Corporation; The Open Group.
|
13 mike 1.13 //
14 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
15 kumpf 1.16 // of this software and associated documentation files (the "Software"), to
16 // deal in the Software without restriction, including without limitation the
17 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
18 mike 1.13 // sell copies of the Software, and to permit persons to whom the Software is
19 // furnished to do so, subject to the following conditions:
|
20 karl 1.38 //
|
21 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
22 mike 1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
23 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
24 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
25 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
26 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
27 mike 1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 //
30 //==============================================================================
31 //
32 //%/////////////////////////////////////////////////////////////////////////////
33
34 ////////////////////////////////////////////////////////////////////////////////
35 //
36 // XmlParser
37 //
|
38 david.dillard 1.32 // This file contains a simple non-validating XML parser. Here are
39 // several rules for well-formed XML:
|
40 mike 1.13 //
|
41 david.dillard 1.32 // 1. Documents must begin with an XML declaration:
|
42 mike 1.13 //
|
43 david.dillard 1.32 // <?xml version="1.0" standalone="yes"?>
|
44 mike 1.13 //
|
45 david.dillard 1.32 // 2. Comments have the form:
|
46 mike 1.13 //
|
47 david.dillard 1.32 // <!-- blah blah blah -->
|
48 mike 1.13 //
|
49 david.dillard 1.32 // 3. The following entity references are supported:
|
50 mike 1.13 //
|
51 david.dillard 1.32 // &amp; - ampersand
52 // &lt; - less-than
53 // &gt; - greater-than
54 // &quot; - full quote
55 // &apos; - apostrophe
|
56 mike 1.13 //
|
57 kumpf 1.18 // as well as character (numeric) references:
|
58 mike 1.35 //
|
59 kumpf 1.18 // &#49; - decimal reference for character '1'
60 // &#x31; - hexadecimal reference for character '1'
61 //
|
62 david.dillard 1.32 // 4. Element names and attribute names take the following form:
|
63 mike 1.13 //
|
64 david.dillard 1.32 // [A-Za-z_][A-Za-z_0-9-.:]*
|
65 mike 1.13 //
|
66 david.dillard 1.32 // 5. Arbitrary data (CDATA) can be enclosed like this:
|
67 mike 1.13 //
|
68 david.dillard 1.32 // <![CDATA[
69 // ...
70 // ]]>
|
71 mike 1.13 //
|
72 david.dillard 1.32 // 6. Element names and attributes names are case-sensitive.
|
73 mike 1.13 //
|
74 david.dillard 1.32 // 7. XmlAttribute values must be delimited by full or half quotes.
75 // XmlAttribute values must be delimited.
|
76 mike 1.13 //
|
77 david.dillard 1.32 // 8. <!DOCTYPE...>
|
78 mike 1.13 //
79 // TODO:
80 //
|
81 kumpf 1.40 // ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is
|
82 mike 1.35 // work. Handle <!DOCTYPE...> sections which are complicated (containing
|
83 mike 1.13 // rules rather than references to files).
84 //
|
85 david.dillard 1.32 // Remove newlines from string literals:
|
86 mike 1.13 //
87 // Example: <xyz x="hello
|
88 david.dillard 1.32 // world">
|
89 mike 1.13 //
90 ////////////////////////////////////////////////////////////////////////////////
91
|
92 sage 1.14 #include <Pegasus/Common/Config.h>
|
93 mike 1.13 #include <cctype>
94 #include <cstdio>
95 #include <cstdlib>
96 #include <cstring>
97 #include "XmlParser.h"
98 #include "Logger.h"
|
99 chuck 1.19 #include "ExceptionRep.h"
|
100 mike 1.34 #include "CharSet.h"
|
101 mike 1.13
102 PEGASUS_NAMESPACE_BEGIN
103
104 ////////////////////////////////////////////////////////////////////////////////
105 //
106 // Static helper functions
107 //
108 ////////////////////////////////////////////////////////////////////////////////
109
110 static void _printValue(const char* p)
111 {
112 for (; *p; p++)
113 {
|
114 david.dillard 1.32 if (*p == '\n')
115 PEGASUS_STD(cout) << "\\n";
116 else if (*p == '\r')
117 PEGASUS_STD(cout) << "\\r";
118 else if (*p == '\t')
119 PEGASUS_STD(cout) << "\\t";
120 else
121 PEGASUS_STD(cout) << *p;
|
122 mike 1.13 }
123 }
124
125 struct EntityReference
126 {
127 const char* match;
128 Uint32 length;
129 char replacement;
130 };
131
|
132 kumpf 1.18 // ATTN: Add support for more entity references
|
133 mike 1.13 static EntityReference _references[] =
134 {
135 { "&", 5, '&' },
136 { "<", 4, '<' },
137 { ">", 4, '>' },
138 { """, 6, '"' },
139 { "'", 6, '\'' }
140 };
141
|
142 chuck 1.26
143 // Implements a check for a whitespace character, without calling
144 // isspace( ). The isspace( ) function is locale-sensitive,
145 // and incorrectly flags some chars above 0x7f as whitespace. This
146 // causes the XmlParser to incorrectly parse UTF-8 data.
147 //
148 // Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml)
149 // defines white space as:
|
150 david.dillard 1.32 // S ::= (#x20 | #x9 | #xD | #xA)+
|
151 mike 1.34 static inline int _isspace(char c)
|
152 chuck 1.26 {
|
153 kumpf 1.36 return CharSet::isXmlWhiteSpace((Uint8)c);
|
154 chuck 1.26 }
155
|
156 mike 1.13 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
157
158 ////////////////////////////////////////////////////////////////////////////////
159 //
160 // XmlException
161 //
162 ////////////////////////////////////////////////////////////////////////////////
163
// Default message text for each XmlException code. _formMessage() and
// _formPartialMessage() index this table with (code - 1), so the order of
// the entries must stay in step with the _xmlKeys table.
static const char* _xmlMessages[] =
{
    "Bad opening element",
    "Bad closing element",
    "Bad attribute name",
    "Expected equal sign",
    "Bad attribute value",
    "A \"--\" sequence found within comment",
    "Unterminated comment",
    "Unterminated CDATA block",
    "Unterminated DOCTYPE",
    "Malformed reference",
    "Expected a comment or CDATA following \"<!\" sequence",
    "Closing element does not match opening element",
    "One or more tags are still open",
    "More than one root element was encountered",
    "Validation error",
    "Semantic error",
    "Namespace not declared"
};
184
|
// Message keys, one per XmlException code, in the same order as the
// _xmlMessages table. Entry (code - 1) is passed to MessageLoaderParms as
// the key for that code.
static const char* _xmlKeys[] =
{
    // Tag and attribute syntax
    "Common.XmlParser.BAD_START_TAG",
    "Common.XmlParser.BAD_END_TAG",
    "Common.XmlParser.BAD_ATTRIBUTE_NAME",
    "Common.XmlParser.EXPECTED_EQUAL_SIGN",
    "Common.XmlParser.BAD_ATTRIBUTE_VALUE",

    // Comments, CDATA, DOCTYPE and references
    "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
    "Common.XmlParser.UNTERMINATED_COMMENT",
    "Common.XmlParser.UNTERMINATED_CDATA",
    "Common.XmlParser.UNTERMINATED_DOCTYPE",
    "Common.XmlParser.MALFORMED_REFERENCE",
    "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",

    // Document structure
    "Common.XmlParser.START_END_MISMATCH",
    "Common.XmlParser.UNCLOSED_TAGS",
    "Common.XmlParser.MULTIPLE_ROOTS",

    // Higher-level errors
    "Common.XmlParser.VALIDATION_ERROR",
    "Common.XmlParser.SEMANTIC_ERROR",
    "Common.XmlParser.UNDECLARED_NAMESPACE"
};
205
206
|
207 kumpf 1.40 static MessageLoaderParms _formMessage(
208 Uint32 code,
209 Uint32 line,
210 const String& message)
|
211 chuck 1.19 {
212 String dftMsg = _xmlMessages[Uint32(code) - 1];
213 String key = _xmlKeys[Uint32(code) - 1];
|
214 dmitry.mikulin 1.43.2.5 String msg = message;
|
215 chuck 1.19
216 dftMsg.append(": on line $0");
217 if (message.size())
218 {
|
219 david.dillard 1.32 msg = ": " + msg;
220 dftMsg.append("$1");
221 }
|
222 chuck 1.19
223 return MessageLoaderParms(key, dftMsg, line ,msg);
224 }
225
226 static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
227 {
228 String dftMsg = _xmlMessages[Uint32(code) - 1];
229 String key = _xmlKeys[Uint32(code) - 1];
230
231 dftMsg.append(": on line $0");
|
232 david.dillard 1.32
|
233 chuck 1.19 return MessageLoaderParms(key, dftMsg, line);
234 }
235
|
236 mike 1.13
237 XmlException::XmlException(
|
238 david.dillard 1.32 XmlException::Code code,
|
239 mike 1.13 Uint32 lineNumber,
|
240 david.dillard 1.32 const String& message)
|
241 mike 1.13 : Exception(_formMessage(code, lineNumber, message))
242 {
243
244 }
245
|
246 chuck 1.19
247 XmlException::XmlException(
|
248 david.dillard 1.32 XmlException::Code code,
|
249 chuck 1.19 Uint32 lineNumber,
|
250 david.dillard 1.32 MessageLoaderParms& msgParms)
|
251 chuck 1.19 : Exception(_formPartialMessage(code, lineNumber))
252 {
|
253 david.dillard 1.32 if (msgParms.default_msg.size())
|
254 humberto 1.21 {
|
255 david.dillard 1.32 msgParms.default_msg = ": " + msgParms.default_msg;
256 }
257 _rep->message.append(MessageLoader::getMessage(msgParms));
|
258 chuck 1.19 }
259
260
|
261 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
262 //
263 // XmlValidationError
264 //
265 ////////////////////////////////////////////////////////////////////////////////
266
267 XmlValidationError::XmlValidationError(
268 Uint32 lineNumber,
269 const String& message)
270 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
271 {
272 }
273
|
274 chuck 1.19
275 XmlValidationError::XmlValidationError(
276 Uint32 lineNumber,
277 MessageLoaderParms& msgParms)
278 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
279 {
280 }
281
282
|
283 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
284 //
285 // XmlSemanticError
286 //
287 ////////////////////////////////////////////////////////////////////////////////
288
289 XmlSemanticError::XmlSemanticError(
290 Uint32 lineNumber,
291 const String& message)
292 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
293 {
294 }
|
295 chuck 1.19
296
297 XmlSemanticError::XmlSemanticError(
298 Uint32 lineNumber,
299 MessageLoaderParms& msgParms)
300 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
301 {
302 }
303
|
304 mike 1.13
305 ////////////////////////////////////////////////////////////////////////////////
306 //
307 // XmlParser
308 //
309 ////////////////////////////////////////////////////////////////////////////////
310
|
311 dmitry.mikulin 1.43.2.2 XmlParser::XmlParser(char* text, XmlNamespace* ns)
|
312 kumpf 1.40 : _line(1),
313 _current(text),
314 _restoreChar('\0'),
|
315 dmitry.mikulin 1.43.2.2 _foundRoot(false),
316 _scopeLevel(0),
|
317 dmitry.mikulin 1.43.2.10 _supportedNamespaces(ns),
318 _currentUnsupportedNSType(-1)
|
319 mike 1.13 {
320 }
321
|
322 mike 1.34 inline void _skipWhitespace(Uint32& line, char*& p)
323 {
324 while (*p && _isspace(*p))
325 {
326 if (*p == '\n')
327 line++;
328
329 p++;
330 }
331 }
332
|
333 s.manicka 1.43 #if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \
334 defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC)
335 #pragma optimize( "", off )
336 #endif
|
// Recognizes one of the five predefined entity names at p. On entry p points
// just past the '&'. The recognized forms are "gt;", "lt;", "apos;", "quot;"
// and "amp;".
//
// On a match, p is advanced past the terminating ';' and the character the
// entity stands for is returned. Otherwise p is left untouched and -1 is
// returned.
static int _getEntityRef(char*& p)
{
    switch (p[0])
    {
        case 'g':
            if ((p[1] == 't') && (p[2] == ';'))
            {
                p += 3;
                return '>';
            }
            break;

        case 'l':
            if ((p[1] == 't') && (p[2] == ';'))
            {
                p += 3;
                return '<';
            }
            break;

        case 'a':
            // Both "apos;" and "amp;" begin with 'a'.
            if ((p[1] == 'p') && (p[2] == 'o') && (p[3] == 's') &&
                (p[4] == ';'))
            {
                p += 5;
                return '\'';
            }
            if ((p[1] == 'm') && (p[2] == 'p') && (p[3] == ';'))
            {
                p += 4;
                return '&';
            }
            break;

        case 'q':
            if ((p[1] == 'u') && (p[2] == 'o') && (p[3] == 't') &&
                (p[4] == ';'))
            {
                p += 5;
                return '"';
            }
            break;

        default:
            break;
    }

    return -1;
}
|
373 s.manicka 1.43 #if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \
374 defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC)
375 #pragma optimize( "", on )
376 #endif
|
377 kumpf 1.37
// Parses the numeric part of a character reference. On entry p points just
// past "&#" (decimal, hex == false) or just past "&#x" (hex == true).
//
// Returns the referenced character code (0..255) and advances p past the
// terminating ';'. Returns -1, leaving p untouched, when:
//   - the text does not start with a digit of the requested base,
//   - a hexadecimal reference carries a "0x"/"0X" radix prefix,
//   - the digits are not immediately followed by ';',
//   - the value is greater than 255, or
//   - more than 4 (hex) or 5 (decimal) digits are present.
static inline int _getCharRef(char*& p, bool hex)
{
    // strtoul() on its own would skip leading whitespace and accept a sign
    // and, for base 16, a radix prefix. A character reference consists of
    // digits only, so reject those forms before converting.
    const unsigned char first = static_cast<unsigned char>(p[0]);

    if (hex)
    {
        if (!isxdigit(first))
        {
            return -1;
        }

        if ((first == '0') && ((p[1] == 'x') || (p[1] == 'X')))
        {
            return -1;
        }
    }
    else if (!isdigit(first))
    {
        return -1;
    }

    char* end;
    const unsigned long ch = strtoul(p, &end, hex ? 16 : 10);

    if ((end == p) || (*end != ';') || (ch > 255))
    {
        return -1;
    }

    // Limit the number of digits accepted.
    if ((hex && (end - p > 4)) || (!hex && (end - p > 5)))
    {
        return -1;
    }

    p = end + 1;

    return static_cast<int>(ch);
}
406
407 static void _normalize(Uint32& line, char*& p, char end_char, char*& start)
408 {
409 // Skip over leading whitespace:
410
411 _skipWhitespace(line, p);
412 start = p;
413
414 // Process one character at a time:
415
416 char* q = p;
417
418 while (*p && (*p != end_char))
419 kumpf 1.37 {
420 if (_isspace(*p))
421 {
422 // Compress sequences of whitespace characters to a single space
423 // character. Update line number when newlines encountered.
424
425 if (*p++ == '\n')
426 {
427 line++;
428 }
429
430 *q++ = ' ';
431
432 _skipWhitespace(line, p);
433 }
434 else if (*p == '&')
435 {
436 // Process entity characters and entity references:
437
438 p++;
439 int ch;
440 kumpf 1.37
441 if (*p == '#')
442 {
443 *p++;
444
445 if (*p == 'x')
446 {
447 p++;
448 ch = _getCharRef(p, true);
449 }
450 else
451 {
452 ch = _getCharRef(p, false);
453 }
454 }
455 else
456 {
457 ch = _getEntityRef(p);
458 }
459
460 if (ch == -1)
461 kumpf 1.37 {
462 throw XmlException(XmlException::MALFORMED_REFERENCE, line);
463 }
464
465 *q++ = ch;
466 }
467 else
468 {
469 *q++ = *p++;
470 }
471 }
472
|
473 kumpf 1.40 // We encountered a the end_char or a zero-terminator.
|
474 kumpf 1.37
475 *q = *p;
476
477 // Remove single trailing whitespace (consecutive whitespaces already
478 // compressed above). Since p >= q, we can tell if we need to strip a
479 // trailing space from q by looking at the end of p. We must not look at
480 // the last character of p, though, if p is an empty string.
481
482 if ((p != start) && _isspace(p[-1]))
483 {
484 q--;
485 }
486
487 // If q got behind p, it is safe and necessary to null-terminate q
488
489 if (q != p)
490 {
491 *q = '\0';
492 }
493 }
494
|
495 kumpf 1.43.2.1 Boolean XmlParser::next(
496 XmlEntry& entry,
|
497 dmitry.mikulin 1.43.2.2 Boolean includeComment)
|
498 mike 1.13 {
|
499 kumpf 1.43.2.1 entry.attributes.clear();
500
|
501 mike 1.13 if (!_putBackStack.isEmpty())
502 {
|
503 david.dillard 1.32 entry = _putBackStack.top();
504 _putBackStack.pop();
505 return true;
|
506 mike 1.13 }
507
508 // If a character was overwritten with a null-terminator the last
509 // time this routine was called, then put back that character. Before
510 // exiting of course, restore the null-terminator.
511
512 char* nullTerminator = 0;
513
514 if (_restoreChar && !*_current)
515 {
|
516 david.dillard 1.32 nullTerminator = _current;
517 *_current = _restoreChar;
518 _restoreChar = '\0';
|
519 mike 1.13 }
520
|
521 venkat.puvvada 1.41 // Loop until we are done with comments if includeComment is false.
522 do
523 {
524 // Skip over any whitespace:
525 _skipWhitespace(_line, _current);
526
527 if (!*_current)
528 {
529 if (nullTerminator)
530 *nullTerminator = '\0';
|
531 mike 1.13
|
532 venkat.puvvada 1.41 if (!_stack.isEmpty())
533 throw XmlException(XmlException::UNCLOSED_TAGS, _line);
|
534 mike 1.13
|
535 venkat.puvvada 1.41 return false;
536 }
|
537 mike 1.13
|
538 venkat.puvvada 1.41 // Either a "<...>" or content begins next:
|
539 mike 1.13
|
540 venkat.puvvada 1.41 if (*_current == '<')
541 {
542 _current++;
543 _getElement(_current, entry);
|
544 mike 1.13
|
545 venkat.puvvada 1.41 if (nullTerminator)
546 *nullTerminator = '\0';
|
547 mike 1.13
|
548 venkat.puvvada 1.41 if (entry.type == XmlEntry::START_TAG)
549 {
550 if (_stack.isEmpty() && _foundRoot)
551 throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
|
552 mike 1.13
|
553 venkat.puvvada 1.41 _foundRoot = true;
554 _stack.push((char*)entry.text);
555 }
556 else if (entry.type == XmlEntry::END_TAG)
557 {
558 if (_stack.isEmpty())
559 throw XmlException(XmlException::START_END_MISMATCH, _line);
|
560 mike 1.13
|
561 venkat.puvvada 1.41 if (strcmp(_stack.top(), entry.text) != 0)
562 throw XmlException(XmlException::START_END_MISMATCH, _line);
|
563 david.dillard 1.32
|
564 venkat.puvvada 1.41 _stack.pop();
565 }
|
566 david.dillard 1.32 }
|
567 venkat.puvvada 1.41 else
|
568 david.dillard 1.32 {
|
569 venkat.puvvada 1.41 // Normalize the content:
|
570 mike 1.13
|
571 venkat.puvvada 1.41 char* start;
572 _normalize(_line, _current, '<', start);
|
573 mike 1.13
|
574 venkat.puvvada 1.41 // Get the content:
|
575 mike 1.13
|
576 venkat.puvvada 1.41 entry.type = XmlEntry::CONTENT;
577 entry.text = start;
|
578 kumpf 1.37
|
579 venkat.puvvada 1.41 // Overwrite '<' with a null character (temporarily).
|
580 kumpf 1.37
|
581 venkat.puvvada 1.41 _restoreChar = *_current;
582 *_current = '\0';
|
583 kumpf 1.37
|
584 venkat.puvvada 1.41 if (nullTerminator)
585 *nullTerminator = '\0';
586 }
|
587 dmitry.mikulin 1.43.2.2 } while (!includeComment && entry.type == XmlEntry::COMMENT);
588
|
589 kumpf 1.43.2.3 if (_supportedNamespaces &&
590 (entry.type == XmlEntry::START_TAG ||
591 entry.type == XmlEntry::EMPTY_TAG ||
592 entry.type == XmlEntry::END_TAG))
|
593 dmitry.mikulin 1.43.2.2 {
594 // Process attributes and enter namespaces into the table
595 if (entry.type == XmlEntry::START_TAG ||
596 entry.type == XmlEntry::EMPTY_TAG)
597 {
598 _scopeLevel++;
599 for (unsigned int i = 0; i < entry.attributes.size(); i++)
600 {
|
601 kumpf 1.43.2.3 XmlAttribute& attr = entry.attributes[i];
|
602 dmitry.mikulin 1.43.2.4 if (strncmp(attr.name, "xmlns", 5) == 0)
|
603 dmitry.mikulin 1.43.2.2 {
604 XmlNamespace ns;
|
605 dmitry.mikulin 1.43.2.4 if (attr.name[5] == ':')
|
606 dmitry.mikulin 1.43.2.2 {
|
607 kumpf 1.43.2.11 ns.localName = attr.localName;
|
608 dmitry.mikulin 1.43.2.2 }
609 else
610 {
|
611 dmitry.mikulin 1.43.2.4 // Default name space has no local name
612 ns.localName = 0;
|
613 dmitry.mikulin 1.43.2.2 }
|
614 dmitry.mikulin 1.43.2.4 ns.extendedName = attr.value;
615 ns.scopeLevel = _scopeLevel;
616 ns.type = getSupportedNamespaceType(ns.extendedName);
|
617 dmitry.mikulin 1.43.2.10
618 // If the namespace is not supported, assign it a unique
619 // negative indentifier.
620 if (ns.type == -1)
621 {
622 ns.type = _currentUnsupportedNSType--;
623 }
|
624 dmitry.mikulin 1.43.2.4
625 // Even unsupported namespaces get pushed onto the stack.
626 // We will throw an exception of there is an attempt to
627 // reference an unsupported namespace later.
628 _nameSpaces.push(ns);
629 }
630 else
631 {
632 // Attribute names may also be namespace qualified.
633 attr.nsType = _getNamespaceType(attr.name);
|
634 dmitry.mikulin 1.43.2.2 }
635 }
636 }
|
637 kumpf 1.43.2.3
|
638 dmitry.mikulin 1.43.2.4 // Get the namespace type for this tag.
639 entry.nsType = _getNamespaceType(entry.text);
|
640 dmitry.mikulin 1.43.2.2
|
641 kumpf 1.43.2.3 if (entry.type == XmlEntry::END_TAG ||
642 entry.type == XmlEntry::EMPTY_TAG)
643 {
|
644 dmitry.mikulin 1.43.2.2 // Remove any namespaces of the current scope level from
645 // the scope stack.
646 while (!_nameSpaces.isEmpty() &&
647 _scopeLevel <= _nameSpaces.top().scopeLevel)
648 {
649 _nameSpaces.pop();
650 }
651
652 PEGASUS_ASSERT(_scopeLevel > 0);
653 _scopeLevel--;
654 }
655 }
656 else
657 {
658 entry.nsType = -1;
659 }
|
660 kumpf 1.37
|
661 venkat.puvvada 1.41 return true;
|
662 mike 1.13 }
663
|
664 dmitry.mikulin 1.43.2.2 // Get the namespace type of the given tag
665 int XmlParser::_getNamespaceType(const char* tag)
666 {
667 const char* pos = strchr(tag, ':');
668
|
669 dmitry.mikulin 1.43.2.4 // If ":" is not found, the tag is not namespace qualified and we
670 // need to look for the default name space.
|
671 dmitry.mikulin 1.43.2.2
672 // Search the namespace stack from the top
673 for (int i = _nameSpaces.size() - 1; i >=0; i--)
674 {
|
675 dmitry.mikulin 1.43.2.4 // If ":" is found, look for the name space with the matching
676 // local name...
677 if ((pos && _nameSpaces[i].localName &&
678 !strncmp(_nameSpaces[i].localName, tag, pos - tag)) ||
679 // ... otherwise look for the default name space. It's the
680 // one with localName set to NULL
681 (!pos && !_nameSpaces[i].localName))
682 {
|
683 dmitry.mikulin 1.43.2.2 return _nameSpaces[i].type;
684 }
685 }
|
686 dmitry.mikulin 1.43.2.4
687 // If the tag is namespace qualified, but the name space has not been
|
688 kumpf 1.43.2.7 // declared, it's malformed XML and we must throw an exception.
689 // Note: The "xml" namespace is specifically defined by the W3C as a
690 // reserved prefix ("http://www.w3.org/XML/1998/namespace").
691 if (pos && (strncmp(tag, "xml:", 4) != 0))
|
692 dmitry.mikulin 1.43.2.4 {
693 throw XmlException(XmlException::UNDECLARED_NAMESPACE, _line);
694 }
695
696 // Otherwise it's OK not to have a name space.
|
697 dmitry.mikulin 1.43.2.2 return -1;
698 }
699
|
700 dmitry.mikulin 1.43.2.10 // Given the extended namespace name, find it in the table of supported
|
701 dmitry.mikulin 1.43.2.2 // namespaces and return its type.
702 int XmlParser::getSupportedNamespaceType(const char* extendedName)
703 {
704 for (int i = 0;
705 _supportedNamespaces[i].localName != 0;
706 i++)
707 {
708 PEGASUS_ASSERT(_supportedNamespaces[i].type == i);
709 if (!strcmp(_supportedNamespaces[i].extendedName, extendedName))
710 {
711 return _supportedNamespaces[i].type;
712 }
713 }
714 return -1;
715 }
716
717 XmlNamespace* XmlParser::getNamespace(int nsType)
718 {
719 for (int i = _nameSpaces.size() - 1; i >=0; i--)
720 {
721 if (_nameSpaces[i].type == nsType)
722 dmitry.mikulin 1.43.2.2 {
723 return &_nameSpaces[i];
724 }
725 }
726 return 0;
727 }
728
|
729 mike 1.13 void XmlParser::putBack(XmlEntry& entry)
730 {
731 _putBackStack.push(entry);
732 }
733
734 XmlParser::~XmlParser()
735 {
736 // Nothing to do!
737 }
738
|
// Lookup table, indexed by an unsigned 8-bit character code, that is 1 for
// the characters allowed after the first character of a name part:
// A-Za-z0-9_-. (Note that ':' is not included and must be checked
// separately.)
//
// The bound is explicit so that every possible Uint8 index is inside the
// array regardless of how many initializers are listed. Rows hold 16
// entries each; the row comment gives the code of the first entry.
static unsigned char _isInnerElementChar[256] =
{
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x00
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x10
    0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0, // 0x20  - .
    1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, // 0x30  0-9
    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x40  A-O
    1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, // 0x50  P-Z _
    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x60  a-o
    1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0, // 0x70  p-z
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x80
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x90
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xA0
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xB0
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xC0
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xD0
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xE0
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0  // 0xF0
};
750
|
751 kumpf 1.43.2.11 inline Boolean _getQName(char*& p, const char*& localName)
|
752 mike 1.13 {
|
753 kumpf 1.43.2.11 localName = p;
754
|
755 mike 1.35 if (!CharSet::isAlNumUnder(Uint8(*p)))
|
756 kumpf 1.43.2.11 return false;
|
757 mike 1.35
|
758 kumpf 1.24 p++;
|
759 mike 1.13
|
760 mike 1.35 while (*p && _isInnerElementChar[Uint8(*p)])
|
761 david.dillard 1.32 p++;
|
762 mike 1.13
|
763 kumpf 1.43.2.11 // We've validated the prefix, now validate the local name
764 if (*p == ':')
765 {
766 localName = ++p;
767
768 if (!CharSet::isAlNumUnder(Uint8(*p)))
769 return false;
770
771 p++;
772
773 while (*p && _isInnerElementChar[Uint8(*p)])
774 p++;
775 }
776
777 return true;
778 }
779
780 Boolean XmlParser::_getElementName(char*& p, const char*& localName)
781 {
782 if (!_getQName(p, localName))
783 throw XmlException(XmlException::BAD_START_TAG, _line);
784 kumpf 1.43.2.11
|
785 mike 1.13 // The next character must be a space:
786
|
787 chuck 1.26 if (_isspace(*p))
|
788 mike 1.13 {
|
789 david.dillard 1.32 *p++ = '\0';
|
790 mike 1.34 _skipWhitespace(_line, p);
|
791 mike 1.13 }
792
793 if (*p == '>')
794 {
|
795 david.dillard 1.32 *p++ = '\0';
796 return true;
|
797 mike 1.13 }
798
799 return false;
800 }
801
|
802 kumpf 1.43.2.11 Boolean XmlParser::_getOpenElementName(
803 char*& p,
804 const char*& localName,
805 Boolean& openCloseElement)
|
806 mike 1.13 {
807 openCloseElement = false;
808
|
809 kumpf 1.43.2.11 if (!_getQName(p, localName))
|
810 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
811 mike 1.35
|
812 mike 1.13 // The next character must be a space:
813
|
814 chuck 1.26 if (_isspace(*p))
|
815 mike 1.13 {
|
816 david.dillard 1.32 *p++ = '\0';
|
817 mike 1.34 _skipWhitespace(_line, p);
|
818 mike 1.13 }
819
820 if (*p == '>')
821 {
|
822 david.dillard 1.32 *p++ = '\0';
823 return true;
|
824 mike 1.13 }
825
826 if (p[0] == '/' && p[1] == '>')
827 {
|
828 david.dillard 1.32 openCloseElement = true;
829 *p = '\0';
830 p += 2;
831 return true;
|
832 mike 1.13 }
833
834 return false;
835 }
836
|
837 kumpf 1.43.2.11 void XmlParser::_getAttributeNameAndEqual(char*& p, const char*& localName)
|
838 mike 1.13 {
|
839 kumpf 1.43.2.11 if (!_getQName(p, localName))
|
840 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
841 mike 1.35
|
842 mike 1.13 char* term = p;
843
|
844 mike 1.34 _skipWhitespace(_line, p);
|
845 mike 1.13
846 if (*p != '=')
|
847 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
848 mike 1.13
849 p++;
850
|
851 mike 1.34 _skipWhitespace(_line, p);
|
852 mike 1.13
853 *term = '\0';
854 }
855
856 void XmlParser::_getComment(char*& p)
857 {
858 // Now p points to first non-whitespace character beyond "<--" sequence:
859
860 for (; *p; p++)
861 {
|
862 david.dillard 1.32 if (p[0] == '-' && p[1] == '-')
863 {
864 if (p[2] != '>')
865 {
866 throw XmlException(
867 XmlException::MINUS_MINUS_IN_COMMENT, _line);
868 }
869
870 // Find end of comment (excluding whitespace):
871
872 *p = '\0';
873 p += 3;
874 return;
875 }
|
876 mike 1.13 }
877
878 // If it got this far, then the comment is unterminated:
879
880 throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
881 }
882
883 void XmlParser::_getCData(char*& p)
884 {
885 // At this point p points one past "<![CDATA[" sequence:
886
887 for (; *p; p++)
888 {
|
889 david.dillard 1.32 if (p[0] == ']' && p[1] == ']' && p[2] == '>')
890 {
891 *p = '\0';
892 p += 3;
893 return;
894 }
895 else if (*p == '\n')
896 _line++;
|
897 mike 1.13 }
898
899 // If it got this far, then the comment is unterminated:
900
901 throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
902 }
903
904 void XmlParser::_getDocType(char*& p)
905 {
906 // Just ignore the DOCTYPE command for now:
907
908 for (; *p && *p != '>'; p++)
909 {
|
910 david.dillard 1.32 if (*p == '\n')
911 _line++;
|
912 mike 1.13 }
913
914 if (*p != '>')
|
915 david.dillard 1.32 throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
|
916 mike 1.13
917 p++;
918 }
919
920 void XmlParser::_getElement(char*& p, XmlEntry& entry)
921 {
922 //--------------------------------------------------------------------------
923 // Get the element name (expect one of these: '?', '!', [A-Za-z_])
924 //--------------------------------------------------------------------------
925
926 if (*p == '?')
927 {
|
928 david.dillard 1.32 entry.type = XmlEntry::XML_DECLARATION;
929 entry.text = ++p;
|
930 mike 1.13
|
931 david.dillard 1.32 Boolean openCloseElement = false;
|
932 mike 1.13
|
933 kumpf 1.43.2.11 if (_getElementName(p, entry.localName))
|
934 david.dillard 1.32 return;
|
935 mike 1.13 }
936 else if (*p == '!')
937 {
|
938 david.dillard 1.32 p++;
|
939 mike 1.13
|
940 david.dillard 1.32 // Expect a comment or CDATA:
|
941 mike 1.13
|
942 david.dillard 1.32 if (p[0] == '-' && p[1] == '-')
943 {
944 p += 2;
945 entry.type = XmlEntry::COMMENT;
946 entry.text = p;
947 _getComment(p);
948 return;
949 }
950 else if (memcmp(p, "[CDATA[", 7) == 0)
951 {
952 p += 7;
953 entry.type = XmlEntry::CDATA;
954 entry.text = p;
955 _getCData(p);
956 return;
957 }
958 else if (memcmp(p, "DOCTYPE", 7) == 0)
959 {
960 entry.type = XmlEntry::DOCTYPE;
|
961 kumpf 1.37 entry.text = "";
|
962 david.dillard 1.32 _getDocType(p);
963 return;
964 }
965 throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
|
966 mike 1.13 }
967 else if (*p == '/')
968 {
|
969 david.dillard 1.32 entry.type = XmlEntry::END_TAG;
970 entry.text = ++p;
|
971 mike 1.13
|
972 kumpf 1.43.2.11 if (!_getElementName(p, entry.localName))
|
973 david.dillard 1.32 throw(XmlException(XmlException::BAD_END_TAG, _line));
|
974 mike 1.13
|
975 david.dillard 1.32 return;
|
976 mike 1.13 }
|
977 david 1.25 else if ((((*p >= 'A') && (*p <= 'Z')) ||
|
978 kumpf 1.24 ((*p >= 'a') && (*p <= 'z')) ||
979 (*p == '_')))
|
980 mike 1.13 {
|
981 david.dillard 1.32 entry.type = XmlEntry::START_TAG;
982 entry.text = p;
|
983 mike 1.13
|
984 david.dillard 1.32 Boolean openCloseElement = false;
|
985 mike 1.13
|
986 kumpf 1.43.2.11 if (_getOpenElementName(p, entry.localName, openCloseElement))
|
987 david.dillard 1.32 {
988 if (openCloseElement)
989 entry.type = XmlEntry::EMPTY_TAG;
990 return;
991 }
|
992 mike 1.13 }
993 else
|
994 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
995 mike 1.13
996 //--------------------------------------------------------------------------
997 // Grab all the attributes:
998 //--------------------------------------------------------------------------
999
1000 for (;;)
1001 {
|
1002 david.dillard 1.32 if (entry.type == XmlEntry::XML_DECLARATION)
1003 {
1004 if (p[0] == '?' && p[1] == '>')
1005 {
1006 p += 2;
1007 return;
1008 }
1009 }
1010 else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
1011 {
1012 entry.type = XmlEntry::EMPTY_TAG;
1013 p += 2;
1014 return;
1015 }
1016 else if (*p == '>')
1017 {
1018 p++;
1019 return;
1020 }
1021
1022 XmlAttribute attr;
|
1023 dmitry.mikulin 1.43.2.4 attr.nsType = -1;
|
1024 david.dillard 1.32 attr.name = p;
|
1025 kumpf 1.43.2.11 _getAttributeNameAndEqual(p, attr.localName);
|
1026 david.dillard 1.32
|
1027 kumpf 1.37 // Get the attribute value (e.g., "some value")
1028 {
1029 if ((*p != '"') && (*p != '\''))
1030 {
1031 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
1032 }
1033
1034 char quote = *p++;
1035
1036 char* start;
1037 _normalize(_line, p, quote, start);
1038 attr.value = start;
1039
1040 if (*p != quote)
1041 {
1042 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
1043 }
1044
1045 // Overwrite the closing quote with a null-terminator:
|
1046 david.dillard 1.32
|
1047 kumpf 1.37 *p++ = '\0';
1048 }
|
1049 david.dillard 1.32
1050 if (entry.type == XmlEntry::XML_DECLARATION)
1051 {
1052 // The next thing must a space or a "?>":
|
1053 mike 1.13
|
1054 david.dillard 1.32 if (!(p[0] == '?' && p[1] == '>') && !_isspace(*p))
1055 {
1056 throw XmlException(
1057 XmlException::BAD_ATTRIBUTE_VALUE, _line);
1058 }
1059 }
1060 else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || _isspace(*p)))
1061 {
1062 // The next thing must be a space or a '>':
|
1063 mike 1.13
|
1064 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
1065 }
|
1066 mike 1.13
|
1067 mike 1.34 _skipWhitespace(_line, p);
|
1068 david.dillard 1.32
|
1069 kumpf 1.43.2.1 entry.attributes.append(attr);
|
1070 mike 1.13 }
1071 }
1072
// Printable names of the XmlEntry types; XmlEntry::print() indexes this
// table with the entry's type value.
static const char* _typeStrings[] =
{
    "XML_DECLARATION",
    "START_TAG", "EMPTY_TAG", "END_TAG",
    "COMMENT", "CDATA", "DOCTYPE",
    "CONTENT"
};
1084
1085 void XmlEntry::print() const
1086 {
1087 PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
1088
1089 Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
1090
1091 if (needQuotes)
|
1092 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
1093
|
1094 mike 1.13 _printValue(text);
1095
1096 if (needQuotes)
|
1097 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
|
1098 mike 1.13
1099 PEGASUS_STD(cout) << '\n';
1100
|
1101 kumpf 1.43.2.1 for (Uint32 i = 0; i < attributes.size(); i++)
|
1102 mike 1.13 {
|
1103 david.dillard 1.32 PEGASUS_STD(cout) << " " << attributes[i].name << "=\"";
1104 _printValue(attributes[i].value);
1105 PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
|
1106 mike 1.13 }
1107 }
1108
1109 const XmlAttribute* XmlEntry::findAttribute(
1110 const char* name) const
1111 {
|
1112 kumpf 1.43.2.1 for (Uint32 i = 0; i < attributes.size(); i++)
|
1113 mike 1.13 {
|
1114 david.dillard 1.32 if (strcmp(attributes[i].name, name) == 0)
1115 return &attributes[i];
|
1116 mike 1.13 }
1117
1118 return 0;
1119 }
1120
|
1121 kumpf 1.43.2.6 const XmlAttribute* XmlEntry::findAttribute(
1122 int nsType,
1123 const char* name) const
1124 {
1125 for (Uint32 i = 0; i < attributes.size(); i++)
1126 {
|
1127 kumpf 1.43.2.11 if ((attributes[i].nsType == nsType) &&
1128 (strcmp(attributes[i].localName, name) == 0))
|
1129 kumpf 1.43.2.6 {
|
1130 kumpf 1.43.2.11 return &attributes[i];
|
1131 kumpf 1.43.2.6 }
1132 }
1133
1134 return 0;
1135 }
1136
|
1137 mike 1.13 // Find first non-whitespace character (set first) and last non-whitespace
1138 // character (set last one past this). For example, consider this string:
1139 //
|
1140 david.dillard 1.32 // " 87 "
|
1141 mike 1.13 //
1142 // The first pointer would point to '8' and the last pointer woudl point one
1143 // beyond '7'.
1144
1145 static void _findEnds(
|
1146 david.dillard 1.32 const char* str,
1147 const char*& first,
|
1148 mike 1.13 const char*& last)
1149 {
1150 first = str;
1151
|
1152 chuck 1.26 while (_isspace(*first))
|
1153 david.dillard 1.32 first++;
|
1154 mike 1.13
1155 if (!*first)
1156 {
|
1157 david.dillard 1.32 last = first;
1158 return;
|
1159 mike 1.13 }
1160
1161 last = first + strlen(first);
1162
|
1163 chuck 1.26 while (last != first && _isspace(last[-1]))
|
1164 david.dillard 1.32 last--;
|
1165 mike 1.13 }
1166
1167 Boolean XmlEntry::getAttributeValue(
|
1168 david.dillard 1.32 const char* name,
|
1169 mike 1.13 Uint32& value) const
1170 {
1171 const XmlAttribute* attr = findAttribute(name);
1172
1173 if (!attr)
|
1174 david.dillard 1.32 return false;
|
1175 mike 1.13
1176 const char* first;
1177 const char* last;
1178 _findEnds(attr->value, first, last);
1179
1180 char* end = 0;
1181 long tmp = strtol(first, &end, 10);
1182
1183 if (!end || end != last)
|
1184 david.dillard 1.32 return false;
|
1185 mike 1.13
1186 value = Uint32(tmp);
1187 return true;
1188 }
1189
1190 Boolean XmlEntry::getAttributeValue(
|
1191 david.dillard 1.32 const char* name,
|
1192 mike 1.13 Real32& value) const
1193 {
1194 const XmlAttribute* attr = findAttribute(name);
1195
1196 if (!attr)
|
1197 david.dillard 1.32 return false;
|
1198 mike 1.13
1199 const char* first;
1200 const char* last;
1201 _findEnds(attr->value, first, last);
1202
1203 char* end = 0;
1204 double tmp = strtod(first, &end);
1205
1206 if (!end || end != last)
|
1207 david.dillard 1.32 return false;
|
1208 mike 1.13
|
1209 david.dillard 1.32 value = static_cast<Real32>(tmp);
|
1210 mike 1.13 return true;
1211 }
1212
1213 Boolean XmlEntry::getAttributeValue(
|
1214 david.dillard 1.32 const char* name,
|
1215 mike 1.13 const char*& value) const
1216 {
1217 const XmlAttribute* attr = findAttribute(name);
1218
1219 if (!attr)
|
1220 david.dillard 1.32 return false;
|
1221 mike 1.13
1222 value = attr->value;
1223 return true;
1224 }
1225
1226 Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1227 {
1228 const char* tmp;
1229
1230 if (!getAttributeValue(name, tmp))
|
1231 david.dillard 1.32 return false;
|
1232 mike 1.13
|
1233 chuck 1.28 value = String(tmp);
|
1234 mike 1.13 return true;
1235 }
1236
|
1237 mike 1.33 void XmlAppendCString(Buffer& out, const char* str)
|
1238 mike 1.13 {
|
1239 david.dillard 1.32 out.append(str, static_cast<Uint32>(strlen(str)));
|
1240 mike 1.13 }
1241
1242 PEGASUS_NAMESPACE_END
|