1 karl 1.38 //%2006////////////////////////////////////////////////////////////////////////
|
2 mike 1.13 //
|
3 karl 1.29 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
|
6 karl 1.27 // IBM Corp.; EMC Corporation, The Open Group.
|
7 karl 1.29 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
|
9 karl 1.31 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
|
11 karl 1.38 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
12 // EMC Corporation; Symantec Corporation; The Open Group.
|
13 mike 1.13 //
14 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
15 kumpf 1.16 // of this software and associated documentation files (the "Software"), to
16 // deal in the Software without restriction, including without limitation the
17 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
18 mike 1.13 // sell copies of the Software, and to permit persons to whom the Software is
19 // furnished to do so, subject to the following conditions:
|
20 karl 1.38 //
|
21 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
22 mike 1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
23 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
24 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
25 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
26 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
27 mike 1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 //
30 //==============================================================================
31 //
32 //%/////////////////////////////////////////////////////////////////////////////
33
34 ////////////////////////////////////////////////////////////////////////////////
35 //
36 // XmlParser
37 //
|
38 david.dillard 1.32 // This file contains a simple non-validating XML parser. Here are
39 // several rules for well-formed XML:
|
40 mike 1.13 //
|
41 david.dillard 1.32 // 1. Documents must begin with an XML declaration:
|
42 mike 1.13 //
|
43 david.dillard 1.32 // <?xml version="1.0" standalone="yes"?>
|
44 mike 1.13 //
|
45 david.dillard 1.32 // 2. Comments have the form:
|
46 mike 1.13 //
|
47 david.dillard 1.32 // <!-- blah blah blah -->
|
48 mike 1.13 //
|
49 david.dillard 1.32 // 3. The following entity references are supported:
|
50 mike 1.13 //
|
51 david.dillard 1.32 // &amp; - ampersand
52 // &lt; - less-than
53 // &gt; - greater-than
54 // &quot; - full quote
55 // &apos; - apostrophe
|
56 mike 1.13 //
|
57 kumpf 1.18 // as well as character (numeric) references:
|
58 mike 1.35 //
|
59 kumpf 1.18 // &#49; - decimal reference for character '1'
60 // &#x31; - hexadecimal reference for character '1'
61 //
|
62 david.dillard 1.32 // 4. Element names and attribute names take the following form:
|
63 mike 1.13 //
|
64 david.dillard 1.32 // [A-Za-z_][A-Za-z_0-9-.:]*
|
65 mike 1.13 //
|
66 david.dillard 1.32 // 5. Arbitrary data (CDATA) can be enclosed like this:
|
67 mike 1.13 //
|
68 david.dillard 1.32 // <![CDATA[
69 // ...
70 // ]]>
|
71 mike 1.13 //
|
72 david.dillard 1.32 // 6. Element names and attributes names are case-sensitive.
|
73 mike 1.13 //
|
74 david.dillard 1.32 // 7. XmlAttribute values must be delimited by full or half quotes.
75 // XmlAttribute values must be delimited.
|
76 mike 1.13 //
|
77 david.dillard 1.32 // 8. <!DOCTYPE...>
|
78 mike 1.13 //
79 // TODO:
80 //
|
81 kumpf 1.40 // ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is
|
82 mike 1.35 // work. Handle <!DOCTYPE...> sections which are complicated (containing
|
83 mike 1.13 // rules rather than references to files).
84 //
|
85 david.dillard 1.32 // Remove newlines from string literals:
|
86 mike 1.13 //
87 // Example: <xyz x="hello
|
88 david.dillard 1.32 // world">
|
89 mike 1.13 //
90 ////////////////////////////////////////////////////////////////////////////////
91
|
92 sage 1.14 #include <Pegasus/Common/Config.h>
|
93 mike 1.13 #include <cctype>
94 #include <cstdio>
95 #include <cstdlib>
96 #include <cstring>
97 #include "XmlParser.h"
98 #include "Logger.h"
|
99 chuck 1.19 #include "ExceptionRep.h"
|
100 mike 1.34 #include "CharSet.h"
|
101 mike 1.13
102 PEGASUS_NAMESPACE_BEGIN
103
104 ////////////////////////////////////////////////////////////////////////////////
105 //
106 // Static helper functions
107 //
108 ////////////////////////////////////////////////////////////////////////////////
109
110 static void _printValue(const char* p)
111 {
112 for (; *p; p++)
113 {
|
114 david.dillard 1.32 if (*p == '\n')
115 PEGASUS_STD(cout) << "\\n";
116 else if (*p == '\r')
117 PEGASUS_STD(cout) << "\\r";
118 else if (*p == '\t')
119 PEGASUS_STD(cout) << "\\t";
120 else
121 PEGASUS_STD(cout) << *p;
|
122 mike 1.13 }
123 }
124
125 struct EntityReference
126 {
127 const char* match;
128 Uint32 length;
129 char replacement;
130 };
131
|
132 kumpf 1.18 // ATTN: Add support for more entity references
|
133 mike 1.13 static EntityReference _references[] =
134 {
135 { "&", 5, '&' },
136 { "<", 4, '<' },
137 { ">", 4, '>' },
138 { """, 6, '"' },
139 { "'", 6, '\'' }
140 };
141
|
142 chuck 1.26
143 // Implements a check for a whitespace character, without calling
144 // isspace( ). The isspace( ) function is locale-sensitive,
145 // and incorrectly flags some chars above 0x7f as whitespace. This
146 // causes the XmlParser to incorrectly parse UTF-8 data.
147 //
148 // Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml)
149 // defines white space as:
|
150 david.dillard 1.32 // S ::= (#x20 | #x9 | #xD | #xA)+
|
151 mike 1.34 static inline int _isspace(char c)
|
152 chuck 1.26 {
|
153 kumpf 1.36 return CharSet::isXmlWhiteSpace((Uint8)c);
|
154 chuck 1.26 }
155
|
156 mike 1.13 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
157
158 ////////////////////////////////////////////////////////////////////////////////
159 //
160 // XmlException
161 //
162 ////////////////////////////////////////////////////////////////////////////////
163
// Default (untranslated) message text for each XmlException code. The
// message for a code is found at index (code - 1); see _formMessage() and
// _formPartialMessage().
//
// NOTE(review): this table has 20 entries while _xmlKeys has 19 (it has no
// counterpart for "Too many attributes"). Both are indexed with the same
// (code - 1), so from index 9 onward a message and its key do not describe
// the same error. Confirm against the XmlException::Code enumeration which
// table is out of date.
static const char* _xmlMessages[] =
{
    "Bad opening element",
    "Bad closing element",
    "Bad attribute name",
    "Expected equal sign",
    "Bad attribute value",
    "A \"--\" sequence found within comment",
    "Unterminated comment",
    "Unterminated CDATA block",
    "Unterminated DOCTYPE",
    "Too many attributes: parser only handles 10",
    "Malformed reference",
    "Expected a comment or CDATA following \"<!\" sequence",
    "Closing element does not match opening element",
    "One or more tags are still open",
    "More than one root element was encountered",
    "Validation error",
    "Semantic error",
    "Malformed namespace declaration",
    "Namespace not supported",
    "Namespace not declared"
};
187
|
// Message keys passed to MessageLoaderParms for each XmlException code. The
// key for a code is found at index (code - 1); see _formMessage() and
// _formPartialMessage().
//
// NOTE(review): this table has 19 entries; confirm that its order lines up
// with the XmlException::Code enumeration and with the default message table.
static const char* _xmlKeys[] =
{
    "Common.XmlParser.BAD_START_TAG",
    "Common.XmlParser.BAD_END_TAG",
    "Common.XmlParser.BAD_ATTRIBUTE_NAME",
    "Common.XmlParser.EXPECTED_EQUAL_SIGN",
    "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
    "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
    "Common.XmlParser.UNTERMINATED_COMMENT",
    "Common.XmlParser.UNTERMINATED_CDATA",
    "Common.XmlParser.UNTERMINATED_DOCTYPE",
    "Common.XmlParser.MALFORMED_REFERENCE",
    "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
    "Common.XmlParser.START_END_MISMATCH",
    "Common.XmlParser.UNCLOSED_TAGS",
    "Common.XmlParser.MULTIPLE_ROOTS",
    "Common.XmlParser.VALIDATION_ERROR",
    "Common.XmlParser.SEMANTIC_ERROR",
    "Common.XmlParser.MALFORMED_NAMESPACE_DECL",
    "Common.XmlParser.UNSUPPORTED_NAMESPACE",
    "Common.XmlParser.UNDECLARED_NAMESPACE"
};
210
211
|
212 kumpf 1.40 static MessageLoaderParms _formMessage(
213 Uint32 code,
214 Uint32 line,
215 const String& message)
|
216 chuck 1.19 {
217 String dftMsg = _xmlMessages[Uint32(code) - 1];
218 String key = _xmlKeys[Uint32(code) - 1];
|
219 dmitry.mikulin 1.43.2.5 String msg = message;
|
220 chuck 1.19
221 dftMsg.append(": on line $0");
222 if (message.size())
223 {
|
224 david.dillard 1.32 msg = ": " + msg;
225 dftMsg.append("$1");
226 }
|
227 chuck 1.19
228 return MessageLoaderParms(key, dftMsg, line ,msg);
229 }
230
231 static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
232 {
233 String dftMsg = _xmlMessages[Uint32(code) - 1];
234 String key = _xmlKeys[Uint32(code) - 1];
235
236 dftMsg.append(": on line $0");
|
237 david.dillard 1.32
|
238 chuck 1.19 return MessageLoaderParms(key, dftMsg, line);
239 }
240
|
241 mike 1.13
242 XmlException::XmlException(
|
243 david.dillard 1.32 XmlException::Code code,
|
244 mike 1.13 Uint32 lineNumber,
|
245 david.dillard 1.32 const String& message)
|
246 mike 1.13 : Exception(_formMessage(code, lineNumber, message))
247 {
248
249 }
250
|
251 chuck 1.19
252 XmlException::XmlException(
|
253 david.dillard 1.32 XmlException::Code code,
|
254 chuck 1.19 Uint32 lineNumber,
|
255 david.dillard 1.32 MessageLoaderParms& msgParms)
|
256 chuck 1.19 : Exception(_formPartialMessage(code, lineNumber))
257 {
|
258 david.dillard 1.32 if (msgParms.default_msg.size())
|
259 humberto 1.21 {
|
260 david.dillard 1.32 msgParms.default_msg = ": " + msgParms.default_msg;
261 }
262 _rep->message.append(MessageLoader::getMessage(msgParms));
|
263 chuck 1.19 }
264
265
|
266 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
267 //
268 // XmlValidationError
269 //
270 ////////////////////////////////////////////////////////////////////////////////
271
272 XmlValidationError::XmlValidationError(
273 Uint32 lineNumber,
274 const String& message)
275 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
276 {
277 }
278
|
279 chuck 1.19
280 XmlValidationError::XmlValidationError(
281 Uint32 lineNumber,
282 MessageLoaderParms& msgParms)
283 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
284 {
285 }
286
287
|
288 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
289 //
290 // XmlSemanticError
291 //
292 ////////////////////////////////////////////////////////////////////////////////
293
294 XmlSemanticError::XmlSemanticError(
295 Uint32 lineNumber,
296 const String& message)
297 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
298 {
299 }
|
300 chuck 1.19
301
302 XmlSemanticError::XmlSemanticError(
303 Uint32 lineNumber,
304 MessageLoaderParms& msgParms)
305 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
306 {
307 }
308
|
309 mike 1.13
310 ////////////////////////////////////////////////////////////////////////////////
311 //
312 // XmlParser
313 //
314 ////////////////////////////////////////////////////////////////////////////////
315
|
316 dmitry.mikulin 1.43.2.2 XmlParser::XmlParser(char* text, XmlNamespace* ns)
|
317 kumpf 1.40 : _line(1),
318 _current(text),
319 _restoreChar('\0'),
|
320 dmitry.mikulin 1.43.2.2 _foundRoot(false),
321 _scopeLevel(0),
322 _supportedNamespaces(ns)
|
323 mike 1.13 {
324 }
325
|
326 mike 1.34 inline void _skipWhitespace(Uint32& line, char*& p)
327 {
328 while (*p && _isspace(*p))
329 {
330 if (*p == '\n')
331 line++;
332
333 p++;
334 }
335 }
336
|
// NOTE(review): optimization is switched off around this function on the
// Win64 MSVC platforms; presumably a compiler workaround - confirm before
// removing the pragmas.
#if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \
    defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC)
#pragma optimize( "", off )
#endif
// Recognizes one of the predefined entity names at p. On entry p points
// just past the '&'. When "gt;", "lt;", "apos;", "quot;" or "amp;" is
// found, p is advanced past the ';' and the replacement character is
// returned. Otherwise p is left untouched and -1 is returned.
static int _getEntityRef(char*& p)
{
    static const struct
    {
        const char* name;
        unsigned int length;
        char replacement;
    }
    entities[] =
    {
        { "gt;", 3, '>' },
        { "lt;", 3, '<' },
        { "apos;", 5, '\'' },
        { "quot;", 5, '"' },
        { "amp;", 4, '&' }
    };

    const unsigned int count = sizeof(entities) / sizeof(entities[0]);

    for (unsigned int i = 0; i < count; i++)
    {
        // strncmp stops at a NUL in p, so it never reads past the end of
        // the text.
        if (strncmp(p, entities[i].name, entities[i].length) == 0)
        {
            p += entities[i].length;
            return entities[i].replacement;
        }
    }

    return -1;
}
#if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \
    defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC)
#pragma optimize( "", on )
#endif
|
381 kumpf 1.37
// Parses a numeric character reference. On entry p points at the first
// digit: just past "&#" for a decimal reference, or just past "&#x" for a
// hexadecimal one (hex == true).
//
// On success p is advanced past the terminating ';' and the character code
// (0..255) is returned. On failure p is left untouched and -1 is returned.
// A reference is rejected when it has no digits, is not terminated by ';',
// has a value above 255, has more than 4 hex digits or 5 decimal digits, or
// contains anything other than digits of the selected base.
static inline int _getCharRef(char*& p, bool hex)
{
    char* end;
    const unsigned long ch = strtoul(p, &end, hex ? 16 : 10);

    if ((end == p) || (*end != ';') || (ch > 255))
    {
        return -1;
    }

    // strtoul() skips leading whitespace and accepts a sign and (in base
    // 16) a "0x" prefix. None of these are legal in a character reference,
    // so insist that everything it consumed is a plain digit.
    for (const char* q = p; q != end; q++)
    {
        const unsigned char c = (unsigned char)*q;

        if (hex ? !isxdigit(c) : !isdigit(c))
        {
            return -1;
        }
    }

    if ((hex && (end - p > 4)) || (!hex && (end - p > 5)))
    {
        return -1;
    }

    p = end + 1;

    return (int)ch;
}
410
411 static void _normalize(Uint32& line, char*& p, char end_char, char*& start)
412 {
413 // Skip over leading whitespace:
414
415 _skipWhitespace(line, p);
416 start = p;
417
418 // Process one character at a time:
419
420 char* q = p;
421
422 while (*p && (*p != end_char))
423 kumpf 1.37 {
424 if (_isspace(*p))
425 {
426 // Compress sequences of whitespace characters to a single space
427 // character. Update line number when newlines encountered.
428
429 if (*p++ == '\n')
430 {
431 line++;
432 }
433
434 *q++ = ' ';
435
436 _skipWhitespace(line, p);
437 }
438 else if (*p == '&')
439 {
440 // Process entity characters and entity references:
441
442 p++;
443 int ch;
444 kumpf 1.37
445 if (*p == '#')
446 {
447 *p++;
448
449 if (*p == 'x')
450 {
451 p++;
452 ch = _getCharRef(p, true);
453 }
454 else
455 {
456 ch = _getCharRef(p, false);
457 }
458 }
459 else
460 {
461 ch = _getEntityRef(p);
462 }
463
464 if (ch == -1)
465 kumpf 1.37 {
466 throw XmlException(XmlException::MALFORMED_REFERENCE, line);
467 }
468
469 *q++ = ch;
470 }
471 else
472 {
473 *q++ = *p++;
474 }
475 }
476
|
477 kumpf 1.40 // We encountered a the end_char or a zero-terminator.
|
478 kumpf 1.37
479 *q = *p;
480
481 // Remove single trailing whitespace (consecutive whitespaces already
482 // compressed above). Since p >= q, we can tell if we need to strip a
483 // trailing space from q by looking at the end of p. We must not look at
484 // the last character of p, though, if p is an empty string.
485
486 if ((p != start) && _isspace(p[-1]))
487 {
488 q--;
489 }
490
491 // If q got behind p, it is safe and necessary to null-terminate q
492
493 if (q != p)
494 {
495 *q = '\0';
496 }
497 }
498
|
499 kumpf 1.43.2.1 Boolean XmlParser::next(
500 XmlEntry& entry,
|
501 dmitry.mikulin 1.43.2.2 Boolean includeComment)
|
502 mike 1.13 {
|
503 kumpf 1.43.2.1 entry.attributes.clear();
504
|
505 mike 1.13 if (!_putBackStack.isEmpty())
506 {
|
507 david.dillard 1.32 entry = _putBackStack.top();
508 _putBackStack.pop();
509 return true;
|
510 mike 1.13 }
511
512 // If a character was overwritten with a null-terminator the last
513 // time this routine was called, then put back that character. Before
514 // exiting of course, restore the null-terminator.
515
516 char* nullTerminator = 0;
517
518 if (_restoreChar && !*_current)
519 {
|
520 david.dillard 1.32 nullTerminator = _current;
521 *_current = _restoreChar;
522 _restoreChar = '\0';
|
523 mike 1.13 }
524
|
525 venkat.puvvada 1.41 // Loop until we are done with comments if includeComment is false.
526 do
527 {
528 // Skip over any whitespace:
529 _skipWhitespace(_line, _current);
530
531 if (!*_current)
532 {
533 if (nullTerminator)
534 *nullTerminator = '\0';
|
535 mike 1.13
|
536 venkat.puvvada 1.41 if (!_stack.isEmpty())
537 throw XmlException(XmlException::UNCLOSED_TAGS, _line);
|
538 mike 1.13
|
539 venkat.puvvada 1.41 return false;
540 }
|
541 mike 1.13
|
542 venkat.puvvada 1.41 // Either a "<...>" or content begins next:
|
543 mike 1.13
|
544 venkat.puvvada 1.41 if (*_current == '<')
545 {
546 _current++;
547 _getElement(_current, entry);
|
548 mike 1.13
|
549 venkat.puvvada 1.41 if (nullTerminator)
550 *nullTerminator = '\0';
|
551 mike 1.13
|
552 venkat.puvvada 1.41 if (entry.type == XmlEntry::START_TAG)
553 {
554 if (_stack.isEmpty() && _foundRoot)
555 throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
|
556 mike 1.13
|
557 venkat.puvvada 1.41 _foundRoot = true;
558 _stack.push((char*)entry.text);
559 }
560 else if (entry.type == XmlEntry::END_TAG)
561 {
562 if (_stack.isEmpty())
563 throw XmlException(XmlException::START_END_MISMATCH, _line);
|
564 mike 1.13
|
565 venkat.puvvada 1.41 if (strcmp(_stack.top(), entry.text) != 0)
566 throw XmlException(XmlException::START_END_MISMATCH, _line);
|
567 david.dillard 1.32
|
568 venkat.puvvada 1.41 _stack.pop();
569 }
|
570 david.dillard 1.32 }
|
571 venkat.puvvada 1.41 else
|
572 david.dillard 1.32 {
|
573 venkat.puvvada 1.41 // Normalize the content:
|
574 mike 1.13
|
575 venkat.puvvada 1.41 char* start;
576 _normalize(_line, _current, '<', start);
|
577 mike 1.13
|
578 venkat.puvvada 1.41 // Get the content:
|
579 mike 1.13
|
580 venkat.puvvada 1.41 entry.type = XmlEntry::CONTENT;
581 entry.text = start;
|
582 kumpf 1.37
|
583 venkat.puvvada 1.41 // Overwrite '<' with a null character (temporarily).
|
584 kumpf 1.37
|
585 venkat.puvvada 1.41 _restoreChar = *_current;
586 *_current = '\0';
|
587 kumpf 1.37
|
588 venkat.puvvada 1.41 if (nullTerminator)
589 *nullTerminator = '\0';
590 }
|
591 dmitry.mikulin 1.43.2.2 } while (!includeComment && entry.type == XmlEntry::COMMENT);
592
|
593 kumpf 1.43.2.3 if (_supportedNamespaces &&
594 (entry.type == XmlEntry::START_TAG ||
595 entry.type == XmlEntry::EMPTY_TAG ||
596 entry.type == XmlEntry::END_TAG))
|
597 dmitry.mikulin 1.43.2.2 {
598 // Process attributes and enter namespaces into the table
599 if (entry.type == XmlEntry::START_TAG ||
600 entry.type == XmlEntry::EMPTY_TAG)
601 {
602 _scopeLevel++;
603 for (unsigned int i = 0; i < entry.attributes.size(); i++)
604 {
|
605 kumpf 1.43.2.3 XmlAttribute& attr = entry.attributes[i];
|
606 dmitry.mikulin 1.43.2.4 if (strncmp(attr.name, "xmlns", 5) == 0)
|
607 dmitry.mikulin 1.43.2.2 {
608 XmlNamespace ns;
|
609 dmitry.mikulin 1.43.2.4 if (attr.name[5] == ':')
|
610 dmitry.mikulin 1.43.2.2 {
|
611 dmitry.mikulin 1.43.2.4 ns.localName = attr.name + 6;
612
613 // Check if we have malformed XML of the form:
614 // "xmlns:=URI". In this case attr.name will be set
615 // to "xmlns:" and ns.localName will point to '\0'
616 if (ns.localName[0] == '\0')
617 {
618 throw XmlException(
619 XmlException::MALFORMED_NAMESPACE_DECL,
620 _line);
621 }
|
622 dmitry.mikulin 1.43.2.2 }
623 else
624 {
|
625 dmitry.mikulin 1.43.2.4 // Default name space has no local name
626 ns.localName = 0;
|
627 dmitry.mikulin 1.43.2.2 }
|
628 dmitry.mikulin 1.43.2.4 ns.extendedName = attr.value;
629 ns.scopeLevel = _scopeLevel;
630 ns.type = getSupportedNamespaceType(ns.extendedName);
631
632 // Even unsupported namespaces get pushed onto the stack.
633 // We will throw an exception of there is an attempt to
634 // reference an unsupported namespace later.
635 _nameSpaces.push(ns);
636 }
637 else
638 {
639 // Attribute names may also be namespace qualified.
640 attr.nsType = _getNamespaceType(attr.name);
|
641 dmitry.mikulin 1.43.2.2 }
642 }
643 }
|
644 kumpf 1.43.2.3
|
645 dmitry.mikulin 1.43.2.4 // Get the namespace type for this tag.
646 entry.nsType = _getNamespaceType(entry.text);
|
647 dmitry.mikulin 1.43.2.2
|
648 kumpf 1.43.2.3 if (entry.type == XmlEntry::END_TAG ||
649 entry.type == XmlEntry::EMPTY_TAG)
650 {
|
651 dmitry.mikulin 1.43.2.2 // Remove any namespaces of the current scope level from
652 // the scope stack.
653 while (!_nameSpaces.isEmpty() &&
654 _scopeLevel <= _nameSpaces.top().scopeLevel)
655 {
656 _nameSpaces.pop();
657 }
658
659 PEGASUS_ASSERT(_scopeLevel > 0);
660 _scopeLevel--;
661 }
662 }
663 else
664 {
665 entry.nsType = -1;
666 }
|
667 kumpf 1.37
|
668 venkat.puvvada 1.41 return true;
|
669 mike 1.13 }
670
|
671 dmitry.mikulin 1.43.2.2 // Get the namespace type of the given tag
672 int XmlParser::_getNamespaceType(const char* tag)
673 {
674 const char* pos = strchr(tag, ':');
675
|
676 dmitry.mikulin 1.43.2.4 // If ":" is not found, the tag is not namespace qualified and we
677 // need to look for the default name space.
|
678 dmitry.mikulin 1.43.2.2
679 // Search the namespace stack from the top
680 for (int i = _nameSpaces.size() - 1; i >=0; i--)
681 {
|
682 dmitry.mikulin 1.43.2.4 // If ":" is found, look for the name space with the matching
683 // local name...
684 if ((pos && _nameSpaces[i].localName &&
685 !strncmp(_nameSpaces[i].localName, tag, pos - tag)) ||
686 // ... otherwise look for the default name space. It's the
687 // one with localName set to NULL
688 (!pos && !_nameSpaces[i].localName))
689 {
690 // If it's a reference to an unsupported namespace,
691 // throw an exception
692 if (_nameSpaces[i].type == -1)
693 {
694 throw XmlException(XmlException::UNSUPPORTED_NAMESPACE, _line);
695 }
|
696 dmitry.mikulin 1.43.2.2 return _nameSpaces[i].type;
697 }
698 }
|
699 dmitry.mikulin 1.43.2.4
700 // If the tag is namespace qualified, but the name space has not been
|
701 kumpf 1.43.2.7 // declared, it's malformed XML and we must throw an exception.
702 // Note: The "xml" namespace is specifically defined by the W3C as a
703 // reserved prefix ("http://www.w3.org/XML/1998/namespace").
704 if (pos && (strncmp(tag, "xml:", 4) != 0))
|
705 dmitry.mikulin 1.43.2.4 {
706 throw XmlException(XmlException::UNDECLARED_NAMESPACE, _line);
707 }
708
709 // Otherwise it's OK not to have a name space.
|
710 dmitry.mikulin 1.43.2.2 return -1;
711 }
712
713 // Gived the extended namespace name, find it in the table of supported
714 // namespaces and return its type.
715 int XmlParser::getSupportedNamespaceType(const char* extendedName)
716 {
717 for (int i = 0;
718 _supportedNamespaces[i].localName != 0;
719 i++)
720 {
721 PEGASUS_ASSERT(_supportedNamespaces[i].type == i);
722 if (!strcmp(_supportedNamespaces[i].extendedName, extendedName))
723 {
724 return _supportedNamespaces[i].type;
725 }
726 }
727 return -1;
728 }
729
730 XmlNamespace* XmlParser::getNamespace(int nsType)
731 dmitry.mikulin 1.43.2.2 {
732 for (int i = _nameSpaces.size() - 1; i >=0; i--)
733 {
734 if (_nameSpaces[i].type == nsType)
735 {
736 return &_nameSpaces[i];
737 }
738 }
739 return 0;
740 }
741
|
742 mike 1.13 void XmlParser::putBack(XmlEntry& entry)
743 {
744 _putBackStack.push(entry);
745 }
746
747 XmlParser::~XmlParser()
748 {
749 // Nothing to do!
750 }
751
|
// Lookup table, indexed by an unsigned character code, that is 1 for the
// characters allowed after the first character of an element or attribute
// name: A-Z a-z 0-9 _ - : .
//
// The size is fixed at 256 so that every Uint8 index is in range; entries
// not listed (0x80 - 0xFF) are zero-initialized.
static unsigned char _isInnerElementChar[256] =
{
    /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x10 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x20 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,    // '-' '.'
    /* 0x30 */ 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,    // '0'-'9' ':'
    /* 0x40 */ 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,    // 'A'-'O'
    /* 0x50 */ 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,    // 'P'-'Z' '_'
    /* 0x60 */ 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,    // 'a'-'o'
    /* 0x70 */ 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,    // 'p'-'z'
};
763
|
764 mike 1.13 Boolean XmlParser::_getElementName(char*& p)
765 {
|
766 mike 1.35 if (!CharSet::isAlNumUnder(Uint8(*p)))
|
767 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
768 mike 1.35
|
769 kumpf 1.24 p++;
|
770 mike 1.13
|
771 mike 1.35 while (*p && _isInnerElementChar[Uint8(*p)])
|
772 david.dillard 1.32 p++;
|
773 mike 1.13
774 // The next character must be a space:
775
|
776 chuck 1.26 if (_isspace(*p))
|
777 mike 1.13 {
|
778 david.dillard 1.32 *p++ = '\0';
|
779 mike 1.34 _skipWhitespace(_line, p);
|
780 mike 1.13 }
781
782 if (*p == '>')
783 {
|
784 david.dillard 1.32 *p++ = '\0';
785 return true;
|
786 mike 1.13 }
787
788 return false;
789 }
790
791 Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement)
792 {
793 openCloseElement = false;
794
|
795 mike 1.35 if (!CharSet::isAlNumUnder(Uint8(*p)))
|
796 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
797 mike 1.35
|
798 kumpf 1.24 p++;
|
799 mike 1.13
|
800 mike 1.35 while (*p && _isInnerElementChar[Uint8(*p)])
|
801 david.dillard 1.32 p++;
|
802 mike 1.13
803 // The next character must be a space:
804
|
805 chuck 1.26 if (_isspace(*p))
|
806 mike 1.13 {
|
807 david.dillard 1.32 *p++ = '\0';
|
808 mike 1.34 _skipWhitespace(_line, p);
|
809 mike 1.13 }
810
811 if (*p == '>')
812 {
|
813 david.dillard 1.32 *p++ = '\0';
814 return true;
|
815 mike 1.13 }
816
817 if (p[0] == '/' && p[1] == '>')
818 {
|
819 david.dillard 1.32 openCloseElement = true;
820 *p = '\0';
821 p += 2;
822 return true;
|
823 mike 1.13 }
824
825 return false;
826 }
827
828 void XmlParser::_getAttributeNameAndEqual(char*& p)
829 {
|
830 mike 1.35 if (!CharSet::isAlNumUnder((Uint8)*p))
|
831 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
832 mike 1.35
|
833 kumpf 1.24 p++;
|
834 mike 1.13
|
835 mike 1.35 while (*p && _isInnerElementChar[Uint8(*p)])
|
836 david.dillard 1.32 p++;
|
837 mike 1.13
838 char* term = p;
839
|
840 mike 1.34 _skipWhitespace(_line, p);
|
841 mike 1.13
842 if (*p != '=')
|
843 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
844 mike 1.13
845 p++;
846
|
847 mike 1.34 _skipWhitespace(_line, p);
|
848 mike 1.13
849 *term = '\0';
850 }
851
852 void XmlParser::_getComment(char*& p)
853 {
854 // Now p points to first non-whitespace character beyond "<--" sequence:
855
856 for (; *p; p++)
857 {
|
858 david.dillard 1.32 if (p[0] == '-' && p[1] == '-')
859 {
860 if (p[2] != '>')
861 {
862 throw XmlException(
863 XmlException::MINUS_MINUS_IN_COMMENT, _line);
864 }
865
866 // Find end of comment (excluding whitespace):
867
868 *p = '\0';
869 p += 3;
870 return;
871 }
|
872 mike 1.13 }
873
874 // If it got this far, then the comment is unterminated:
875
876 throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
877 }
878
879 void XmlParser::_getCData(char*& p)
880 {
881 // At this point p points one past "<![CDATA[" sequence:
882
883 for (; *p; p++)
884 {
|
885 david.dillard 1.32 if (p[0] == ']' && p[1] == ']' && p[2] == '>')
886 {
887 *p = '\0';
888 p += 3;
889 return;
890 }
891 else if (*p == '\n')
892 _line++;
|
893 mike 1.13 }
894
895 // If it got this far, then the comment is unterminated:
896
897 throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
898 }
899
900 void XmlParser::_getDocType(char*& p)
901 {
902 // Just ignore the DOCTYPE command for now:
903
904 for (; *p && *p != '>'; p++)
905 {
|
906 david.dillard 1.32 if (*p == '\n')
907 _line++;
|
908 mike 1.13 }
909
910 if (*p != '>')
|
911 david.dillard 1.32 throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
|
912 mike 1.13
913 p++;
914 }
915
916 void XmlParser::_getElement(char*& p, XmlEntry& entry)
917 {
918 //--------------------------------------------------------------------------
919 // Get the element name (expect one of these: '?', '!', [A-Za-z_])
920 //--------------------------------------------------------------------------
921
922 if (*p == '?')
923 {
|
924 david.dillard 1.32 entry.type = XmlEntry::XML_DECLARATION;
925 entry.text = ++p;
|
926 mike 1.13
|
927 david.dillard 1.32 Boolean openCloseElement = false;
|
928 mike 1.13
|
929 david.dillard 1.32 if (_getElementName(p))
930 return;
|
931 mike 1.13 }
932 else if (*p == '!')
933 {
|
934 david.dillard 1.32 p++;
|
935 mike 1.13
|
936 david.dillard 1.32 // Expect a comment or CDATA:
|
937 mike 1.13
|
938 david.dillard 1.32 if (p[0] == '-' && p[1] == '-')
939 {
940 p += 2;
941 entry.type = XmlEntry::COMMENT;
942 entry.text = p;
943 _getComment(p);
944 return;
945 }
946 else if (memcmp(p, "[CDATA[", 7) == 0)
947 {
948 p += 7;
949 entry.type = XmlEntry::CDATA;
950 entry.text = p;
951 _getCData(p);
952 return;
953 }
954 else if (memcmp(p, "DOCTYPE", 7) == 0)
955 {
956 entry.type = XmlEntry::DOCTYPE;
|
957 kumpf 1.37 entry.text = "";
|
958 david.dillard 1.32 _getDocType(p);
959 return;
960 }
961 throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
|
962 mike 1.13 }
963 else if (*p == '/')
964 {
|
965 david.dillard 1.32 entry.type = XmlEntry::END_TAG;
966 entry.text = ++p;
|
967 mike 1.13
|
968 david.dillard 1.32 if (!_getElementName(p))
969 throw(XmlException(XmlException::BAD_END_TAG, _line));
|
970 mike 1.13
|
971 david.dillard 1.32 return;
|
972 mike 1.13 }
|
973 david 1.25 else if ((((*p >= 'A') && (*p <= 'Z')) ||
|
974 kumpf 1.24 ((*p >= 'a') && (*p <= 'z')) ||
975 (*p == '_')))
|
976 mike 1.13 {
|
977 david.dillard 1.32 entry.type = XmlEntry::START_TAG;
978 entry.text = p;
|
979 mike 1.13
|
980 david.dillard 1.32 Boolean openCloseElement = false;
|
981 mike 1.13
|
982 david.dillard 1.32 if (_getOpenElementName(p, openCloseElement))
983 {
984 if (openCloseElement)
985 entry.type = XmlEntry::EMPTY_TAG;
986 return;
987 }
|
988 mike 1.13 }
989 else
|
990 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
991 mike 1.13
992 //--------------------------------------------------------------------------
993 // Grab all the attributes:
994 //--------------------------------------------------------------------------
995
996 for (;;)
997 {
|
998 david.dillard 1.32 if (entry.type == XmlEntry::XML_DECLARATION)
999 {
1000 if (p[0] == '?' && p[1] == '>')
1001 {
1002 p += 2;
1003 return;
1004 }
1005 }
1006 else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
1007 {
1008 entry.type = XmlEntry::EMPTY_TAG;
1009 p += 2;
1010 return;
1011 }
1012 else if (*p == '>')
1013 {
1014 p++;
1015 return;
1016 }
1017
1018 XmlAttribute attr;
|
1019 dmitry.mikulin 1.43.2.4 attr.nsType = -1;
|
1020 david.dillard 1.32 attr.name = p;
1021 _getAttributeNameAndEqual(p);
1022
|
1023 kumpf 1.37 // Get the attribute value (e.g., "some value")
1024 {
1025 if ((*p != '"') && (*p != '\''))
1026 {
1027 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
1028 }
1029
1030 char quote = *p++;
1031
1032 char* start;
1033 _normalize(_line, p, quote, start);
1034 attr.value = start;
1035
1036 if (*p != quote)
1037 {
1038 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
1039 }
1040
1041 // Overwrite the closing quote with a null-terminator:
|
1042 david.dillard 1.32
|
1043 kumpf 1.37 *p++ = '\0';
1044 }
|
1045 david.dillard 1.32
1046 if (entry.type == XmlEntry::XML_DECLARATION)
1047 {
1048 // The next thing must a space or a "?>":
|
1049 mike 1.13
|
1050 david.dillard 1.32 if (!(p[0] == '?' && p[1] == '>') && !_isspace(*p))
1051 {
1052 throw XmlException(
1053 XmlException::BAD_ATTRIBUTE_VALUE, _line);
1054 }
1055 }
1056 else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || _isspace(*p)))
1057 {
1058 // The next thing must be a space or a '>':
|
1059 mike 1.13
|
1060 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
1061 }
|
1062 mike 1.13
|
1063 mike 1.34 _skipWhitespace(_line, p);
|
1064 david.dillard 1.32
|
1065 kumpf 1.43.2.1 entry.attributes.append(attr);
|
1066 mike 1.13 }
1067 }
1068
// Printable names of the entry types, indexed by XmlEntry type value in
// XmlEntry::print().
static const char* _typeStrings[] =
{
    "XML_DECLARATION",
    "START_TAG",
    "EMPTY_TAG",
    "END_TAG",
    "COMMENT",
    "CDATA",
    "DOCTYPE",
    "CONTENT"
};
1080
|
1081 kumpf 1.43.2.6 const char* XmlEntry::getUnqualifiedName() const
1082 {
1083 PEGASUS_ASSERT(
1084 (type == XmlEntry::START_TAG) ||
1085 (type == XmlEntry::EMPTY_TAG) ||
1086 (type == XmlEntry::END_TAG));
1087
1088 const char* colonPos = strchr(text, ':');
1089 if (colonPos == NULL)
1090 {
1091 return text;
1092 }
1093
1094 return colonPos + 1;
1095 }
1096
|
1097 mike 1.13 void XmlEntry::print() const
1098 {
1099 PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
1100
1101 Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
1102
1103 if (needQuotes)
|
1104 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
1105
|
1106 mike 1.13 _printValue(text);
1107
1108 if (needQuotes)
|
1109 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
|
1110 mike 1.13
1111 PEGASUS_STD(cout) << '\n';
1112
|
1113 kumpf 1.43.2.1 for (Uint32 i = 0; i < attributes.size(); i++)
|
1114 mike 1.13 {
|
1115 david.dillard 1.32 PEGASUS_STD(cout) << " " << attributes[i].name << "=\"";
1116 _printValue(attributes[i].value);
1117 PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
|
1118 mike 1.13 }
1119 }
1120
1121 const XmlAttribute* XmlEntry::findAttribute(
1122 const char* name) const
1123 {
|
1124 kumpf 1.43.2.1 for (Uint32 i = 0; i < attributes.size(); i++)
|
1125 mike 1.13 {
|
1126 david.dillard 1.32 if (strcmp(attributes[i].name, name) == 0)
1127 return &attributes[i];
|
1128 mike 1.13 }
1129
1130 return 0;
1131 }
1132
|
1133 kumpf 1.43.2.6 const XmlAttribute* XmlEntry::findAttribute(
1134 int nsType,
1135 const char* name) const
1136 {
1137 for (Uint32 i = 0; i < attributes.size(); i++)
1138 {
1139 if ((attributes[i].nsType == nsType) &&
1140 (strcmp(attributes[i].name, name) == 0))
1141 {
1142 return &attributes[i];
1143 }
1144 }
1145
1146 return 0;
1147 }
1148
|
1149 mike 1.13 // Find first non-whitespace character (set first) and last non-whitespace
1150 // character (set last one past this). For example, consider this string:
1151 //
|
1152 david.dillard 1.32 // " 87 "
|
1153 mike 1.13 //
1154 // The first pointer would point to '8' and the last pointer would point one
1155 // beyond '7'.
1156
1157 static void _findEnds(
|
1158 david.dillard 1.32 const char* str,
1159 const char*& first,
|
1160 mike 1.13 const char*& last)
1161 {
1162 first = str;
1163
|
1164 chuck 1.26 while (_isspace(*first))
|
1165 david.dillard 1.32 first++;
|
1166 mike 1.13
1167 if (!*first)
1168 {
|
1169 david.dillard 1.32 last = first;
1170 return;
|
1171 mike 1.13 }
1172
1173 last = first + strlen(first);
1174
|
1175 chuck 1.26 while (last != first && _isspace(last[-1]))
|
1176 david.dillard 1.32 last--;
|
1177 mike 1.13 }
1178
1179 Boolean XmlEntry::getAttributeValue(
|
1180 david.dillard 1.32 const char* name,
|
1181 mike 1.13 Uint32& value) const
1182 {
1183 const XmlAttribute* attr = findAttribute(name);
1184
1185 if (!attr)
|
1186 david.dillard 1.32 return false;
|
1187 mike 1.13
1188 const char* first;
1189 const char* last;
1190 _findEnds(attr->value, first, last);
1191
1192 char* end = 0;
1193 long tmp = strtol(first, &end, 10);
1194
1195 if (!end || end != last)
|
1196 david.dillard 1.32 return false;
|
1197 mike 1.13
1198 value = Uint32(tmp);
1199 return true;
1200 }
1201
1202 Boolean XmlEntry::getAttributeValue(
|
1203 david.dillard 1.32 const char* name,
|
1204 mike 1.13 Real32& value) const
1205 {
1206 const XmlAttribute* attr = findAttribute(name);
1207
1208 if (!attr)
|
1209 david.dillard 1.32 return false;
|
1210 mike 1.13
1211 const char* first;
1212 const char* last;
1213 _findEnds(attr->value, first, last);
1214
1215 char* end = 0;
1216 double tmp = strtod(first, &end);
1217
1218 if (!end || end != last)
|
1219 david.dillard 1.32 return false;
|
1220 mike 1.13
|
1221 david.dillard 1.32 value = static_cast<Real32>(tmp);
|
1222 mike 1.13 return true;
1223 }
1224
1225 Boolean XmlEntry::getAttributeValue(
|
1226 david.dillard 1.32 const char* name,
|
1227 mike 1.13 const char*& value) const
1228 {
1229 const XmlAttribute* attr = findAttribute(name);
1230
1231 if (!attr)
|
1232 david.dillard 1.32 return false;
|
1233 mike 1.13
1234 value = attr->value;
1235 return true;
1236 }
1237
1238 Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1239 {
1240 const char* tmp;
1241
1242 if (!getAttributeValue(name, tmp))
|
1243 david.dillard 1.32 return false;
|
1244 mike 1.13
|
1245 chuck 1.28 value = String(tmp);
|
1246 mike 1.13 return true;
1247 }
1248
|
1249 mike 1.33 void XmlAppendCString(Buffer& out, const char* str)
|
1250 mike 1.13 {
|
1251 david.dillard 1.32 out.append(str, static_cast<Uint32>(strlen(str)));
|
1252 mike 1.13 }
1253
1254 PEGASUS_NAMESPACE_END
|