1 karl 1.38 //%2006////////////////////////////////////////////////////////////////////////
|
2 mike 1.13 //
|
3 karl 1.29 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
|
6 karl 1.27 // IBM Corp.; EMC Corporation, The Open Group.
|
7 karl 1.29 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
|
9 karl 1.31 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
|
11 karl 1.38 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
12 // EMC Corporation; Symantec Corporation; The Open Group.
|
13 mike 1.13 //
14 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
15 kumpf 1.16 // of this software and associated documentation files (the "Software"), to
16 // deal in the Software without restriction, including without limitation the
17 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
18 mike 1.13 // sell copies of the Software, and to permit persons to whom the Software is
19 // furnished to do so, subject to the following conditions:
|
20 karl 1.38 //
|
21 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
22 mike 1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
23 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
24 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
25 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
26 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
27 mike 1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 //
30 //==============================================================================
31 //
32 //%/////////////////////////////////////////////////////////////////////////////
33
34 ////////////////////////////////////////////////////////////////////////////////
35 //
36 // XmlParser
37 //
|
38 david.dillard 1.32 // This file contains a simple non-validating XML parser. Here are
39 // several rules for well-formed XML:
|
40 mike 1.13 //
|
41 david.dillard 1.32 // 1. Documents must begin with an XML declaration:
|
42 mike 1.13 //
|
43 david.dillard 1.32 // <?xml version="1.0" standalone="yes"?>
|
44 mike 1.13 //
|
45 david.dillard 1.32 // 2. Comments have the form:
|
46 mike 1.13 //
|
47 david.dillard 1.32 // <!-- blah blah blah -->
|
48 mike 1.13 //
|
49 david.dillard 1.32 // 3. The following entity references are supported:
|
50 mike 1.13 //
|
51 david.dillard 1.32 // &amp; - ampersand
52 // &lt; - less-than
53 // &gt; - greater-than
54 // &quot; - full quote
55 // &apos; - apostrophe
|
56 mike 1.13 //
|
57 kumpf 1.18 // as well as character (numeric) references:
|
58 mike 1.35 //
|
59 kumpf 1.18 // &#49; - decimal reference for character '1'
60 // &#x31; - hexadecimal reference for character '1'
61 //
|
62 david.dillard 1.32 // 4. Element names and attribute names take the following form:
|
63 mike 1.13 //
|
64 david.dillard 1.32 // [A-Za-z_][A-Za-z_0-9-.:]*
|
65 mike 1.13 //
|
66 david.dillard 1.32 // 5. Arbitrary data (CDATA) can be enclosed like this:
|
67 mike 1.13 //
|
68 david.dillard 1.32 // <![CDATA[
69 // ...
70 // ]]>
|
71 mike 1.13 //
|
72 david.dillard 1.32 // 6. Element names and attributes names are case-sensitive.
|
73 mike 1.13 //
|
74 david.dillard 1.32 // 7. XmlAttribute values must be delimited by full or half quotes.
75 // XmlAttribute values must be delimited.
|
76 mike 1.13 //
|
77 david.dillard 1.32 // 8. <!DOCTYPE...>
|
78 mike 1.13 //
79 // TODO:
80 //
|
81 kumpf 1.40 // ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is
|
82 mike 1.35 // work. Handle <!DOCTYPE...> sections which are complicated (containing
|
83 mike 1.13 // rules rather than references to files).
84 //
|
85 david.dillard 1.32 // Remove newlines from string literals:
|
86 mike 1.13 //
87 // Example: <xyz x="hello
|
88 david.dillard 1.32 // world">
|
89 mike 1.13 //
90 ////////////////////////////////////////////////////////////////////////////////
91
|
92 sage 1.14 #include <Pegasus/Common/Config.h>
|
93 mike 1.13 #include <cctype>
94 #include <cstdio>
95 #include <cstdlib>
96 #include <cstring>
97 #include "XmlParser.h"
98 #include "Logger.h"
|
99 chuck 1.19 #include "ExceptionRep.h"
|
100 mike 1.34 #include "CharSet.h"
|
101 mike 1.13
102 PEGASUS_NAMESPACE_BEGIN
103
104 ////////////////////////////////////////////////////////////////////////////////
105 //
106 // Static helper functions
107 //
108 ////////////////////////////////////////////////////////////////////////////////
109
110 static void _printValue(const char* p)
111 {
112 for (; *p; p++)
113 {
|
114 david.dillard 1.32 if (*p == '\n')
115 PEGASUS_STD(cout) << "\\n";
116 else if (*p == '\r')
117 PEGASUS_STD(cout) << "\\r";
118 else if (*p == '\t')
119 PEGASUS_STD(cout) << "\\t";
120 else
121 PEGASUS_STD(cout) << *p;
|
122 mike 1.13 }
123 }
124
125 struct EntityReference
126 {
127 const char* match;
128 Uint32 length;
129 char replacement;
130 };
131
|
132 kumpf 1.18 // ATTN: Add support for more entity references
|
133 mike 1.13 static EntityReference _references[] =
134 {
135 { "&", 5, '&' },
136 { "<", 4, '<' },
137 { ">", 4, '>' },
138 { """, 6, '"' },
139 { "'", 6, '\'' }
140 };
141
|
142 chuck 1.26
143 // Implements a check for a whitespace character, without calling
144 // isspace( ). The isspace( ) function is locale-sensitive,
145 // and incorrectly flags some chars above 0x7f as whitespace. This
146 // causes the XmlParser to incorrectly parse UTF-8 data.
147 //
148 // Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml)
149 // defines white space as:
|
150 david.dillard 1.32 // S ::= (#x20 | #x9 | #xD | #xA)+
|
151 mike 1.34 static inline int _isspace(char c)
|
152 chuck 1.26 {
|
153 kumpf 1.36 return CharSet::isXmlWhiteSpace((Uint8)c);
|
154 chuck 1.26 }
155
|
156 mike 1.13 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
157
158 ////////////////////////////////////////////////////////////////////////////////
159 //
160 // XmlException
161 //
162 ////////////////////////////////////////////////////////////////////////////////
163
// Default (English) message text for each XmlException code. _formMessage()
// and _formPartialMessage() index this table with (code - 1), so the order
// of the entries must stay in step with _xmlKeys below.
static const char* _xmlMessages[] =
{
    "Bad opening element",
    "Bad closing element",
    "Bad attribute name",
    "Expected equal sign",
    "Bad attribute value",
    "A \"--\" sequence found within comment",
    "Unterminated comment",
    "Unterminated CDATA block",
    "Unterminated DOCTYPE",
    "Malformed reference",
    "Expected a comment or CDATA following \"<!\" sequence",
    "Closing element does not match opening element",
    "One or more tags are still open",
    "More than one root element was encountered",
    "Validation error",
    "Semantic error",
    "Namespace not declared"
};

// Message keys, parallel to _xmlMessages: entry i is the key for the default
// text in _xmlMessages[i]. Both tables are indexed with (code - 1).
static const char* _xmlKeys[] =
{
    "Common.XmlParser.BAD_START_TAG",
    "Common.XmlParser.BAD_END_TAG",
    "Common.XmlParser.BAD_ATTRIBUTE_NAME",
    "Common.XmlParser.EXPECTED_EQUAL_SIGN",
    "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
    "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
    "Common.XmlParser.UNTERMINATED_COMMENT",
    "Common.XmlParser.UNTERMINATED_CDATA",
    "Common.XmlParser.UNTERMINATED_DOCTYPE",
    "Common.XmlParser.MALFORMED_REFERENCE",
    "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
    "Common.XmlParser.START_END_MISMATCH",
    "Common.XmlParser.UNCLOSED_TAGS",
    "Common.XmlParser.MULTIPLE_ROOTS",
    "Common.XmlParser.VALIDATION_ERROR",
    "Common.XmlParser.SEMANTIC_ERROR",
    "Common.XmlParser.UNDECLARED_NAMESPACE"
};
205
206
|
207 kumpf 1.40 static MessageLoaderParms _formMessage(
208 Uint32 code,
209 Uint32 line,
210 const String& message)
|
211 chuck 1.19 {
212 String dftMsg = _xmlMessages[Uint32(code) - 1];
213 String key = _xmlKeys[Uint32(code) - 1];
|
214 dmitry.mikulin 1.43.2.5 String msg = message;
|
215 chuck 1.19
216 dftMsg.append(": on line $0");
217 if (message.size())
218 {
|
219 david.dillard 1.32 msg = ": " + msg;
220 dftMsg.append("$1");
221 }
|
222 chuck 1.19
223 return MessageLoaderParms(key, dftMsg, line ,msg);
224 }
225
226 static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
227 {
228 String dftMsg = _xmlMessages[Uint32(code) - 1];
229 String key = _xmlKeys[Uint32(code) - 1];
230
231 dftMsg.append(": on line $0");
|
232 david.dillard 1.32
|
233 chuck 1.19 return MessageLoaderParms(key, dftMsg, line);
234 }
235
|
236 mike 1.13
237 XmlException::XmlException(
|
238 david.dillard 1.32 XmlException::Code code,
|
239 mike 1.13 Uint32 lineNumber,
|
240 david.dillard 1.32 const String& message)
|
241 mike 1.13 : Exception(_formMessage(code, lineNumber, message))
242 {
243
244 }
245
|
246 chuck 1.19
247 XmlException::XmlException(
|
248 david.dillard 1.32 XmlException::Code code,
|
249 chuck 1.19 Uint32 lineNumber,
|
250 david.dillard 1.32 MessageLoaderParms& msgParms)
|
251 chuck 1.19 : Exception(_formPartialMessage(code, lineNumber))
252 {
|
253 david.dillard 1.32 if (msgParms.default_msg.size())
|
254 humberto 1.21 {
|
255 david.dillard 1.32 msgParms.default_msg = ": " + msgParms.default_msg;
256 }
257 _rep->message.append(MessageLoader::getMessage(msgParms));
|
258 chuck 1.19 }
259
260
|
261 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
262 //
263 // XmlValidationError
264 //
265 ////////////////////////////////////////////////////////////////////////////////
266
267 XmlValidationError::XmlValidationError(
268 Uint32 lineNumber,
269 const String& message)
270 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
271 {
272 }
273
|
274 chuck 1.19
275 XmlValidationError::XmlValidationError(
276 Uint32 lineNumber,
277 MessageLoaderParms& msgParms)
278 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
279 {
280 }
281
282
|
283 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
284 //
285 // XmlSemanticError
286 //
287 ////////////////////////////////////////////////////////////////////////////////
288
289 XmlSemanticError::XmlSemanticError(
290 Uint32 lineNumber,
291 const String& message)
292 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
293 {
294 }
|
295 chuck 1.19
296
297 XmlSemanticError::XmlSemanticError(
298 Uint32 lineNumber,
299 MessageLoaderParms& msgParms)
300 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
301 {
302 }
303
|
304 mike 1.13
305 ////////////////////////////////////////////////////////////////////////////////
306 //
307 // XmlParser
308 //
309 ////////////////////////////////////////////////////////////////////////////////
310
|
311 dmitry.mikulin 1.43.2.2 XmlParser::XmlParser(char* text, XmlNamespace* ns)
|
312 kumpf 1.40 : _line(1),
313 _current(text),
314 _restoreChar('\0'),
|
315 dmitry.mikulin 1.43.2.2 _foundRoot(false),
|
316 dmitry.mikulin 1.43.2.10 _supportedNamespaces(ns),
317 _currentUnsupportedNSType(-1)
|
318 mike 1.13 {
319 }
320
|
321 mike 1.34 inline void _skipWhitespace(Uint32& line, char*& p)
322 {
323 while (*p && _isspace(*p))
324 {
325 if (*p == '\n')
326 line++;
327
328 p++;
329 }
330 }
331
|
// NOTE(review): compiler optimization is switched off around this function
// on the 64-bit Windows MSVC platforms; the reason is not recorded here.
#if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \
    defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC)
#pragma optimize( "", off )
#endif
// Recognizes one of the five predefined entity names at p. On entry p points
// just past the '&'. On success p is advanced past the terminating ';' and
// the replacement character is returned. If no entity name matches, p is
// left untouched and -1 is returned.
static int _getEntityRef(char*& p)
{
    struct NamedEntity
    {
        const char* name;   // entity name including the trailing ';'
        size_t length;      // number of characters in name
        char value;         // character the entity expands to
    };

    // Checked in this order: gt, lt, apos, quot, amp.
    static const NamedEntity entities[] =
    {
        { "gt;", 3, '>' },
        { "lt;", 3, '<' },
        { "apos;", 5, '\'' },
        { "quot;", 5, '"' },
        { "amp;", 4, '&' }
    };

    const size_t count = sizeof(entities) / sizeof(entities[0]);

    for (size_t i = 0; i < count; i++)
    {
        // strncmp stops at a NUL terminator, so this never reads past the
        // end of the input text.
        if (strncmp(p, entities[i].name, entities[i].length) == 0)
        {
            p += entities[i].length;
            return entities[i].value;
        }
    }

    return -1;
}
#if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \
    defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC)
#pragma optimize( "", on )
#endif
|
376 kumpf 1.37
// Parses the numeric part of a character reference.
//
// On entry p points at the first digit: just past "&#" for a decimal
// reference, or just past "&#x" for a hexadecimal one.
//   hex - true to parse hexadecimal digits, false for decimal digits
//
// Returns the character value (0..255) and advances p past the terminating
// ';'. Returns -1 and leaves p unchanged when the reference is malformed:
// no digits, more than 4 hex or 5 decimal digits, no ';' after the digits,
// or a value above 255.
//
// Digits are scanned by hand rather than with strtoul(). strtoul() skips
// leading whitespace and accepts a sign and a "0x" prefix, none of which is
// allowed by the XML CharRef production.
static inline int _getCharRef(char*& p, bool hex)
{
    const int maxDigits = hex ? 4 : 5;
    const unsigned long base = hex ? 16 : 10;

    char* cur = p;
    unsigned long value = 0;
    int digits = 0;

    for (;; cur++)
    {
        const char c = *cur;
        unsigned long digit;

        if (c >= '0' && c <= '9')
            digit = (unsigned long)(c - '0');
        else if (hex && c >= 'a' && c <= 'f')
            digit = (unsigned long)(c - 'a') + 10;
        else if (hex && c >= 'A' && c <= 'F')
            digit = (unsigned long)(c - 'A') + 10;
        else
            break;

        // The digit limit also guarantees that value cannot overflow.
        if (++digits > maxDigits)
            return -1;

        value = value * base + digit;
    }

    if ((digits == 0) || (*cur != ';') || (value > 255))
    {
        return -1;
    }

    p = cur + 1;

    return int(value);
}
405
406 static void _normalize(Uint32& line, char*& p, char end_char, char*& start)
407 {
408 // Skip over leading whitespace:
409
410 _skipWhitespace(line, p);
411 start = p;
412
413 // Process one character at a time:
414
415 char* q = p;
416
417 while (*p && (*p != end_char))
418 kumpf 1.37 {
419 if (_isspace(*p))
420 {
421 // Compress sequences of whitespace characters to a single space
422 // character. Update line number when newlines encountered.
423
424 if (*p++ == '\n')
425 {
426 line++;
427 }
428
429 *q++ = ' ';
430
431 _skipWhitespace(line, p);
432 }
433 else if (*p == '&')
434 {
435 // Process entity characters and entity references:
436
437 p++;
438 int ch;
439 kumpf 1.37
440 if (*p == '#')
441 {
442 *p++;
443
444 if (*p == 'x')
445 {
446 p++;
447 ch = _getCharRef(p, true);
448 }
449 else
450 {
451 ch = _getCharRef(p, false);
452 }
453 }
454 else
455 {
456 ch = _getEntityRef(p);
457 }
458
459 if (ch == -1)
460 kumpf 1.37 {
461 throw XmlException(XmlException::MALFORMED_REFERENCE, line);
462 }
463
464 *q++ = ch;
465 }
466 else
467 {
468 *q++ = *p++;
469 }
470 }
471
472 // Remove single trailing whitespace (consecutive whitespaces already
473 // compressed above). Since p >= q, we can tell if we need to strip a
474 // trailing space from q by looking at the end of p. We must not look at
475 // the last character of p, though, if p is an empty string.
|
476 dmitry.mikulin 1.43.2.13 Boolean adjust_q = (p != start) && _isspace(p[-1]);
477
478 // We encountered the end_char or a zero-terminator.
479 *q = *p;
|
480 kumpf 1.37
|
481 dmitry.mikulin 1.43.2.13 if (adjust_q)
|
482 kumpf 1.37 {
483 q--;
484 }
485
486 // If q got behind p, it is safe and necessary to null-terminate q
487
488 if (q != p)
489 {
490 *q = '\0';
491 }
492 }
493
|
494 kumpf 1.43.2.1 Boolean XmlParser::next(
495 XmlEntry& entry,
|
496 dmitry.mikulin 1.43.2.2 Boolean includeComment)
|
497 mike 1.13 {
498 if (!_putBackStack.isEmpty())
499 {
|
500 david.dillard 1.32 entry = _putBackStack.top();
501 _putBackStack.pop();
502 return true;
|
503 mike 1.13 }
504
505 // If a character was overwritten with a null-terminator the last
506 // time this routine was called, then put back that character. Before
507 // exiting of course, restore the null-terminator.
508
509 char* nullTerminator = 0;
510
511 if (_restoreChar && !*_current)
512 {
|
513 david.dillard 1.32 nullTerminator = _current;
514 *_current = _restoreChar;
515 _restoreChar = '\0';
|
516 mike 1.13 }
517
|
518 kumpf 1.43.2.12 entry.attributes.clear();
519
520 if (_supportedNamespaces)
521 {
522 // Remove namespaces of a deeper scope level from the stack.
523 while (!_nameSpaces.isEmpty() &&
524 _nameSpaces.top().scopeLevel > _stack.size())
525 {
526 _nameSpaces.pop();
527 }
528 }
529
|
530 venkat.puvvada 1.41 // Loop until we are done with comments if includeComment is false.
531 do
532 {
533 // Skip over any whitespace:
534 _skipWhitespace(_line, _current);
535
536 if (!*_current)
537 {
538 if (nullTerminator)
539 *nullTerminator = '\0';
|
540 mike 1.13
|
541 venkat.puvvada 1.41 if (!_stack.isEmpty())
542 throw XmlException(XmlException::UNCLOSED_TAGS, _line);
|
543 mike 1.13
|
544 venkat.puvvada 1.41 return false;
545 }
|
546 mike 1.13
|
547 venkat.puvvada 1.41 // Either a "<...>" or content begins next:
|
548 mike 1.13
|
549 venkat.puvvada 1.41 if (*_current == '<')
550 {
551 _current++;
552 _getElement(_current, entry);
|
553 mike 1.13
|
554 venkat.puvvada 1.41 if (nullTerminator)
555 *nullTerminator = '\0';
|
556 mike 1.13
|
557 venkat.puvvada 1.41 if (entry.type == XmlEntry::START_TAG)
558 {
559 if (_stack.isEmpty() && _foundRoot)
560 throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
|
561 mike 1.13
|
562 venkat.puvvada 1.41 _foundRoot = true;
563 _stack.push((char*)entry.text);
564 }
565 else if (entry.type == XmlEntry::END_TAG)
566 {
567 if (_stack.isEmpty())
568 throw XmlException(XmlException::START_END_MISMATCH, _line);
|
569 mike 1.13
|
570 venkat.puvvada 1.41 if (strcmp(_stack.top(), entry.text) != 0)
571 throw XmlException(XmlException::START_END_MISMATCH, _line);
|
572 david.dillard 1.32
|
573 venkat.puvvada 1.41 _stack.pop();
574 }
|
575 david.dillard 1.32 }
|
576 venkat.puvvada 1.41 else
|
577 david.dillard 1.32 {
|
578 venkat.puvvada 1.41 // Normalize the content:
|
579 mike 1.13
|
580 venkat.puvvada 1.41 char* start;
581 _normalize(_line, _current, '<', start);
|
582 mike 1.13
|
583 venkat.puvvada 1.41 // Get the content:
|
584 mike 1.13
|
585 venkat.puvvada 1.41 entry.type = XmlEntry::CONTENT;
586 entry.text = start;
|
587 kumpf 1.37
|
588 venkat.puvvada 1.41 // Overwrite '<' with a null character (temporarily).
|
589 kumpf 1.37
|
590 venkat.puvvada 1.41 _restoreChar = *_current;
591 *_current = '\0';
|
592 kumpf 1.37
|
593 venkat.puvvada 1.41 if (nullTerminator)
594 *nullTerminator = '\0';
595 }
|
596 dmitry.mikulin 1.43.2.2 } while (!includeComment && entry.type == XmlEntry::COMMENT);
597
|
598 kumpf 1.43.2.3 if (_supportedNamespaces &&
599 (entry.type == XmlEntry::START_TAG ||
600 entry.type == XmlEntry::EMPTY_TAG ||
601 entry.type == XmlEntry::END_TAG))
|
602 dmitry.mikulin 1.43.2.2 {
|
603 kumpf 1.43.2.12 // Determine the namespace type for this entry
604
|
605 dmitry.mikulin 1.43.2.2 if (entry.type == XmlEntry::START_TAG ||
606 entry.type == XmlEntry::EMPTY_TAG)
607 {
|
608 kumpf 1.43.2.12 // Process namespace declarations and determine the namespace type
609 // for the attributes.
610
611 Uint32 scopeLevel = _stack.size();
612 if (entry.type == XmlEntry::EMPTY_TAG)
613 {
614 // Empty tags are deeper scope, but not pushed onto the stack
615 scopeLevel++;
616 }
617
|
618 dmitry.mikulin 1.43.2.2 for (unsigned int i = 0; i < entry.attributes.size(); i++)
619 {
|
620 kumpf 1.43.2.3 XmlAttribute& attr = entry.attributes[i];
|
621 kumpf 1.43.2.12 if ((strncmp(attr.name, "xmlns:", 6) == 0) ||
622 (strcmp(attr.name, "xmlns") == 0))
|
623 dmitry.mikulin 1.43.2.2 {
|
624 kumpf 1.43.2.12 // Process a namespace declaration
|
625 dmitry.mikulin 1.43.2.2 XmlNamespace ns;
|
626 dmitry.mikulin 1.43.2.4 if (attr.name[5] == ':')
|
627 dmitry.mikulin 1.43.2.2 {
|
628 kumpf 1.43.2.11 ns.localName = attr.localName;
|
629 dmitry.mikulin 1.43.2.2 }
630 else
631 {
|
632 dmitry.mikulin 1.43.2.4 // Default name space has no local name
633 ns.localName = 0;
|
634 dmitry.mikulin 1.43.2.2 }
|
635 dmitry.mikulin 1.43.2.4 ns.extendedName = attr.value;
|
636 kumpf 1.43.2.12 ns.scopeLevel = scopeLevel;
637 ns.type = _getSupportedNamespaceType(ns.extendedName);
|
638 dmitry.mikulin 1.43.2.10
639 // If the namespace is not supported, assign it a unique
|
640 kumpf 1.43.2.12 // negative identifier.
|
641 dmitry.mikulin 1.43.2.10 if (ns.type == -1)
642 {
643 ns.type = _currentUnsupportedNSType--;
644 }
|
645 kumpf 1.43.2.12
|
646 dmitry.mikulin 1.43.2.4 _nameSpaces.push(ns);
647 }
648 else
649 {
|
650 kumpf 1.43.2.12 // Get the namespace type for this attribute.
|
651 dmitry.mikulin 1.43.2.4 attr.nsType = _getNamespaceType(attr.name);
|
652 dmitry.mikulin 1.43.2.2 }
653 }
654 }
655
|
656 kumpf 1.43.2.12 entry.nsType = _getNamespaceType(entry.text);
|
657 dmitry.mikulin 1.43.2.2 }
658 else
659 {
660 entry.nsType = -1;
661 }
|
662 kumpf 1.37
|
663 venkat.puvvada 1.41 return true;
|
664 mike 1.13 }
665
|
666 dmitry.mikulin 1.43.2.2 // Get the namespace type of the given tag
667 int XmlParser::_getNamespaceType(const char* tag)
668 {
669 const char* pos = strchr(tag, ':');
670
|
671 kumpf 1.43.2.12 // If ':' is not found, the tag is not namespace qualified and we
|
672 dmitry.mikulin 1.43.2.4 // need to look for the default name space.
|
673 dmitry.mikulin 1.43.2.2
674 // Search the namespace stack from the top
675 for (int i = _nameSpaces.size() - 1; i >=0; i--)
676 {
|
677 kumpf 1.43.2.12 // If ':' is found, look for the name space with the matching
|
678 dmitry.mikulin 1.43.2.4 // local name...
679 if ((pos && _nameSpaces[i].localName &&
680 !strncmp(_nameSpaces[i].localName, tag, pos - tag)) ||
681 // ... otherwise look for the default name space. It's the
682 // one with localName set to NULL
683 (!pos && !_nameSpaces[i].localName))
684 {
|
685 dmitry.mikulin 1.43.2.2 return _nameSpaces[i].type;
686 }
687 }
|
688 dmitry.mikulin 1.43.2.4
689 // If the tag is namespace qualified, but the name space has not been
|
690 kumpf 1.43.2.7 // declared, it's malformed XML and we must throw an exception.
691 // Note: The "xml" namespace is specifically defined by the W3C as a
692 // reserved prefix ("http://www.w3.org/XML/1998/namespace").
693 if (pos && (strncmp(tag, "xml:", 4) != 0))
|
694 dmitry.mikulin 1.43.2.4 {
695 throw XmlException(XmlException::UNDECLARED_NAMESPACE, _line);
696 }
697
698 // Otherwise it's OK not to have a name space.
|
699 dmitry.mikulin 1.43.2.2 return -1;
700 }
701
|
702 kumpf 1.43.2.12 // Given the extended namespace name, find it in the table of supported
|
703 dmitry.mikulin 1.43.2.2 // namespaces and return its type.
|
704 kumpf 1.43.2.12 int XmlParser::_getSupportedNamespaceType(const char* extendedName)
|
705 dmitry.mikulin 1.43.2.2 {
|
706 kumpf 1.43.2.12 for (int i = 0;
|
707 dmitry.mikulin 1.43.2.2 _supportedNamespaces[i].localName != 0;
708 i++)
709 {
710 PEGASUS_ASSERT(_supportedNamespaces[i].type == i);
711 if (!strcmp(_supportedNamespaces[i].extendedName, extendedName))
712 {
713 return _supportedNamespaces[i].type;
714 }
715 }
716 return -1;
717 }
718
719 XmlNamespace* XmlParser::getNamespace(int nsType)
720 {
721 for (int i = _nameSpaces.size() - 1; i >=0; i--)
722 {
723 if (_nameSpaces[i].type == nsType)
724 {
725 return &_nameSpaces[i];
726 }
727 }
728 dmitry.mikulin 1.43.2.2 return 0;
729 }
730
|
731 mike 1.13 void XmlParser::putBack(XmlEntry& entry)
732 {
733 _putBackStack.push(entry);
734 }
735
736 XmlParser::~XmlParser()
737 {
738 // Nothing to do!
739 }
740
|
// Lookup table, indexed by unsigned byte value, that is 1 for the
// characters allowed after the first character of a name:
//     A-Z a-z 0-9 _ - .
// (Note that ':' is not included and must be checked separately.)
//
// The size is spelled out so that every possible byte value has an entry
// regardless of how many initializers are listed; entries without an
// initializer (0x80..0xFF) are zero.
static const unsigned char _isInnerElementChar[256] =
{
    /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x10 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x20 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,   // '-' '.'
    /* 0x30 */ 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,   // '0'-'9'
    /* 0x40 */ 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,   // 'A'-'O'
    /* 0x50 */ 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,   // 'P'-'Z' '_'
    /* 0x60 */ 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,   // 'a'-'o'
    /* 0x70 */ 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0    // 'p'-'z'
};
752
|
753 kumpf 1.43.2.11 inline Boolean _getQName(char*& p, const char*& localName)
|
754 mike 1.13 {
|
755 kumpf 1.43.2.11 localName = p;
756
|
757 mike 1.35 if (!CharSet::isAlNumUnder(Uint8(*p)))
|
758 kumpf 1.43.2.11 return false;
|
759 mike 1.35
|
760 kumpf 1.24 p++;
|
761 mike 1.13
|
762 mike 1.35 while (*p && _isInnerElementChar[Uint8(*p)])
|
763 david.dillard 1.32 p++;
|
764 mike 1.13
|
765 kumpf 1.43.2.11 // We've validated the prefix, now validate the local name
766 if (*p == ':')
767 {
768 localName = ++p;
769
770 if (!CharSet::isAlNumUnder(Uint8(*p)))
771 return false;
772
773 p++;
774
775 while (*p && _isInnerElementChar[Uint8(*p)])
776 p++;
777 }
778
779 return true;
780 }
781
782 Boolean XmlParser::_getElementName(char*& p, const char*& localName)
783 {
784 if (!_getQName(p, localName))
785 throw XmlException(XmlException::BAD_START_TAG, _line);
786 kumpf 1.43.2.11
|
787 mike 1.13 // The next character must be a space:
788
|
789 chuck 1.26 if (_isspace(*p))
|
790 mike 1.13 {
|
791 david.dillard 1.32 *p++ = '\0';
|
792 mike 1.34 _skipWhitespace(_line, p);
|
793 mike 1.13 }
794
795 if (*p == '>')
796 {
|
797 david.dillard 1.32 *p++ = '\0';
798 return true;
|
799 mike 1.13 }
800
801 return false;
802 }
803
|
804 kumpf 1.43.2.11 Boolean XmlParser::_getOpenElementName(
805 char*& p,
806 const char*& localName,
807 Boolean& openCloseElement)
|
808 mike 1.13 {
809 openCloseElement = false;
810
|
811 kumpf 1.43.2.11 if (!_getQName(p, localName))
|
812 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
813 mike 1.35
|
814 mike 1.13 // The next character must be a space:
815
|
816 chuck 1.26 if (_isspace(*p))
|
817 mike 1.13 {
|
818 david.dillard 1.32 *p++ = '\0';
|
819 mike 1.34 _skipWhitespace(_line, p);
|
820 mike 1.13 }
821
822 if (*p == '>')
823 {
|
824 david.dillard 1.32 *p++ = '\0';
825 return true;
|
826 mike 1.13 }
827
828 if (p[0] == '/' && p[1] == '>')
829 {
|
830 david.dillard 1.32 openCloseElement = true;
831 *p = '\0';
832 p += 2;
833 return true;
|
834 mike 1.13 }
835
836 return false;
837 }
838
|
839 kumpf 1.43.2.11 void XmlParser::_getAttributeNameAndEqual(char*& p, const char*& localName)
|
840 mike 1.13 {
|
841 kumpf 1.43.2.11 if (!_getQName(p, localName))
|
842 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
843 mike 1.35
|
844 mike 1.13 char* term = p;
845
|
846 mike 1.34 _skipWhitespace(_line, p);
|
847 mike 1.13
848 if (*p != '=')
|
849 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
850 mike 1.13
851 p++;
852
|
853 mike 1.34 _skipWhitespace(_line, p);
|
854 mike 1.13
855 *term = '\0';
856 }
857
858 void XmlParser::_getComment(char*& p)
859 {
860 // Now p points to first non-whitespace character beyond "<--" sequence:
861
862 for (; *p; p++)
863 {
|
864 david.dillard 1.32 if (p[0] == '-' && p[1] == '-')
865 {
866 if (p[2] != '>')
867 {
868 throw XmlException(
869 XmlException::MINUS_MINUS_IN_COMMENT, _line);
870 }
871
872 // Find end of comment (excluding whitespace):
873
874 *p = '\0';
875 p += 3;
876 return;
877 }
|
878 mike 1.13 }
879
880 // If it got this far, then the comment is unterminated:
881
882 throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
883 }
884
885 void XmlParser::_getCData(char*& p)
886 {
887 // At this point p points one past "<![CDATA[" sequence:
888
889 for (; *p; p++)
890 {
|
891 david.dillard 1.32 if (p[0] == ']' && p[1] == ']' && p[2] == '>')
892 {
893 *p = '\0';
894 p += 3;
895 return;
896 }
897 else if (*p == '\n')
898 _line++;
|
899 mike 1.13 }
900
901 // If it got this far, then the comment is unterminated:
902
903 throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
904 }
905
906 void XmlParser::_getDocType(char*& p)
907 {
908 // Just ignore the DOCTYPE command for now:
909
910 for (; *p && *p != '>'; p++)
911 {
|
912 david.dillard 1.32 if (*p == '\n')
913 _line++;
|
914 mike 1.13 }
915
916 if (*p != '>')
|
917 david.dillard 1.32 throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
|
918 mike 1.13
919 p++;
920 }
921
922 void XmlParser::_getElement(char*& p, XmlEntry& entry)
923 {
924 //--------------------------------------------------------------------------
925 // Get the element name (expect one of these: '?', '!', [A-Za-z_])
926 //--------------------------------------------------------------------------
927
928 if (*p == '?')
929 {
|
930 david.dillard 1.32 entry.type = XmlEntry::XML_DECLARATION;
931 entry.text = ++p;
|
932 mike 1.13
|
933 david.dillard 1.32 Boolean openCloseElement = false;
|
934 mike 1.13
|
935 kumpf 1.43.2.11 if (_getElementName(p, entry.localName))
|
936 david.dillard 1.32 return;
|
937 mike 1.13 }
938 else if (*p == '!')
939 {
|
940 david.dillard 1.32 p++;
|
941 mike 1.13
|
942 david.dillard 1.32 // Expect a comment or CDATA:
|
943 mike 1.13
|
944 david.dillard 1.32 if (p[0] == '-' && p[1] == '-')
945 {
946 p += 2;
947 entry.type = XmlEntry::COMMENT;
948 entry.text = p;
949 _getComment(p);
950 return;
951 }
952 else if (memcmp(p, "[CDATA[", 7) == 0)
953 {
954 p += 7;
955 entry.type = XmlEntry::CDATA;
956 entry.text = p;
957 _getCData(p);
958 return;
959 }
960 else if (memcmp(p, "DOCTYPE", 7) == 0)
961 {
962 entry.type = XmlEntry::DOCTYPE;
|
963 kumpf 1.37 entry.text = "";
|
964 david.dillard 1.32 _getDocType(p);
965 return;
966 }
967 throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
|
968 mike 1.13 }
969 else if (*p == '/')
970 {
|
971 david.dillard 1.32 entry.type = XmlEntry::END_TAG;
972 entry.text = ++p;
|
973 mike 1.13
|
974 kumpf 1.43.2.11 if (!_getElementName(p, entry.localName))
|
975 david.dillard 1.32 throw(XmlException(XmlException::BAD_END_TAG, _line));
|
976 mike 1.13
|
977 david.dillard 1.32 return;
|
978 mike 1.13 }
|
979 david 1.25 else if ((((*p >= 'A') && (*p <= 'Z')) ||
|
980 kumpf 1.24 ((*p >= 'a') && (*p <= 'z')) ||
981 (*p == '_')))
|
982 mike 1.13 {
|
983 david.dillard 1.32 entry.type = XmlEntry::START_TAG;
984 entry.text = p;
|
985 mike 1.13
|
986 david.dillard 1.32 Boolean openCloseElement = false;
|
987 mike 1.13
|
988 kumpf 1.43.2.11 if (_getOpenElementName(p, entry.localName, openCloseElement))
|
989 david.dillard 1.32 {
990 if (openCloseElement)
991 entry.type = XmlEntry::EMPTY_TAG;
992 return;
993 }
|
994 mike 1.13 }
995 else
|
996 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
997 mike 1.13
998 //--------------------------------------------------------------------------
999 // Grab all the attributes:
1000 //--------------------------------------------------------------------------
1001
1002 for (;;)
1003 {
|
1004 david.dillard 1.32 if (entry.type == XmlEntry::XML_DECLARATION)
1005 {
1006 if (p[0] == '?' && p[1] == '>')
1007 {
1008 p += 2;
1009 return;
1010 }
1011 }
1012 else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
1013 {
1014 entry.type = XmlEntry::EMPTY_TAG;
1015 p += 2;
1016 return;
1017 }
1018 else if (*p == '>')
1019 {
1020 p++;
1021 return;
1022 }
1023
1024 XmlAttribute attr;
|
1025 dmitry.mikulin 1.43.2.4 attr.nsType = -1;
|
1026 david.dillard 1.32 attr.name = p;
|
1027 kumpf 1.43.2.11 _getAttributeNameAndEqual(p, attr.localName);
|
1028 david.dillard 1.32
|
1029 kumpf 1.37 // Get the attribute value (e.g., "some value")
1030 {
1031 if ((*p != '"') && (*p != '\''))
1032 {
1033 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
1034 }
1035
1036 char quote = *p++;
1037
1038 char* start;
1039 _normalize(_line, p, quote, start);
1040 attr.value = start;
1041
1042 if (*p != quote)
1043 {
1044 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
1045 }
1046
1047 // Overwrite the closing quote with a null-terminator:
|
1048 david.dillard 1.32
|
1049 kumpf 1.37 *p++ = '\0';
1050 }
|
1051 david.dillard 1.32
1052 if (entry.type == XmlEntry::XML_DECLARATION)
1053 {
1054 // The next thing must a space or a "?>":
|
1055 mike 1.13
|
1056 david.dillard 1.32 if (!(p[0] == '?' && p[1] == '>') && !_isspace(*p))
1057 {
1058 throw XmlException(
1059 XmlException::BAD_ATTRIBUTE_VALUE, _line);
1060 }
1061 }
1062 else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || _isspace(*p)))
1063 {
1064 // The next thing must be a space or a '>':
|
1065 mike 1.13
|
1066 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
1067 }
|
1068 mike 1.13
|
1069 mike 1.34 _skipWhitespace(_line, p);
|
1070 david.dillard 1.32
|
1071 kumpf 1.43.2.1 entry.attributes.append(attr);
|
1072 mike 1.13 }
1073 }
1074
// Printable names for the XmlEntry types. XmlEntry::print() indexes this
// table directly with the entry's type, so the order of the names here
// presumably has to mirror the declaration order of the XmlEntry type
// enumeration (NOTE(review): confirm against the header when adding a type).
static const char* _typeStrings[] =
{
    "XML_DECLARATION", "START_TAG", "EMPTY_TAG", "END_TAG",
    "COMMENT", "CDATA", "DOCTYPE", "CONTENT"
};
1086
1087 void XmlEntry::print() const
1088 {
1089 PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
1090
1091 Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
1092
1093 if (needQuotes)
|
1094 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
1095
|
1096 mike 1.13 _printValue(text);
1097
1098 if (needQuotes)
|
1099 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
|
1100 mike 1.13
1101 PEGASUS_STD(cout) << '\n';
1102
|
1103 kumpf 1.43.2.1 for (Uint32 i = 0; i < attributes.size(); i++)
|
1104 mike 1.13 {
|
1105 david.dillard 1.32 PEGASUS_STD(cout) << " " << attributes[i].name << "=\"";
1106 _printValue(attributes[i].value);
1107 PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
|
1108 mike 1.13 }
1109 }
1110
1111 const XmlAttribute* XmlEntry::findAttribute(
1112 const char* name) const
1113 {
|
1114 kumpf 1.43.2.1 for (Uint32 i = 0; i < attributes.size(); i++)
|
1115 mike 1.13 {
|
1116 david.dillard 1.32 if (strcmp(attributes[i].name, name) == 0)
1117 return &attributes[i];
|
1118 mike 1.13 }
1119
1120 return 0;
1121 }
1122
|
1123 kumpf 1.43.2.6 const XmlAttribute* XmlEntry::findAttribute(
1124 int nsType,
1125 const char* name) const
1126 {
1127 for (Uint32 i = 0; i < attributes.size(); i++)
1128 {
|
1129 kumpf 1.43.2.11 if ((attributes[i].nsType == nsType) &&
1130 (strcmp(attributes[i].localName, name) == 0))
|
1131 kumpf 1.43.2.6 {
|
1132 kumpf 1.43.2.11 return &attributes[i];
|
1133 kumpf 1.43.2.6 }
1134 }
1135
1136 return 0;
1137 }
1138
|
1139 mike 1.13 // Find first non-whitespace character (set first) and last non-whitespace
1140 // character (set last one past this). For example, consider this string:
1141 //
|
1142 david.dillard 1.32 // " 87 "
|
1143 mike 1.13 //
1144 // The first pointer would point to '8' and the last pointer woudl point one
1145 // beyond '7'.
1146
1147 static void _findEnds(
|
1148 david.dillard 1.32 const char* str,
1149 const char*& first,
|
1150 mike 1.13 const char*& last)
1151 {
1152 first = str;
1153
|
1154 chuck 1.26 while (_isspace(*first))
|
1155 david.dillard 1.32 first++;
|
1156 mike 1.13
1157 if (!*first)
1158 {
|
1159 david.dillard 1.32 last = first;
1160 return;
|
1161 mike 1.13 }
1162
1163 last = first + strlen(first);
1164
|
1165 chuck 1.26 while (last != first && _isspace(last[-1]))
|
1166 david.dillard 1.32 last--;
|
1167 mike 1.13 }
1168
1169 Boolean XmlEntry::getAttributeValue(
|
1170 david.dillard 1.32 const char* name,
|
1171 mike 1.13 Uint32& value) const
1172 {
1173 const XmlAttribute* attr = findAttribute(name);
1174
1175 if (!attr)
|
1176 david.dillard 1.32 return false;
|
1177 mike 1.13
1178 const char* first;
1179 const char* last;
1180 _findEnds(attr->value, first, last);
1181
1182 char* end = 0;
1183 long tmp = strtol(first, &end, 10);
1184
1185 if (!end || end != last)
|
1186 david.dillard 1.32 return false;
|
1187 mike 1.13
1188 value = Uint32(tmp);
1189 return true;
1190 }
1191
1192 Boolean XmlEntry::getAttributeValue(
|
1193 david.dillard 1.32 const char* name,
|
1194 mike 1.13 Real32& value) const
1195 {
1196 const XmlAttribute* attr = findAttribute(name);
1197
1198 if (!attr)
|
1199 david.dillard 1.32 return false;
|
1200 mike 1.13
1201 const char* first;
1202 const char* last;
1203 _findEnds(attr->value, first, last);
1204
1205 char* end = 0;
1206 double tmp = strtod(first, &end);
1207
1208 if (!end || end != last)
|
1209 david.dillard 1.32 return false;
|
1210 mike 1.13
|
1211 david.dillard 1.32 value = static_cast<Real32>(tmp);
|
1212 mike 1.13 return true;
1213 }
1214
1215 Boolean XmlEntry::getAttributeValue(
|
1216 david.dillard 1.32 const char* name,
|
1217 mike 1.13 const char*& value) const
1218 {
1219 const XmlAttribute* attr = findAttribute(name);
1220
1221 if (!attr)
|
1222 david.dillard 1.32 return false;
|
1223 mike 1.13
1224 value = attr->value;
1225 return true;
1226 }
1227
1228 Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1229 {
1230 const char* tmp;
1231
1232 if (!getAttributeValue(name, tmp))
|
1233 david.dillard 1.32 return false;
|
1234 mike 1.13
|
1235 chuck 1.28 value = String(tmp);
|
1236 mike 1.13 return true;
1237 }
1238
|
1239 mike 1.33 void XmlAppendCString(Buffer& out, const char* str)
|
1240 mike 1.13 {
|
1241 david.dillard 1.32 out.append(str, static_cast<Uint32>(strlen(str)));
|
1242 mike 1.13 }
1243
1244 PEGASUS_NAMESPACE_END
|