1 karl 1.38 //%2006////////////////////////////////////////////////////////////////////////
|
2 mike 1.13 //
|
3 karl 1.29 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
|
6 karl 1.27 // IBM Corp.; EMC Corporation, The Open Group.
|
7 karl 1.29 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
|
9 karl 1.31 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
|
11 karl 1.38 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
12 // EMC Corporation; Symantec Corporation; The Open Group.
|
13 mike 1.13 //
14 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
15 kumpf 1.16 // of this software and associated documentation files (the "Software"), to
16 // deal in the Software without restriction, including without limitation the
17 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
18 mike 1.13 // sell copies of the Software, and to permit persons to whom the Software is
19 // furnished to do so, subject to the following conditions:
|
20 karl 1.38 //
|
21 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
22 mike 1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
23 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
24 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
25 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
26 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
27 mike 1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 //
30 //==============================================================================
31 //
32 //%/////////////////////////////////////////////////////////////////////////////
33
34 ////////////////////////////////////////////////////////////////////////////////
35 //
36 // XmlParser
37 //
|
38 david.dillard 1.32 // This file contains a simple non-validating XML parser. Here are
39 // several rules for well-formed XML:
|
40 mike 1.13 //
|
41 david.dillard 1.32 // 1. Documents must begin with an XML declaration:
|
42 mike 1.13 //
|
43 david.dillard 1.32 // <?xml version="1.0" standalone="yes"?>
|
44 mike 1.13 //
|
45 david.dillard 1.32 // 2. Comments have the form:
|
46 mike 1.13 //
|
47 david.dillard 1.32 // <!-- blah blah blah -->
|
48 mike 1.13 //
|
49 david.dillard 1.32 // 3. The following entity references are supported:
|
50 mike 1.13 //
|
51 david.dillard 1.32 // &amp; - ampersand
52 // &lt; - less-than
53 // &gt; - greater-than
54 // &quot; - full quote
55 // &apos; - apostrophe
|
56 mike 1.13 //
|
57 kumpf 1.18 // as well as character (numeric) references:
|
58 mike 1.35 //
|
59 kumpf 1.18 // &#49; - decimal reference for character '1'
60 // &#x31; - hexadecimal reference for character '1'
61 //
|
62 david.dillard 1.32 // 4. Element names and attribute names take the following form:
|
63 mike 1.13 //
|
64 david.dillard 1.32 // [A-Za-z_][A-Za-z_0-9-.:]
|
65 mike 1.13 //
|
66 david.dillard 1.32 // 5. Arbitrary data (CDATA) can be enclosed like this:
|
67 mike 1.13 //
|
68 david.dillard 1.32 // <![CDATA[
69 // ...
70 // ]]>
|
71 mike 1.13 //
|
72 david.dillard 1.32 // 6. Element names and attributes names are case-sensitive.
|
73 mike 1.13 //
|
74 david.dillard 1.32 // 7. XmlAttribute values must be delimited by full or half quotes.
75 // XmlAttribute values must be delimited.
|
76 mike 1.13 //
|
77 david.dillard 1.32 // 8. <!DOCTYPE...>
|
78 mike 1.13 //
79 // TODO:
80 //
|
81 kumpf 1.40 // ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is
|
82 mike 1.35 // work. Handle <!DOCTYPE...> sections which are complicated (containing
|
83 mike 1.13 // rules rather than references to files).
84 //
|
85 david.dillard 1.32 // Remove newlines from string literals:
|
86 mike 1.13 //
87 // Example: <xyz x="hello
|
88 david.dillard 1.32 // world">
|
89 mike 1.13 //
90 ////////////////////////////////////////////////////////////////////////////////
91
|
92 sage 1.14 #include <Pegasus/Common/Config.h>
|
93 mike 1.13 #include <cctype>
94 #include <cstdio>
95 #include <cstdlib>
96 #include <cstring>
97 #include "XmlParser.h"
98 #include "Logger.h"
|
99 chuck 1.19 #include "ExceptionRep.h"
|
100 mike 1.34 #include "CharSet.h"
|
101 mike 1.13
102 PEGASUS_NAMESPACE_BEGIN
103
104 ////////////////////////////////////////////////////////////////////////////////
105 //
106 // Static helper functions
107 //
108 ////////////////////////////////////////////////////////////////////////////////
109
110 static void _printValue(const char* p)
111 {
112 for (; *p; p++)
113 {
|
114 david.dillard 1.32 if (*p == '\n')
115 PEGASUS_STD(cout) << "\\n";
116 else if (*p == '\r')
117 PEGASUS_STD(cout) << "\\r";
118 else if (*p == '\t')
119 PEGASUS_STD(cout) << "\\t";
120 else
121 PEGASUS_STD(cout) << *p;
|
122 mike 1.13 }
123 }
124
125 struct EntityReference
126 {
127 const char* match;
128 Uint32 length;
129 char replacement;
130 };
131
|
132 kumpf 1.18 // ATTN: Add support for more entity references
|
133 mike 1.13 static EntityReference _references[] =
134 {
135 { "&", 5, '&' },
136 { "<", 4, '<' },
137 { ">", 4, '>' },
138 { """, 6, '"' },
139 { "'", 6, '\'' }
140 };
141
|
142 chuck 1.26
143 // Implements a check for a whitespace character, without calling
144 // isspace( ). The isspace( ) function is locale-sensitive,
145 // and incorrectly flags some chars above 0x7f as whitespace. This
146 // causes the XmlParser to incorrectly parse UTF-8 data.
147 //
148 // Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml)
149 // defines white space as:
|
150 david.dillard 1.32 // S ::= (#x20 | #x9 | #xD | #xA)+
|
151 mike 1.34 static inline int _isspace(char c)
|
152 chuck 1.26 {
|
153 kumpf 1.36 return CharSet::isXmlWhiteSpace((Uint8)c);
|
154 chuck 1.26 }
155
|
156 mike 1.13 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
157
158 ////////////////////////////////////////////////////////////////////////////////
159 //
160 // XmlException
161 //
162 ////////////////////////////////////////////////////////////////////////////////
163
// Default (English) message text for each XmlException code. The table is
// indexed with (code - 1) by _formMessage() and _formPartialMessage(), in
// parallel with the _xmlKeys table.
static const char* _xmlMessages[] =
{
    "Bad opening element",
    "Bad closing element",
    "Bad attribute name",
    "Expected equal sign",
    "Bad attribute value",
    "A \"--\" sequence found within comment",
    "Unterminated comment",
    "Unterminated CDATA block",
    "Unterminated DOCTYPE",
    "Too many attributes: parser only handles 10",
    "Malformed reference",
    "Expected a comment or CDATA following \"<!\" sequence",
    "Closing element does not match opening element",
    "One or more tags are still open",
    "More than one root element was encountered",
    "Validation error",
    "Semantic error",
    "Malformed namespace declaration",
    "Namespace not supported",
    "Namespace not declared"
};
187
|
// Message-bundle keys, one per XmlException code. Indexed with (code - 1),
// in parallel with _xmlMessages.
static const char* _xmlKeys[] =
{
    // Tag and attribute syntax
    "Common.XmlParser.BAD_START_TAG",
    "Common.XmlParser.BAD_END_TAG",
    "Common.XmlParser.BAD_ATTRIBUTE_NAME",
    "Common.XmlParser.EXPECTED_EQUAL_SIGN",
    "Common.XmlParser.BAD_ATTRIBUTE_VALUE",

    // Comments, CDATA, DOCTYPE and references
    "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
    "Common.XmlParser.UNTERMINATED_COMMENT",
    "Common.XmlParser.UNTERMINATED_CDATA",
    "Common.XmlParser.UNTERMINATED_DOCTYPE",
    "Common.XmlParser.TOO_MANY_ATTRIBUTES",
    "Common.XmlParser.MALFORMED_REFERENCE",
    "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",

    // Document structure
    "Common.XmlParser.START_END_MISMATCH",
    "Common.XmlParser.UNCLOSED_TAGS",
    "Common.XmlParser.MULTIPLE_ROOTS",

    // Validation and semantics
    "Common.XmlParser.VALIDATION_ERROR",
    "Common.XmlParser.SEMANTIC_ERROR",

    // Namespaces
    "Common.XmlParser.MALFORMED_NAMESPACE_DECL",
    "Common.XmlParser.UNSUPPORTED_NAMESPACE",
    "Common.XmlParser.UNDECLARED_NAMESPACE"
};
211
212
|
213 kumpf 1.40 static MessageLoaderParms _formMessage(
214 Uint32 code,
215 Uint32 line,
216 const String& message)
|
217 chuck 1.19 {
218 String dftMsg = _xmlMessages[Uint32(code) - 1];
219 String key = _xmlKeys[Uint32(code) - 1];
|
220 dmitry.mikulin 1.43.2.5 String msg = message;
|
221 chuck 1.19
222 dftMsg.append(": on line $0");
223 if (message.size())
224 {
|
225 david.dillard 1.32 msg = ": " + msg;
226 dftMsg.append("$1");
227 }
|
228 chuck 1.19
229 return MessageLoaderParms(key, dftMsg, line ,msg);
230 }
231
232 static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
233 {
234 String dftMsg = _xmlMessages[Uint32(code) - 1];
235 String key = _xmlKeys[Uint32(code) - 1];
236
237 dftMsg.append(": on line $0");
|
238 david.dillard 1.32
|
239 chuck 1.19 return MessageLoaderParms(key, dftMsg, line);
240 }
241
|
242 mike 1.13
243 XmlException::XmlException(
|
244 david.dillard 1.32 XmlException::Code code,
|
245 mike 1.13 Uint32 lineNumber,
|
246 david.dillard 1.32 const String& message)
|
247 mike 1.13 : Exception(_formMessage(code, lineNumber, message))
248 {
249
250 }
251
|
252 chuck 1.19
253 XmlException::XmlException(
|
254 david.dillard 1.32 XmlException::Code code,
|
255 chuck 1.19 Uint32 lineNumber,
|
256 david.dillard 1.32 MessageLoaderParms& msgParms)
|
257 chuck 1.19 : Exception(_formPartialMessage(code, lineNumber))
258 {
|
259 david.dillard 1.32 if (msgParms.default_msg.size())
|
260 humberto 1.21 {
|
261 david.dillard 1.32 msgParms.default_msg = ": " + msgParms.default_msg;
262 }
263 _rep->message.append(MessageLoader::getMessage(msgParms));
|
264 chuck 1.19 }
265
266
|
267 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
268 //
269 // XmlValidationError
270 //
271 ////////////////////////////////////////////////////////////////////////////////
272
273 XmlValidationError::XmlValidationError(
274 Uint32 lineNumber,
275 const String& message)
276 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
277 {
278 }
279
|
280 chuck 1.19
281 XmlValidationError::XmlValidationError(
282 Uint32 lineNumber,
283 MessageLoaderParms& msgParms)
284 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
285 {
286 }
287
288
|
289 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
290 //
291 // XmlSemanticError
292 //
293 ////////////////////////////////////////////////////////////////////////////////
294
295 XmlSemanticError::XmlSemanticError(
296 Uint32 lineNumber,
297 const String& message)
298 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
299 {
300 }
|
301 chuck 1.19
302
303 XmlSemanticError::XmlSemanticError(
304 Uint32 lineNumber,
305 MessageLoaderParms& msgParms)
306 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
307 {
308 }
309
|
310 mike 1.13
311 ////////////////////////////////////////////////////////////////////////////////
312 //
313 // XmlParser
314 //
315 ////////////////////////////////////////////////////////////////////////////////
316
|
317 dmitry.mikulin 1.43.2.2 XmlParser::XmlParser(char* text, XmlNamespace* ns)
|
318 kumpf 1.40 : _line(1),
319 _current(text),
320 _restoreChar('\0'),
|
321 dmitry.mikulin 1.43.2.2 _foundRoot(false),
322 _scopeLevel(0),
323 _supportedNamespaces(ns)
|
324 mike 1.13 {
325 }
326
|
327 mike 1.34 inline void _skipWhitespace(Uint32& line, char*& p)
328 {
329 while (*p && _isspace(*p))
330 {
331 if (*p == '\n')
332 line++;
333
334 p++;
335 }
336 }
337
|
#if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \
    defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC)
#pragma optimize( "", off )
#endif
// Recognizes one of the five predefined entity references.
//
// On entry p points just past the '&'. If the text at p is "gt;", "lt;",
// "apos;", "quot;" or "amp;", p is advanced past the ';' and the character
// the entity stands for is returned. Otherwise p is left unchanged and -1
// is returned.
static int _getEntityRef(char*& p)
{
    static const struct
    {
        const char* name;
        int length;
        int value;
    }
    entities[] =
    {
        { "gt;", 3, '>' },
        { "lt;", 3, '<' },
        { "apos;", 5, '\'' },
        { "quot;", 5, '"' },
        { "amp;", 4, '&' }
    };

    const int count = int(sizeof(entities) / sizeof(entities[0]));

    for (int i = 0; i < count; i++)
    {
        // strncmp() stops at a NUL terminator, so text shorter than the
        // entity name is never read past its end.
        if (strncmp(p, entities[i].name, entities[i].length) == 0)
        {
            p += entities[i].length;
            return entities[i].value;
        }
    }

    return -1;
}
#if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \
    defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC)
#pragma optimize( "", on )
#endif
|
382 kumpf 1.37
// Parses the numeric part of a character reference.
//
// On entry p points just past "&#" (decimal, hex == false) or "&#x"
// (hexadecimal, hex == true). The reference must consist of one to five
// decimal digits, or one to four hexadecimal digits, followed by ';', and
// its value must not exceed 255.
//
// On success p is advanced past the ';' and the character value is
// returned. On failure p is left unchanged and -1 is returned.
static inline int _getCharRef(char*& p, bool hex)
{
    // Scan the digits explicitly instead of relying on strtoul() to find
    // the end of the number: strtoul() skips leading white space and
    // accepts a sign and (for base 16) a "0x" prefix, none of which may
    // appear in a character reference.
    char* end = p;

    if (hex)
    {
        while (isxdigit(static_cast<unsigned char>(*end)))
            end++;
    }
    else
    {
        while (isdigit(static_cast<unsigned char>(*end)))
            end++;
    }

    const int maxDigits = hex ? 4 : 5;

    if ((end == p) || (*end != ';') || ((end - p) > maxDigits))
    {
        return -1;
    }

    // At most five digits, so the conversion cannot overflow.
    const unsigned long ch = strtoul(p, 0, hex ? 16 : 10);

    if (ch > 255)
    {
        return -1;
    }

    p = end + 1;

    return int(ch);
}
411
412 static void _normalize(Uint32& line, char*& p, char end_char, char*& start)
413 {
414 // Skip over leading whitespace:
415
416 _skipWhitespace(line, p);
417 start = p;
418
419 // Process one character at a time:
420
421 char* q = p;
422
423 while (*p && (*p != end_char))
424 kumpf 1.37 {
425 if (_isspace(*p))
426 {
427 // Compress sequences of whitespace characters to a single space
428 // character. Update line number when newlines encountered.
429
430 if (*p++ == '\n')
431 {
432 line++;
433 }
434
435 *q++ = ' ';
436
437 _skipWhitespace(line, p);
438 }
439 else if (*p == '&')
440 {
441 // Process entity characters and entity references:
442
443 p++;
444 int ch;
445 kumpf 1.37
446 if (*p == '#')
447 {
448 *p++;
449
450 if (*p == 'x')
451 {
452 p++;
453 ch = _getCharRef(p, true);
454 }
455 else
456 {
457 ch = _getCharRef(p, false);
458 }
459 }
460 else
461 {
462 ch = _getEntityRef(p);
463 }
464
465 if (ch == -1)
466 kumpf 1.37 {
467 throw XmlException(XmlException::MALFORMED_REFERENCE, line);
468 }
469
470 *q++ = ch;
471 }
472 else
473 {
474 *q++ = *p++;
475 }
476 }
477
|
478 kumpf 1.40 // We encountered a the end_char or a zero-terminator.
|
479 kumpf 1.37
480 *q = *p;
481
482 // Remove single trailing whitespace (consecutive whitespaces already
483 // compressed above). Since p >= q, we can tell if we need to strip a
484 // trailing space from q by looking at the end of p. We must not look at
485 // the last character of p, though, if p is an empty string.
486
487 if ((p != start) && _isspace(p[-1]))
488 {
489 q--;
490 }
491
492 // If q got behind p, it is safe and necessary to null-terminate q
493
494 if (q != p)
495 {
496 *q = '\0';
497 }
498 }
499
|
500 kumpf 1.43.2.1 Boolean XmlParser::next(
501 XmlEntry& entry,
|
502 dmitry.mikulin 1.43.2.2 Boolean includeComment)
|
503 mike 1.13 {
|
504 kumpf 1.43.2.1 entry.attributes.clear();
505
|
506 mike 1.13 if (!_putBackStack.isEmpty())
507 {
|
508 david.dillard 1.32 entry = _putBackStack.top();
509 _putBackStack.pop();
510 return true;
|
511 mike 1.13 }
512
513 // If a character was overwritten with a null-terminator the last
514 // time this routine was called, then put back that character. Before
515 // exiting of course, restore the null-terminator.
516
517 char* nullTerminator = 0;
518
519 if (_restoreChar && !*_current)
520 {
|
521 david.dillard 1.32 nullTerminator = _current;
522 *_current = _restoreChar;
523 _restoreChar = '\0';
|
524 mike 1.13 }
525
|
526 venkat.puvvada 1.41 // Loop until we are done with comments if includeComment is false.
527 do
528 {
529 // Skip over any whitespace:
530 _skipWhitespace(_line, _current);
531
532 if (!*_current)
533 {
534 if (nullTerminator)
535 *nullTerminator = '\0';
|
536 mike 1.13
|
537 venkat.puvvada 1.41 if (!_stack.isEmpty())
538 throw XmlException(XmlException::UNCLOSED_TAGS, _line);
|
539 mike 1.13
|
540 venkat.puvvada 1.41 return false;
541 }
|
542 mike 1.13
|
543 venkat.puvvada 1.41 // Either a "<...>" or content begins next:
|
544 mike 1.13
|
545 venkat.puvvada 1.41 if (*_current == '<')
546 {
547 _current++;
548 _getElement(_current, entry);
|
549 mike 1.13
|
550 venkat.puvvada 1.41 if (nullTerminator)
551 *nullTerminator = '\0';
|
552 mike 1.13
|
553 venkat.puvvada 1.41 if (entry.type == XmlEntry::START_TAG)
554 {
555 if (_stack.isEmpty() && _foundRoot)
556 throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
|
557 mike 1.13
|
558 venkat.puvvada 1.41 _foundRoot = true;
559 _stack.push((char*)entry.text);
560 }
561 else if (entry.type == XmlEntry::END_TAG)
562 {
563 if (_stack.isEmpty())
564 throw XmlException(XmlException::START_END_MISMATCH, _line);
|
565 mike 1.13
|
566 venkat.puvvada 1.41 if (strcmp(_stack.top(), entry.text) != 0)
567 throw XmlException(XmlException::START_END_MISMATCH, _line);
|
568 david.dillard 1.32
|
569 venkat.puvvada 1.41 _stack.pop();
570 }
|
571 david.dillard 1.32 }
|
572 venkat.puvvada 1.41 else
|
573 david.dillard 1.32 {
|
574 venkat.puvvada 1.41 // Normalize the content:
|
575 mike 1.13
|
576 venkat.puvvada 1.41 char* start;
577 _normalize(_line, _current, '<', start);
|
578 mike 1.13
|
579 venkat.puvvada 1.41 // Get the content:
|
580 mike 1.13
|
581 venkat.puvvada 1.41 entry.type = XmlEntry::CONTENT;
582 entry.text = start;
|
583 kumpf 1.37
|
584 venkat.puvvada 1.41 // Overwrite '<' with a null character (temporarily).
|
585 kumpf 1.37
|
586 venkat.puvvada 1.41 _restoreChar = *_current;
587 *_current = '\0';
|
588 kumpf 1.37
|
589 venkat.puvvada 1.41 if (nullTerminator)
590 *nullTerminator = '\0';
591 }
|
592 dmitry.mikulin 1.43.2.2 } while (!includeComment && entry.type == XmlEntry::COMMENT);
593
|
594 kumpf 1.43.2.3 if (_supportedNamespaces &&
595 (entry.type == XmlEntry::START_TAG ||
596 entry.type == XmlEntry::EMPTY_TAG ||
597 entry.type == XmlEntry::END_TAG))
|
598 dmitry.mikulin 1.43.2.2 {
599 // Process attributes and enter namespaces into the table
600 if (entry.type == XmlEntry::START_TAG ||
601 entry.type == XmlEntry::EMPTY_TAG)
602 {
603 _scopeLevel++;
604 for (unsigned int i = 0; i < entry.attributes.size(); i++)
605 {
|
606 kumpf 1.43.2.3 XmlAttribute& attr = entry.attributes[i];
|
607 dmitry.mikulin 1.43.2.4 if (strncmp(attr.name, "xmlns", 5) == 0)
|
608 dmitry.mikulin 1.43.2.2 {
609 XmlNamespace ns;
|
610 dmitry.mikulin 1.43.2.4 if (attr.name[5] == ':')
|
611 dmitry.mikulin 1.43.2.2 {
|
612 dmitry.mikulin 1.43.2.4 ns.localName = attr.name + 6;
613
614 // Check if we have malformed XML of the form:
615 // "xmlns:=URI". In this case attr.name will be set
616 // to "xmlns:" and ns.localName will point to '\0'
617 if (ns.localName[0] == '\0')
618 {
619 throw XmlException(
620 XmlException::MALFORMED_NAMESPACE_DECL,
621 _line);
622 }
|
623 dmitry.mikulin 1.43.2.2 }
624 else
625 {
|
626 dmitry.mikulin 1.43.2.4 // Default name space has no local name
627 ns.localName = 0;
|
628 dmitry.mikulin 1.43.2.2 }
|
629 dmitry.mikulin 1.43.2.4 ns.extendedName = attr.value;
630 ns.scopeLevel = _scopeLevel;
631 ns.type = getSupportedNamespaceType(ns.extendedName);
632
633 // Even unsupported namespaces get pushed onto the stack.
634 // We will throw an exception of there is an attempt to
635 // reference an unsupported namespace later.
636 _nameSpaces.push(ns);
637 }
638 else
639 {
640 // Attribute names may also be namespace qualified.
641 attr.nsType = _getNamespaceType(attr.name);
|
642 dmitry.mikulin 1.43.2.2 }
643 }
644 }
|
645 kumpf 1.43.2.3
|
646 dmitry.mikulin 1.43.2.4 // Get the namespace type for this tag.
647 entry.nsType = _getNamespaceType(entry.text);
|
648 dmitry.mikulin 1.43.2.2
|
649 kumpf 1.43.2.3 if (entry.type == XmlEntry::END_TAG ||
650 entry.type == XmlEntry::EMPTY_TAG)
651 {
|
652 dmitry.mikulin 1.43.2.2 // Remove any namespaces of the current scope level from
653 // the scope stack.
654 while (!_nameSpaces.isEmpty() &&
655 _scopeLevel <= _nameSpaces.top().scopeLevel)
656 {
657 _nameSpaces.pop();
658 }
659
660 PEGASUS_ASSERT(_scopeLevel > 0);
661 _scopeLevel--;
662 }
663 }
664 else
665 {
666 entry.nsType = -1;
667 }
|
668 kumpf 1.37
|
669 venkat.puvvada 1.41 return true;
|
670 mike 1.13 }
671
|
672 dmitry.mikulin 1.43.2.2 // Get the namespace type of the given tag
673 int XmlParser::_getNamespaceType(const char* tag)
674 {
675 const char* pos = strchr(tag, ':');
676
|
677 dmitry.mikulin 1.43.2.4 // If ":" is not found, the tag is not namespace qualified and we
678 // need to look for the default name space.
|
679 dmitry.mikulin 1.43.2.2
680 // Search the namespace stack from the top
681 for (int i = _nameSpaces.size() - 1; i >=0; i--)
682 {
|
683 dmitry.mikulin 1.43.2.4 // If ":" is found, look for the name space with the matching
684 // local name...
685 if ((pos && _nameSpaces[i].localName &&
686 !strncmp(_nameSpaces[i].localName, tag, pos - tag)) ||
687 // ... otherwise look for the default name space. It's the
688 // one with localName set to NULL
689 (!pos && !_nameSpaces[i].localName))
690 {
691 // If it's a reference to an unsupported namespace,
692 // throw an exception
693 if (_nameSpaces[i].type == -1)
694 {
695 throw XmlException(XmlException::UNSUPPORTED_NAMESPACE, _line);
696 }
|
697 dmitry.mikulin 1.43.2.2 return _nameSpaces[i].type;
698 }
699 }
|
700 dmitry.mikulin 1.43.2.4
701 // If the tag is namespace qualified, but the name space has not been
702 // declared, it's malformed XML and we must throw an exception
703 if (pos)
704 {
705 throw XmlException(XmlException::UNDECLARED_NAMESPACE, _line);
706 }
707
708 // Otherwise it's OK not to have a name space.
|
709 dmitry.mikulin 1.43.2.2 return -1;
710 }
711
712 // Gived the extended namespace name, find it in the table of supported
713 // namespaces and return its type.
714 int XmlParser::getSupportedNamespaceType(const char* extendedName)
715 {
716 for (int i = 0;
717 _supportedNamespaces[i].localName != 0;
718 i++)
719 {
720 PEGASUS_ASSERT(_supportedNamespaces[i].type == i);
721 if (!strcmp(_supportedNamespaces[i].extendedName, extendedName))
722 {
723 return _supportedNamespaces[i].type;
724 }
725 }
726 return -1;
727 }
728
729 XmlNamespace* XmlParser::getNamespace(int nsType)
730 dmitry.mikulin 1.43.2.2 {
731 for (int i = _nameSpaces.size() - 1; i >=0; i--)
732 {
733 if (_nameSpaces[i].type == nsType)
734 {
735 return &_nameSpaces[i];
736 }
737 }
738 return 0;
739 }
740
|
741 mike 1.13 void XmlParser::putBack(XmlEntry& entry)
742 {
743 _putBackStack.push(entry);
744 }
745
746 XmlParser::~XmlParser()
747 {
748 // Nothing to do!
749 }
750
|
// Lookup table, indexed by character code, that is 1 for the characters
// allowed after the first character of an element or attribute name:
//
//     A-Z a-z 0-9 _ - : .
//
// and 0 for everything else. One row per 16 character codes.
static unsigned char _isInnerElementChar[256] =
{
    /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x10 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x20 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,   // '-' '.'
    /* 0x30 */ 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,   // '0'-'9' ':'
    /* 0x40 */ 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,   // 'A'-'O'
    /* 0x50 */ 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,   // 'P'-'Z' '_'
    /* 0x60 */ 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,   // 'a'-'o'
    /* 0x70 */ 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,   // 'p'-'z'
    /* 0x80 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x90 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xA0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xB0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xC0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xD0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xE0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xF0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
762
|
763 mike 1.13 Boolean XmlParser::_getElementName(char*& p)
764 {
|
765 mike 1.35 if (!CharSet::isAlNumUnder(Uint8(*p)))
|
766 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
767 mike 1.35
|
768 kumpf 1.24 p++;
|
769 mike 1.13
|
770 mike 1.35 while (*p && _isInnerElementChar[Uint8(*p)])
|
771 david.dillard 1.32 p++;
|
772 mike 1.13
773 // The next character must be a space:
774
|
775 chuck 1.26 if (_isspace(*p))
|
776 mike 1.13 {
|
777 david.dillard 1.32 *p++ = '\0';
|
778 mike 1.34 _skipWhitespace(_line, p);
|
779 mike 1.13 }
780
781 if (*p == '>')
782 {
|
783 david.dillard 1.32 *p++ = '\0';
784 return true;
|
785 mike 1.13 }
786
787 return false;
788 }
789
790 Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement)
791 {
792 openCloseElement = false;
793
|
794 mike 1.35 if (!CharSet::isAlNumUnder(Uint8(*p)))
|
795 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
796 mike 1.35
|
797 kumpf 1.24 p++;
|
798 mike 1.13
|
799 mike 1.35 while (*p && _isInnerElementChar[Uint8(*p)])
|
800 david.dillard 1.32 p++;
|
801 mike 1.13
802 // The next character must be a space:
803
|
804 chuck 1.26 if (_isspace(*p))
|
805 mike 1.13 {
|
806 david.dillard 1.32 *p++ = '\0';
|
807 mike 1.34 _skipWhitespace(_line, p);
|
808 mike 1.13 }
809
810 if (*p == '>')
811 {
|
812 david.dillard 1.32 *p++ = '\0';
813 return true;
|
814 mike 1.13 }
815
816 if (p[0] == '/' && p[1] == '>')
817 {
|
818 david.dillard 1.32 openCloseElement = true;
819 *p = '\0';
820 p += 2;
821 return true;
|
822 mike 1.13 }
823
824 return false;
825 }
826
827 void XmlParser::_getAttributeNameAndEqual(char*& p)
828 {
|
829 mike 1.35 if (!CharSet::isAlNumUnder((Uint8)*p))
|
830 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
831 mike 1.35
|
832 kumpf 1.24 p++;
|
833 mike 1.13
|
834 mike 1.35 while (*p && _isInnerElementChar[Uint8(*p)])
|
835 david.dillard 1.32 p++;
|
836 mike 1.13
837 char* term = p;
838
|
839 mike 1.34 _skipWhitespace(_line, p);
|
840 mike 1.13
841 if (*p != '=')
|
842 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
843 mike 1.13
844 p++;
845
|
846 mike 1.34 _skipWhitespace(_line, p);
|
847 mike 1.13
848 *term = '\0';
849 }
850
851 void XmlParser::_getComment(char*& p)
852 {
853 // Now p points to first non-whitespace character beyond "<--" sequence:
854
855 for (; *p; p++)
856 {
|
857 david.dillard 1.32 if (p[0] == '-' && p[1] == '-')
858 {
859 if (p[2] != '>')
860 {
861 throw XmlException(
862 XmlException::MINUS_MINUS_IN_COMMENT, _line);
863 }
864
865 // Find end of comment (excluding whitespace):
866
867 *p = '\0';
868 p += 3;
869 return;
870 }
|
871 mike 1.13 }
872
873 // If it got this far, then the comment is unterminated:
874
875 throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
876 }
877
878 void XmlParser::_getCData(char*& p)
879 {
880 // At this point p points one past "<![CDATA[" sequence:
881
882 for (; *p; p++)
883 {
|
884 david.dillard 1.32 if (p[0] == ']' && p[1] == ']' && p[2] == '>')
885 {
886 *p = '\0';
887 p += 3;
888 return;
889 }
890 else if (*p == '\n')
891 _line++;
|
892 mike 1.13 }
893
894 // If it got this far, then the comment is unterminated:
895
896 throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
897 }
898
899 void XmlParser::_getDocType(char*& p)
900 {
901 // Just ignore the DOCTYPE command for now:
902
903 for (; *p && *p != '>'; p++)
904 {
|
905 david.dillard 1.32 if (*p == '\n')
906 _line++;
|
907 mike 1.13 }
908
909 if (*p != '>')
|
910 david.dillard 1.32 throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
|
911 mike 1.13
912 p++;
913 }
914
915 void XmlParser::_getElement(char*& p, XmlEntry& entry)
916 {
917 //--------------------------------------------------------------------------
918 // Get the element name (expect one of these: '?', '!', [A-Za-z_])
919 //--------------------------------------------------------------------------
920
921 if (*p == '?')
922 {
|
923 david.dillard 1.32 entry.type = XmlEntry::XML_DECLARATION;
924 entry.text = ++p;
|
925 mike 1.13
|
926 david.dillard 1.32 Boolean openCloseElement = false;
|
927 mike 1.13
|
928 david.dillard 1.32 if (_getElementName(p))
929 return;
|
930 mike 1.13 }
931 else if (*p == '!')
932 {
|
933 david.dillard 1.32 p++;
|
934 mike 1.13
|
935 david.dillard 1.32 // Expect a comment or CDATA:
|
936 mike 1.13
|
937 david.dillard 1.32 if (p[0] == '-' && p[1] == '-')
938 {
939 p += 2;
940 entry.type = XmlEntry::COMMENT;
941 entry.text = p;
942 _getComment(p);
943 return;
944 }
945 else if (memcmp(p, "[CDATA[", 7) == 0)
946 {
947 p += 7;
948 entry.type = XmlEntry::CDATA;
949 entry.text = p;
950 _getCData(p);
951 return;
952 }
953 else if (memcmp(p, "DOCTYPE", 7) == 0)
954 {
955 entry.type = XmlEntry::DOCTYPE;
|
956 kumpf 1.37 entry.text = "";
|
957 david.dillard 1.32 _getDocType(p);
958 return;
959 }
960 throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
|
961 mike 1.13 }
962 else if (*p == '/')
963 {
|
964 david.dillard 1.32 entry.type = XmlEntry::END_TAG;
965 entry.text = ++p;
|
966 mike 1.13
|
967 david.dillard 1.32 if (!_getElementName(p))
968 throw(XmlException(XmlException::BAD_END_TAG, _line));
|
969 mike 1.13
|
970 david.dillard 1.32 return;
|
971 mike 1.13 }
|
972 david 1.25 else if ((((*p >= 'A') && (*p <= 'Z')) ||
|
973 kumpf 1.24 ((*p >= 'a') && (*p <= 'z')) ||
974 (*p == '_')))
|
975 mike 1.13 {
|
976 david.dillard 1.32 entry.type = XmlEntry::START_TAG;
977 entry.text = p;
|
978 mike 1.13
|
979 david.dillard 1.32 Boolean openCloseElement = false;
|
980 mike 1.13
|
981 david.dillard 1.32 if (_getOpenElementName(p, openCloseElement))
982 {
983 if (openCloseElement)
984 entry.type = XmlEntry::EMPTY_TAG;
985 return;
986 }
|
987 mike 1.13 }
988 else
|
989 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
990 mike 1.13
991 //--------------------------------------------------------------------------
992 // Grab all the attributes:
993 //--------------------------------------------------------------------------
994
995 for (;;)
996 {
|
997 david.dillard 1.32 if (entry.type == XmlEntry::XML_DECLARATION)
998 {
999 if (p[0] == '?' && p[1] == '>')
1000 {
1001 p += 2;
1002 return;
1003 }
1004 }
1005 else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
1006 {
1007 entry.type = XmlEntry::EMPTY_TAG;
1008 p += 2;
1009 return;
1010 }
1011 else if (*p == '>')
1012 {
1013 p++;
1014 return;
1015 }
1016
1017 XmlAttribute attr;
|
1018 dmitry.mikulin 1.43.2.4 attr.nsType = -1;
|
1019 david.dillard 1.32 attr.name = p;
1020 _getAttributeNameAndEqual(p);
1021
|
1022 kumpf 1.37 // Get the attribute value (e.g., "some value")
1023 {
1024 if ((*p != '"') && (*p != '\''))
1025 {
1026 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
1027 }
1028
1029 char quote = *p++;
1030
1031 char* start;
1032 _normalize(_line, p, quote, start);
1033 attr.value = start;
1034
1035 if (*p != quote)
1036 {
1037 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
1038 }
1039
1040 // Overwrite the closing quote with a null-terminator:
|
1041 david.dillard 1.32
|
1042 kumpf 1.37 *p++ = '\0';
1043 }
|
1044 david.dillard 1.32
1045 if (entry.type == XmlEntry::XML_DECLARATION)
1046 {
1047 // The next thing must a space or a "?>":
|
1048 mike 1.13
|
1049 david.dillard 1.32 if (!(p[0] == '?' && p[1] == '>') && !_isspace(*p))
1050 {
1051 throw XmlException(
1052 XmlException::BAD_ATTRIBUTE_VALUE, _line);
1053 }
1054 }
1055 else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || _isspace(*p)))
1056 {
1057 // The next thing must be a space or a '>':
|
1058 mike 1.13
|
1059 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
1060 }
|
1061 mike 1.13
|
1062 mike 1.34 _skipWhitespace(_line, p);
|
1063 david.dillard 1.32
|
1064 kumpf 1.43.2.1 entry.attributes.append(attr);
|
1065 mike 1.13 }
1066 }
1067
// Printable names of the entry types; indexed by the entry's type value in
// XmlEntry::print().
static const char* _typeStrings[] =
{
    "XML_DECLARATION",
    "START_TAG", "EMPTY_TAG", "END_TAG",
    "COMMENT",
    "CDATA",
    "DOCTYPE",
    "CONTENT"
};
1079
1080 void XmlEntry::print() const
1081 {
1082 PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
1083
1084 Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
1085
1086 if (needQuotes)
|
1087 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
1088
|
1089 mike 1.13 _printValue(text);
1090
1091 if (needQuotes)
|
1092 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
|
1093 mike 1.13
1094 PEGASUS_STD(cout) << '\n';
1095
|
1096 kumpf 1.43.2.1 for (Uint32 i = 0; i < attributes.size(); i++)
|
1097 mike 1.13 {
|
1098 david.dillard 1.32 PEGASUS_STD(cout) << " " << attributes[i].name << "=\"";
1099 _printValue(attributes[i].value);
1100 PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
|
1101 mike 1.13 }
1102 }
1103
1104 const XmlAttribute* XmlEntry::findAttribute(
1105 const char* name) const
1106 {
|
1107 kumpf 1.43.2.1 for (Uint32 i = 0; i < attributes.size(); i++)
|
1108 mike 1.13 {
|
1109 david.dillard 1.32 if (strcmp(attributes[i].name, name) == 0)
1110 return &attributes[i];
|
1111 mike 1.13 }
1112
1113 return 0;
1114 }
1115
1116 // Find first non-whitespace character (set first) and last non-whitespace
1117 // character (set last one past this). For example, consider this string:
1118 //
|
1119 david.dillard 1.32 // " 87 "
|
1120 mike 1.13 //
1121 // The first pointer would point to '8' and the last pointer woudl point one
1122 // beyond '7'.
1123
1124 static void _findEnds(
|
1125 david.dillard 1.32 const char* str,
1126 const char*& first,
|
1127 mike 1.13 const char*& last)
1128 {
1129 first = str;
1130
|
1131 chuck 1.26 while (_isspace(*first))
|
1132 david.dillard 1.32 first++;
|
1133 mike 1.13
1134 if (!*first)
1135 {
|
1136 david.dillard 1.32 last = first;
1137 return;
|
1138 mike 1.13 }
1139
1140 last = first + strlen(first);
1141
|
1142 chuck 1.26 while (last != first && _isspace(last[-1]))
|
1143 david.dillard 1.32 last--;
|
1144 mike 1.13 }
1145
1146 Boolean XmlEntry::getAttributeValue(
|
1147 david.dillard 1.32 const char* name,
|
1148 mike 1.13 Uint32& value) const
1149 {
1150 const XmlAttribute* attr = findAttribute(name);
1151
1152 if (!attr)
|
1153 david.dillard 1.32 return false;
|
1154 mike 1.13
1155 const char* first;
1156 const char* last;
1157 _findEnds(attr->value, first, last);
1158
1159 char* end = 0;
1160 long tmp = strtol(first, &end, 10);
1161
1162 if (!end || end != last)
|
1163 david.dillard 1.32 return false;
|
1164 mike 1.13
1165 value = Uint32(tmp);
1166 return true;
1167 }
1168
1169 Boolean XmlEntry::getAttributeValue(
|
1170 david.dillard 1.32 const char* name,
|
1171 mike 1.13 Real32& value) const
1172 {
1173 const XmlAttribute* attr = findAttribute(name);
1174
1175 if (!attr)
|
1176 david.dillard 1.32 return false;
|
1177 mike 1.13
1178 const char* first;
1179 const char* last;
1180 _findEnds(attr->value, first, last);
1181
1182 char* end = 0;
1183 double tmp = strtod(first, &end);
1184
1185 if (!end || end != last)
|
1186 david.dillard 1.32 return false;
|
1187 mike 1.13
|
1188 david.dillard 1.32 value = static_cast<Real32>(tmp);
|
1189 mike 1.13 return true;
1190 }
1191
1192 Boolean XmlEntry::getAttributeValue(
|
1193 david.dillard 1.32 const char* name,
|
1194 mike 1.13 const char*& value) const
1195 {
1196 const XmlAttribute* attr = findAttribute(name);
1197
1198 if (!attr)
|
1199 david.dillard 1.32 return false;
|
1200 mike 1.13
1201 value = attr->value;
1202 return true;
1203 }
1204
1205 Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1206 {
1207 const char* tmp;
1208
1209 if (!getAttributeValue(name, tmp))
|
1210 david.dillard 1.32 return false;
|
1211 mike 1.13
|
1212 chuck 1.28 value = String(tmp);
|
1213 mike 1.13 return true;
1214 }
1215
|
1216 mike 1.33 void XmlAppendCString(Buffer& out, const char* str)
|
1217 mike 1.13 {
|
1218 david.dillard 1.32 out.append(str, static_cast<Uint32>(strlen(str)));
|
1219 mike 1.13 }
1220
1221 PEGASUS_NAMESPACE_END
|