pegasus/src/Pegasus/Common/XmlParser.cpp - annotate

Return to XmlParser.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

1 martin 1.51 //%LICENSE////////////////////////////////////////////////////////////////
2 martin 1.52 //
3 martin 1.51 // Licensed to The Open Group (TOG) under one or more contributor license 4 // agreements. Refer to the OpenPegasusNOTICE.txt file distributed with 5 // this work for additional information regarding copyright ownership. 6 // Each contributor licenses this file to you under the OpenPegasus Open 7 // Source License; you may not use this file except in compliance with the 8 // License.
9 martin 1.52 //
10 martin 1.51 // Permission is hereby granted, free of charge, to any person obtaining a 11 // copy of this software and associated documentation files (the "Software"), 12 // to deal in the Software without restriction, including without limitation 13 // the rights to use, copy, modify, merge, publish, distribute, sublicense, 14 // and/or sell copies of the Software, and to permit persons to whom the 15 // Software is furnished to do so, subject to the following conditions:
16 martin 1.52 //
17 martin 1.51 // The above copyright notice and this permission notice shall be included 18 // in all copies or substantial portions of the Software.
19 martin 1.52 //
20 martin 1.51 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21 martin 1.52 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 martin 1.51 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 23 // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 24 // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 25 // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 26 // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 martin 1.52 //
28 martin 1.51 //////////////////////////////////////////////////////////////////////////
29 mike 1.13 // 30 //%///////////////////////////////////////////////////////////////////////////// 31 32 //////////////////////////////////////////////////////////////////////////////// 33 // 34 // XmlParser 35 //
36 david.dillard 1.32 // This file contains a simple non-validating XML parser. Here are 37 // several rules for well-formed XML:
38 mike 1.13 //
39 david.dillard 1.32 // 1. Documents must begin with an XML declaration:
40 mike 1.13 //
41 david.dillard 1.32 // <?xml version="1.0" standalone="yes"?>
42 mike 1.13 //
43 david.dillard 1.32 // 2. Comments have the form:
44 mike 1.13 //
45 david.dillard 1.32 // <!-- blah blah blah -->
46 mike 1.13 //
47 david.dillard 1.32 // 3. The following entity references are supported:
48 mike 1.13 //
49 david.dillard 1.32 // &amp - ampersand 50 // &lt - less-than 51 // &gt - greater-than 52 // &quot - full quote 53 // &apos - apostrophe
54 mike 1.13 //
55 kumpf 1.18 // as well as character (numeric) references:
56 mike 1.35 //
57 kumpf 1.18 // &#49; - decimal reference for character '1' 58 // &#x31; - hexadecimal reference for character '1' 59 //
60 david.dillard 1.32 // 4. Element names and attribute names take the following form:
61 mike 1.13 //
62 david.dillard 1.32 // [A-Za-z_][A-Za-z_0-9-.:]*
63 mike 1.13 //
64 david.dillard 1.32 // 5. Arbitrary data (CDATA) can be enclosed like this:
65 mike 1.13 //
66 david.dillard 1.32 // <![CDATA[ 67 // ... 68 // ]]>
69 mike 1.13 //
70 david.dillard 1.32 // 6. Element names and attributes names are case-sensitive.
71 mike 1.13 //
72 david.dillard 1.32 // 7. XmlAttribute values must be delimited by full or half quotes. 73 // XmlAttribute values must be delimited.
74 mike 1.13 //
75 david.dillard 1.32 // 8. <!DOCTYPE...>
76 mike 1.13 // 77 // TODO: 78 //
79 kumpf 1.40 // ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is
80 mike 1.35 // work. Handle <!DOCTYPE...> sections which are complicated (containing
81 mike 1.13 // rules rather than references to files). 82 //
83 david.dillard 1.32 // Remove newlines from string literals:
84 mike 1.13 // 85 // Example: <xyz x="hello
86 david.dillard 1.32 // world">
87 mike 1.13 // 88 //////////////////////////////////////////////////////////////////////////////// 89
90 sage 1.14 #include <Pegasus/Common/Config.h>
91 mike 1.13 #include <cctype> 92 #include <cstdio> 93 #include <cstdlib> 94 #include <cstring> 95 #include "XmlParser.h" 96 #include "Logger.h"
97 chuck 1.19 #include "ExceptionRep.h"
98 mike 1.34 #include "CharSet.h"
99 mike 1.13 100 PEGASUS_NAMESPACE_BEGIN 101 102 //////////////////////////////////////////////////////////////////////////////// 103 // 104 // Static helper functions 105 // 106 //////////////////////////////////////////////////////////////////////////////// 107 108 static void _printValue(const char* p) 109 { 110 for (; *p; p++) 111 {
112 david.dillard 1.32 if (p == '\n') 113 PEGASUS_STD(cout) << "\\n"; 114 else if (p == '\r') 115 PEGASUS_STD(cout) << "\\r"; 116 else if (p == '\t') 117 PEGASUS_STD(cout) << "\\t"; 118 else 119 PEGASUS_STD(cout) << p;
120 mike 1.13 } 121 } 122 123 struct EntityReference 124 { 125 const char* match; 126 Uint32 length; 127 char replacement; 128 }; 129
130 chuck 1.26 // Implements a check for a whitespace character, without calling 131 // isspace( ). The isspace( ) function is locale-sensitive, 132 // and incorrectly flags some chars above 0x7f as whitespace. This 133 // causes the XmlParser to incorrectly parse UTF-8 data. 134 // 135 // Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml) 136 // defines white space as:
137 david.dillard 1.32 // S ::= (#x20 \| #x9 \| #xD \| #xA)+
138 mike 1.34 static inline int _isspace(char c)
139 chuck 1.26 {
140 kumpf 1.36 return CharSet::isXmlWhiteSpace((Uint8)c);
141 chuck 1.26 } 142
////////////////////////////////////////////////////////////////////////////////
//
// XmlException
//
////////////////////////////////////////////////////////////////////////////////

// Default message text for each XmlException code. The table is indexed with
// (code - 1) and must stay in the same order as _xmlKeys.
static const char* _xmlMessages[] =
{
    "Bad opening element",
    "Bad closing element",
    "Bad attribute name",
    "Expected equal sign",
    "Bad attribute value",
    "A \"--\" sequence found within comment",
    "Unterminated comment",
    "Unterminated CDATA block",
    "Unterminated DOCTYPE",
    "Malformed reference",
    "Expected a comment or CDATA following \"<!\" sequence",
    "Closing element does not match opening element",
    "One or more tags are still open",
    "More than one root element was encountered",
    "Validation error",
    "Semantic error",
    "Namespace not declared"
};

// Message-bundle key for each XmlException code. The table is indexed with
// (code - 1); entry N belongs to the default text in _xmlMessages[N].
static const char* _xmlKeys[] =
{
    "Common.XmlParser.BAD_START_TAG",
    "Common.XmlParser.BAD_END_TAG",
    "Common.XmlParser.BAD_ATTRIBUTE_NAME",
    "Common.XmlParser.EXPECTED_EQUAL_SIGN",
    "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
    "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
    "Common.XmlParser.UNTERMINATED_COMMENT",
    "Common.XmlParser.UNTERMINATED_CDATA",
    "Common.XmlParser.UNTERMINATED_DOCTYPE",
    "Common.XmlParser.MALFORMED_REFERENCE",
    "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
    "Common.XmlParser.START_END_MISMATCH",
    "Common.XmlParser.UNCLOSED_TAGS",
    "Common.XmlParser.MULTIPLE_ROOTS",
    "Common.XmlParser.VALIDATION_ERROR",
    "Common.XmlParser.SEMANTIC_ERROR",
    "Common.XmlParser.UNDECLARED_NAMESPACE"
};


193 kumpf 1.40 static MessageLoaderParms _formMessage( 194 Uint32 code, 195 Uint32 line, 196 const String& message)
197 chuck 1.19 { 198 String dftMsg = _xmlMessages[Uint32(code) - 1];
199 kumpf 1.48 const char* key = _xmlKeys[Uint32(code) - 1];
200 kumpf 1.45 String msg = message;
201 chuck 1.19 202 dftMsg.append(": on line $0"); 203 if (message.size()) 204 {
205 david.dillard 1.32 msg = ": " + msg; 206 dftMsg.append("$1"); 207 }
208 chuck 1.19
209 kumpf 1.48 return MessageLoaderParms(key, dftMsg.getCString(), line ,msg);
210 chuck 1.19 } 211 212 static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line) 213 { 214 String dftMsg = _xmlMessages[Uint32(code) - 1];
215 kumpf 1.48 const char* key = _xmlKeys[Uint32(code) - 1];
216 chuck 1.19 217 dftMsg.append(": on line $0");
218 david.dillard 1.32
219 kumpf 1.48 return MessageLoaderParms(key, dftMsg.getCString(), line);
220 chuck 1.19 } 221
222 mike 1.13 223 XmlException::XmlException(
224 david.dillard 1.32 XmlException::Code code,
225 mike 1.13 Uint32 lineNumber,
226 david.dillard 1.32 const String& message)
227 mike 1.13 : Exception(_formMessage(code, lineNumber, message)) 228 { 229 230 } 231
232 chuck 1.19 233 XmlException::XmlException(
234 david.dillard 1.32 XmlException::Code code,
235 chuck 1.19 Uint32 lineNumber,
236 david.dillard 1.32 MessageLoaderParms& msgParms)
237 chuck 1.19 : Exception(_formPartialMessage(code, lineNumber)) 238 {
239 david.dillard 1.32 if (msgParms.default_msg.size())
240 humberto 1.21 {
241 david.dillard 1.32 msgParms.default_msg = ": " + msgParms.default_msg; 242 } 243 _rep->message.append(MessageLoader::getMessage(msgParms));
244 chuck 1.19 } 245 246
247 mike 1.13 //////////////////////////////////////////////////////////////////////////////// 248 // 249 // XmlValidationError 250 // 251 //////////////////////////////////////////////////////////////////////////////// 252 253 XmlValidationError::XmlValidationError( 254 Uint32 lineNumber, 255 const String& message) 256 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message) 257 { 258 } 259
260 chuck 1.19 261 XmlValidationError::XmlValidationError( 262 Uint32 lineNumber, 263 MessageLoaderParms& msgParms) 264 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms) 265 { 266 } 267 268
269 mike 1.13 //////////////////////////////////////////////////////////////////////////////// 270 // 271 // XmlSemanticError 272 // 273 //////////////////////////////////////////////////////////////////////////////// 274 275 XmlSemanticError::XmlSemanticError( 276 Uint32 lineNumber, 277 const String& message) 278 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message) 279 { 280 }
281 chuck 1.19 282 283 XmlSemanticError::XmlSemanticError( 284 Uint32 lineNumber, 285 MessageLoaderParms& msgParms) 286 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms) 287 { 288 } 289
290 mike 1.13 291 //////////////////////////////////////////////////////////////////////////////// 292 // 293 // XmlParser 294 // 295 //////////////////////////////////////////////////////////////////////////////// 296
297 mike 1.55 XmlParser::XmlParser(char* text, XmlNamespace* ns, Boolean hideEmptyTags)
298 kumpf 1.40 : _line(1), 299 _current(text), 300 _restoreChar('\0'),
301 kumpf 1.45 _foundRoot(false), 302 _supportedNamespaces(ns), 303 // Start valid indexes with -2. -1 is reserved for not found.
304 mike 1.55 _currentUnsupportedNSType(-2), 305 _hideEmptyTags(hideEmptyTags)
306 mike 1.13 { 307 } 308
309 mike 1.34 inline void _skipWhitespace(Uint32& line, char& p) 310 { 311 while (p && _isspace(p)) 312 { 313 if (p == '\n') 314 line++; 315 316 p++; 317 } 318 } 319
#if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \
    defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC)
#pragma optimize( "", off )
#endif
// Recognizes one of the five predefined entity names ("gt;", "lt;", "apos;",
// "quot;", "amp;") at p, which must refer to the character following '&'.
//
// On success p is advanced past the terminating ';' and the replacement
// character is returned. Otherwise p is left unchanged and -1 is returned.
// The comparisons short-circuit, so nothing beyond the terminating NUL of the
// buffer is examined.
static int _getEntityRef(char*& p)
{
    if ((p[0] == 'g') && (p[1] == 't') && (p[2] == ';'))
    {
        p += 3;
        return '>';
    }

    if ((p[0] == 'l') && (p[1] == 't') && (p[2] == ';'))
    {
        p += 3;
        return '<';
    }

    if ((p[0] == 'a') && (p[1] == 'p') && (p[2] == 'o') && (p[3] == 's') &&
        (p[4] == ';'))
    {
        p += 5;
        return '\'';
    }

    if ((p[0] == 'q') && (p[1] == 'u') && (p[2] == 'o') && (p[3] == 't') &&
        (p[4] == ';'))
    {
        p += 5;
        return '"';
    }

    if ((p[0] == 'a') && (p[1] == 'm') && (p[2] == 'p') && (p[3] == ';'))
    {
        p += 4;
        return '&';
    }

    return -1;
}
#if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \
    defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC)
#pragma optimize( "", on )
#endif

// Parses a numeric character reference. p must refer to the character that
// follows "&#": either decimal digits or 'x' followed by hexadecimal digits,
// terminated by ';'.
//
// Returns the character value (0-255) and advances p past the ';'. Returns
// -1 if the reference is malformed, is larger than 255, or has more than
// 4 hexadecimal / 5 decimal digits.
static inline int _getCharRef(char*& p)
{
    const bool hex = (*p == 'x');

    if (hex)
    {
        ++p;
    }

    // strtoul() also accepts leading whitespace, a sign and (for base 16) a
    // "0x" prefix. None of these may appear in a character reference, so
    // insist that the number starts with a plain digit.
    const unsigned char first = (unsigned char)*p;

    if (hex)
    {
        if (!isxdigit(first))
        {
            return -1;
        }

        if ((first == '0') && ((p[1] == 'x') || (p[1] == 'X')))
        {
            return -1;
        }
    }
    else if (!isdigit(first))
    {
        return -1;
    }

    char* end;
    const unsigned long ch = strtoul(p, &end, hex ? 16 : 10);

    if ((end == p) || (*end != ';') || (ch > 255))
    {
        return -1;
    }

    // Limit the number of digits that are accepted.
    if ((hex && (end - p > 4)) || (!hex && (end - p > 5)))
    {
        return -1;
    }

    p = end + 1;

    return (int)ch;
}

396 kumpf 1.50 // Parse an entity reference or a character reference 397 static inline int _getRef(Uint32 line, char& p) 398 { 399 int ch; 400 401 if (p == '#') 402 { 403 ch = _getCharRef(++p); 404 } 405 else 406 { 407 ch = _getEntityRef(p); 408 } 409 410 if (ch == -1) 411 { 412 throw XmlException(XmlException::MALFORMED_REFERENCE, line); 413 } 414 415 return ch; 416 } 417 kumpf 1.50 418 static inline void _normalizeElementValue( 419 Uint32& line,
420 venkat.puvvada 1.54 char*& p, 421 Uint32 &textLen)
422 kumpf 1.50 { 423 // Process one character at a time: 424 425 char* q = p;
426 venkat.puvvada 1.54 char *start = p;
427 kumpf 1.50 428 while (p && (p != '<')) 429 { 430 if (_isspace(p)) 431 { 432 // Trim whitespace from the end of the value, but do not compress 433 // whitespace within the value. 434 435 const char start = p; 436 437 if (p++ == '\n') 438 { 439 line++; 440 } 441 442 _skipWhitespace(line, p); 443 444 if (p && (p != '<')) 445 { 446 // Transfer internal whitespace to q without compressing it. 447 const char i = start; 448 kumpf 1.50 while (i < p) 449 { 450 q++ = i++; 451 } 452 } 453 else 454 { 455 // Do not transfer trailing whitespace to q. 456 break; 457 } 458 } 459 else if (p == '&') 460 { 461 // Process an entity reference or a character reference. 462 463 q++ = _getRef(line, ++p); 464 } 465 else 466 { 467 q++ = p++; 468 } 469 kumpf 1.50 } 470 471 // If q got behind p, it is safe and necessary to null-terminate q 472 473 if (q != p) 474 { 475 *q = '\0'; 476 }
477 venkat.puvvada 1.54 textLen = (Uint32)(q - start);
478 kumpf 1.50 } 479 480 static inline void _normalizeAttributeValue( 481 Uint32& line, 482 char& p, 483 char end_char, 484 char& start)
485 kumpf 1.37 { 486 // Skip over leading whitespace: 487 488 _skipWhitespace(line, p); 489 start = p; 490 491 // Process one character at a time: 492 493 char* q = p; 494 495 while (p && (p != end_char)) 496 { 497 if (_isspace(p)) 498 { 499 // Compress sequences of whitespace characters to a single space 500 // character. Update line number when newlines encountered. 501 502 if (p++ == '\n') 503 { 504 line++; 505 } 506 kumpf 1.37 507 q++ = ' '; 508 509 _skipWhitespace(line, p); 510 } 511 else if (p == '&') 512 {
513 kumpf 1.50 // Process an entity reference or a character reference.
514 kumpf 1.37
515 kumpf 1.50 *q++ = _getRef(line, ++p);
516 kumpf 1.37 } 517 else 518 { 519 q++ = p++; 520 } 521 } 522 523 // Remove single trailing whitespace (consecutive whitespaces already 524 // compressed above). Since p >= q, we can tell if we need to strip a 525 // trailing space from q by looking at the end of p. We must not look at 526 // the last character of p, though, if p is an empty string.
527 dmitry.mikulin 1.44 Boolean adjust_q = (p != start) && _isspace(p[-1]); 528 529 // We encountered a the end_char or a zero-terminator. 530 531 q = p;
532 kumpf 1.37
533 dmitry.mikulin 1.44 if (adjust_q)
534 kumpf 1.37 { 535 q--; 536 } 537 538 // If q got behind p, it is safe and necessary to null-terminate q 539 540 if (q != p) 541 { 542 *q = '\0'; 543 } 544 } 545
546 mike 1.55 Boolean XmlParser::_next(
547 kumpf 1.45 XmlEntry& entry, 548 Boolean includeComment)
549 mike 1.13 { 550 if (!_putBackStack.isEmpty()) 551 {
552 david.dillard 1.32 entry = _putBackStack.top(); 553 _putBackStack.pop(); 554 return true;
555 mike 1.13 } 556 557 // If a character was overwritten with a null-terminator the last 558 // time this routine was called, then put back that character. Before 559 // exiting of course, restore the null-terminator. 560 561 char* nullTerminator = 0; 562 563 if (_restoreChar && !*_current) 564 {
565 david.dillard 1.32 nullTerminator = _current; 566 *_current = _restoreChar; 567 _restoreChar = '\0';
568 mike 1.13 } 569
570 kumpf 1.45 entry.attributes.clear(); 571 572 if (_supportedNamespaces) 573 { 574 // Remove namespaces of a deeper scope level from the stack. 575 while (!_nameSpaces.isEmpty() && 576 _nameSpaces.top().scopeLevel > _stack.size()) 577 { 578 _nameSpaces.pop(); 579 } 580 } 581
582 venkat.puvvada 1.41 // Loop until we are done with comments if includeComment is false. 583 do 584 { 585 // Skip over any whitespace: 586 _skipWhitespace(_line, _current); 587 588 if (!_current) 589 { 590 if (nullTerminator) 591 nullTerminator = '\0';
592 mike 1.13
593 venkat.puvvada 1.41 if (!_stack.isEmpty()) 594 throw XmlException(XmlException::UNCLOSED_TAGS, _line);
595 mike 1.13
596 venkat.puvvada 1.41 return false; 597 }
598 mike 1.13
599 venkat.puvvada 1.41 // Either a "<...>" or content begins next:
600 mike 1.13
601 venkat.puvvada 1.41 if (*_current == '<') 602 { 603 _current++; 604 _getElement(_current, entry);
605 mike 1.13
606 venkat.puvvada 1.41 if (nullTerminator) 607 *nullTerminator = '\0';
608 mike 1.13
609 venkat.puvvada 1.41 if (entry.type == XmlEntry::START_TAG) 610 { 611 if (_stack.isEmpty() && _foundRoot) 612 throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
613 mike 1.13
614 venkat.puvvada 1.41 _foundRoot = true; 615 _stack.push((char*)entry.text); 616 } 617 else if (entry.type == XmlEntry::END_TAG) 618 { 619 if (_stack.isEmpty()) 620 throw XmlException(XmlException::START_END_MISMATCH, _line);
621 mike 1.13
622 venkat.puvvada 1.41 if (strcmp(_stack.top(), entry.text) != 0) 623 throw XmlException(XmlException::START_END_MISMATCH, _line);
624 david.dillard 1.32
625 venkat.puvvada 1.41 _stack.pop(); 626 }
627 david.dillard 1.32 }
628 venkat.puvvada 1.41 else
629 david.dillard 1.32 {
630 venkat.puvvada 1.41 // Normalize the content:
631 mike 1.13
632 kumpf 1.50 char* start = _current;
633 venkat.puvvada 1.54 Uint32 textLen; 634 _normalizeElementValue(_line, _current, textLen);
635 mike 1.13
636 venkat.puvvada 1.41 // Get the content:
637 mike 1.13
638 venkat.puvvada 1.41 entry.type = XmlEntry::CONTENT; 639 entry.text = start;
640 venkat.puvvada 1.54 entry.textLen = textLen;
641 kumpf 1.37
642 venkat.puvvada 1.41 // Overwrite '<' with a null character (temporarily).
643 kumpf 1.37
644 venkat.puvvada 1.41 _restoreChar = _current; 645 _current = '\0';
646 kumpf 1.37
647 venkat.puvvada 1.41 if (nullTerminator) 648 *nullTerminator = '\0'; 649 }
650 kumpf 1.45 } while (!includeComment && entry.type == XmlEntry::COMMENT); 651 652 if (_supportedNamespaces && 653 (entry.type == XmlEntry::START_TAG \|\| 654 entry.type == XmlEntry::EMPTY_TAG \|\| 655 entry.type == XmlEntry::END_TAG)) 656 { 657 // Determine the namespace type for this entry 658 659 if (entry.type == XmlEntry::START_TAG \|\| 660 entry.type == XmlEntry::EMPTY_TAG) 661 { 662 // Process namespace declarations and determine the namespace type 663 // for the attributes. 664 665 Uint32 scopeLevel = _stack.size(); 666 if (entry.type == XmlEntry::EMPTY_TAG) 667 { 668 // Empty tags are deeper scope, but not pushed onto the stack 669 scopeLevel++; 670 } 671 kumpf 1.45 672 for (Uint32 i = 0, n = entry.attributes.size(); i < n; i++) 673 { 674 XmlAttribute& attr = entry.attributes[i]; 675 if ((strncmp(attr.name, "xmlns:", 6) == 0) \|\| 676 (strcmp(attr.name, "xmlns") == 0)) 677 { 678 // Process a namespace declaration 679 XmlNamespace ns; 680 if (attr.name[5] == ':') 681 { 682 ns.localName = attr.localName; 683 } 684 else 685 { 686 // Default name space has no local name 687 ns.localName = 0; 688 } 689 ns.extendedName = attr.value; 690 ns.scopeLevel = scopeLevel; 691 ns.type = _getSupportedNamespaceType(ns.extendedName); 692 kumpf 1.45 693 // If the namespace is not supported, assign it a unique 694 // negative identifier. 695 if (ns.type == -1) 696 { 697 ns.type = _currentUnsupportedNSType--; 698 } 699 700 _nameSpaces.push(ns); 701 } 702 else 703 { 704 // Get the namespace type for this attribute. 705 attr.nsType = _getNamespaceType(attr.name); 706 } 707 } 708 } 709 710 entry.nsType = _getNamespaceType(entry.text); 711 } 712 else 713 kumpf 1.45 { 714 entry.nsType = -1; 715 }
716 kumpf 1.37
717 venkat.puvvada 1.41 return true;
718 mike 1.13 } 719
720 mike 1.55 Boolean XmlParser::next(XmlEntry& entry, Boolean includeComment) 721 { 722 if (_hideEmptyTags) 723 { 724 // Get the next tag. 725 726 if (!_next(entry, includeComment)) 727 return false; 728 729 // If an EMPTY_TAG is encountered, then convert it to a START_TAG and 730 // push a matching END_TAG on the put-back stack. This hides every 731 // EMPTY_TAG from the caller. 732 733 if (entry.type == XmlEntry::EMPTY_TAG) 734 { 735 entry.type = XmlEntry::START_TAG; 736 737 XmlEntry tmp; 738 tmp.type = XmlEntry::END_TAG; 739 tmp.text = entry.text; 740 tmp.nsType = entry.nsType; 741 mike 1.55 tmp.localName = entry.localName; 742 743 _putBackStack.push(tmp); 744 } 745 746 return true; 747 } 748 else 749 return _next(entry, includeComment); 750 } 751
752 kumpf 1.45 // Get the namespace type of the given tag 753 int XmlParser::_getNamespaceType(const char* tag) 754 { 755 const char* pos = strchr(tag, ':'); 756 757 // If ':' is not found, the tag is not namespace qualified and we 758 // need to look for the default name space. 759 760 // Search the namespace stack from the top 761 for (Sint32 i = _nameSpaces.size() - 1; i >=0; i--) 762 { 763 // If ':' is found, look for the name space with the matching 764 // local name... 765 if ((pos && _nameSpaces[i].localName && 766 !strncmp(_nameSpaces[i].localName, tag, pos - tag)) \|\| 767 // ... otherwise look for the default name space. It's the 768 // one with localName set to NULL 769 (!pos && !_nameSpaces[i].localName)) 770 { 771 return _nameSpaces[i].type; 772 } 773 kumpf 1.45 } 774 775 // If the tag is namespace qualified, but the name space has not been 776 // declared, it's malformed XML and we must throw an exception. 777 // Note: The "xml" namespace is specifically defined by the W3C as a 778 // reserved prefix ("http://www.w3.org/XML/1998/namespace"). 779 if (pos && (strncmp(tag, "xml:", 4) != 0)) 780 { 781 throw XmlException(XmlException::UNDECLARED_NAMESPACE, _line); 782 } 783 784 // Otherwise it's OK not to have a name space. 785 return -1; 786 } 787 788 // Given the extended namespace name, find it in the table of supported 789 // namespaces and return its type. 
790 int XmlParser::_getSupportedNamespaceType(const char* extendedName) 791 { 792 for (Sint32 i = 0; 793 _supportedNamespaces[i].localName != 0; 794 kumpf 1.45 i++) 795 { 796 PEGASUS_ASSERT(_supportedNamespaces[i].type == i); 797 if (!strcmp(_supportedNamespaces[i].extendedName, extendedName)) 798 { 799 return _supportedNamespaces[i].type; 800 } 801 } 802 return -1; 803 } 804 805 XmlNamespace* XmlParser::getNamespace(int nsType) 806 { 807 for (Sint32 i = _nameSpaces.size() - 1; i >=0; i--) 808 { 809 if (_nameSpaces[i].type == nsType) 810 { 811 return &_nameSpaces[i]; 812 } 813 } 814 return 0; 815 kumpf 1.45 } 816
817 mike 1.13 void XmlParser::putBack(XmlEntry& entry) 818 { 819 _putBackStack.push(entry); 820 } 821 822 XmlParser::~XmlParser() 823 { 824 // Nothing to do! 825 } 826
// Lookup table indexed by character code: 1 for the characters A-Z, a-z,
// 0-9, '_', '-' and '.', 0 for everything else (including NUL).
// (Note that ':' is not included and must be checked separately)
static unsigned char _isInnerElementChar[] =
{
    /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x10 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x20 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,
    /* 0x30 */ 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
    /* 0x40 */ 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0x50 */ 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,
    /* 0x60 */ 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0x70 */ 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
    /* 0x80 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x90 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xA0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xB0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xC0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xD0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xE0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xF0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};

839 kumpf 1.45 inline Boolean _getQName(char& p, const char& localName)
840 mike 1.13 {
841 kumpf 1.45 localName = p; 842
843 mike 1.35 if (!CharSet::isAlNumUnder(Uint8(*p)))
844 kumpf 1.45 return false;
845 mike 1.35
846 kumpf 1.24 p++;
847 mike 1.13
848 kumpf 1.53 // No explicit test for NULL termination is needed. 849 // On position 0 of the array false is returned.
850 thilo.boehm 1.49 while (_isInnerElementChar[Uint8(*p)])
851 david.dillard 1.32 p++;
852 mike 1.13
853 kumpf 1.45 // We've validated the prefix, now validate the local name 854 if (p == ':') 855 { 856 localName = ++p; 857 858 if (!CharSet::isAlNumUnder(Uint8(p))) 859 return false; 860 861 p++;
862 kumpf 1.53 // No explicit test for NULL termination is needed. 863 // On position 0 of the array false is returned.
864 thilo.boehm 1.49 while (_isInnerElementChar[Uint8(*p)])
865 kumpf 1.45 p++; 866 } 867 868 return true; 869 } 870 871 Boolean XmlParser::_getElementName(char& p, const char& localName) 872 { 873 if (!_getQName(p, localName)) 874 throw XmlException(XmlException::BAD_START_TAG, _line); 875
876 mike 1.13 // The next character must be a space: 877
878 chuck 1.26 if (_isspace(*p))
879 mike 1.13 {
880 david.dillard 1.32 *p++ = '\0';
881 mike 1.34 _skipWhitespace(_line, p);
882 mike 1.13 } 883 884 if (*p == '>') 885 {
886 david.dillard 1.32 *p++ = '\0'; 887 return true;
888 mike 1.13 } 889 890 return false; 891 } 892
893 kumpf 1.45 Boolean XmlParser::_getOpenElementName( 894 char& p, 895 const char& localName, 896 Boolean& openCloseElement)
897 mike 1.13 { 898 openCloseElement = false; 899
900 kumpf 1.45 if (!_getQName(p, localName))
901 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
902 mike 1.35
903 mike 1.13 // The next character must be a space: 904
905 chuck 1.26 if (_isspace(*p))
906 mike 1.13 {
907 david.dillard 1.32 *p++ = '\0';
908 mike 1.34 _skipWhitespace(_line, p);
909 mike 1.13 } 910 911 if (*p == '>') 912 {
913 david.dillard 1.32 *p++ = '\0'; 914 return true;
915 mike 1.13 } 916 917 if (p[0] == '/' && p[1] == '>') 918 {
919 david.dillard 1.32 openCloseElement = true; 920 *p = '\0'; 921 p += 2; 922 return true;
923 mike 1.13 } 924 925 return false; 926 } 927
928 kumpf 1.45 void XmlParser::_getAttributeNameAndEqual(char& p, const char& localName)
929 mike 1.13 {
930 kumpf 1.45 if (!_getQName(p, localName))
931 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
932 mike 1.35
933 mike 1.13 char* term = p; 934
935 mike 1.34 _skipWhitespace(_line, p);
936 mike 1.13 937 if (*p != '=')
938 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
939 mike 1.13 940 p++; 941
942 mike 1.34 _skipWhitespace(_line, p);
943 mike 1.13 944 term = '\0'; 945 } 946 947 void XmlParser::_getComment(char& p) 948 { 949 // Now p points to first non-whitespace character beyond "<--" sequence: 950 951 for (; *p; p++) 952 {
953 david.dillard 1.32 if (p[0] == '-' && p[1] == '-') 954 { 955 if (p[2] != '>') 956 { 957 throw XmlException( 958 XmlException::MINUS_MINUS_IN_COMMENT, _line); 959 } 960 961 // Find end of comment (excluding whitespace): 962 963 *p = '\0'; 964 p += 3; 965 return; 966 }
967 mike 1.13 } 968 969 // If it got this far, then the comment is unterminated: 970 971 throw XmlException(XmlException::UNTERMINATED_COMMENT, _line); 972 } 973 974 void XmlParser::_getCData(char& p) 975 { 976 // At this point p points one past "<![CDATA[" sequence: 977 978 for (; p; p++) 979 {
980 david.dillard 1.32 if (p[0] == ']' && p[1] == ']' && p[2] == '>') 981 { 982 p = '\0'; 983 p += 3; 984 return; 985 } 986 else if (p == '\n') 987 _line++;
988 mike 1.13 } 989 990 // If it got this far, then the comment is unterminated: 991 992 throw XmlException(XmlException::UNTERMINATED_CDATA, _line); 993 } 994 995 void XmlParser::_getDocType(char& p) 996 { 997 // Just ignore the DOCTYPE command for now: 998 999 for (; p && *p != '>'; p++) 1000 {
1001 david.dillard 1.32 if (*p == '\n') 1002 _line++;
1003 mike 1.13 } 1004 1005 if (*p != '>')
1006 david.dillard 1.32 throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
1007 mike 1.13 1008 p++; 1009 } 1010 1011 void XmlParser::_getElement(char& p, XmlEntry& entry) 1012 { 1013 //-------------------------------------------------------------------------- 1014 // Get the element name (expect one of these: '?', '!', [A-Za-z_]) 1015 //-------------------------------------------------------------------------- 1016 1017 if (p == '?') 1018 {
1019 david.dillard 1.32 entry.type = XmlEntry::XML_DECLARATION; 1020 entry.text = ++p;
1021 mike 1.13
1022 kumpf 1.45 if (_getElementName(p, entry.localName))
1023 david.dillard 1.32 return;
1024 mike 1.13 } 1025 else if (*p == '!') 1026 {
1027 david.dillard 1.32 p++;
1028 mike 1.13
1029 david.dillard 1.32 // Expect a comment or CDATA:
1030 mike 1.13
1031 david.dillard 1.32 if (p[0] == '-' && p[1] == '-') 1032 { 1033 p += 2; 1034 entry.type = XmlEntry::COMMENT; 1035 entry.text = p; 1036 _getComment(p); 1037 return; 1038 } 1039 else if (memcmp(p, "[CDATA[", 7) == 0) 1040 { 1041 p += 7; 1042 entry.type = XmlEntry::CDATA; 1043 entry.text = p; 1044 _getCData(p);
1045 venkat.puvvada 1.54 entry.textLen = strlen(entry.text);
1046 david.dillard 1.32 return; 1047 } 1048 else if (memcmp(p, "DOCTYPE", 7) == 0) 1049 { 1050 entry.type = XmlEntry::DOCTYPE;
1051 kumpf 1.37 entry.text = "";
1052 david.dillard 1.32 _getDocType(p); 1053 return; 1054 } 1055 throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
1056 mike 1.13 } 1057 else if (*p == '/') 1058 {
1059 david.dillard 1.32 entry.type = XmlEntry::END_TAG; 1060 entry.text = ++p;
1061 mike 1.13
1062 kumpf 1.45 if (!_getElementName(p, entry.localName))
1063 david.dillard 1.32 throw(XmlException(XmlException::BAD_END_TAG, _line));
1064 mike 1.13
1065 david.dillard 1.32 return;
1066 mike 1.13 }
1067 thilo.boehm 1.49 else if (CharSet::isAlphaUnder(Uint8(*p)))
1068 mike 1.13 {
1069 david.dillard 1.32 entry.type = XmlEntry::START_TAG; 1070 entry.text = p;
1071 mike 1.13
1072 david.dillard 1.32 Boolean openCloseElement = false;
1073 mike 1.13
1074 kumpf 1.45 if (_getOpenElementName(p, entry.localName, openCloseElement))
1075 david.dillard 1.32 { 1076 if (openCloseElement) 1077 entry.type = XmlEntry::EMPTY_TAG; 1078 return; 1079 }
1080 mike 1.13 } 1081 else
1082 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
1083 mike 1.13 1084 //-------------------------------------------------------------------------- 1085 // Grab all the attributes: 1086 //-------------------------------------------------------------------------- 1087 1088 for (;;) 1089 {
1090 david.dillard 1.32 if (entry.type == XmlEntry::XML_DECLARATION) 1091 { 1092 if (p[0] == '?' && p[1] == '>') 1093 { 1094 p += 2; 1095 return; 1096 } 1097 } 1098 else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>') 1099 { 1100 entry.type = XmlEntry::EMPTY_TAG; 1101 p += 2; 1102 return; 1103 } 1104 else if (*p == '>') 1105 { 1106 p++; 1107 return; 1108 } 1109 1110 XmlAttribute attr;
1111 kumpf 1.45 attr.nsType = -1;
1112 david.dillard 1.32 attr.name = p;
1113 kumpf 1.45 _getAttributeNameAndEqual(p, attr.localName);
1114 david.dillard 1.32
1115 kumpf 1.37 // Get the attribute value (e.g., "some value") 1116 { 1117 if ((p != '"') && (p != '\'')) 1118 { 1119 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); 1120 } 1121 1122 char quote = p++; 1123 1124 char start;
1125 kumpf 1.50 _normalizeAttributeValue(_line, p, quote, start);
1126 kumpf 1.37 attr.value = start; 1127 1128 if (*p != quote) 1129 { 1130 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); 1131 } 1132 1133 // Overwrite the closing quote with a null-terminator:
1134 david.dillard 1.32
1135 kumpf 1.37 *p++ = '\0'; 1136 }
1137 david.dillard 1.32 1138 if (entry.type == XmlEntry::XML_DECLARATION) 1139 { 1140 // The next thing must a space or a "?>":
1141 mike 1.13
1142 david.dillard 1.32 if (!(p[0] == '?' && p[1] == '>') && !_isspace(p)) 1143 { 1144 throw XmlException( 1145 XmlException::BAD_ATTRIBUTE_VALUE, _line); 1146 } 1147 } 1148 else if (!(p == '>' \|\| (p[0] == '/' && p[1] == '>') \|\| _isspace(*p))) 1149 { 1150 // The next thing must be a space or a '>':
1151 mike 1.13
1152 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); 1153 }
1154 mike 1.13
1155 mike 1.34 _skipWhitespace(_line, p);
1156 david.dillard 1.32
1157 kumpf 1.45 entry.attributes.append(attr);
1158 mike 1.13 } 1159 } 1160 1161 static const char* _typeStrings[] = 1162 {
1163 david.dillard 1.32 "XML_DECLARATION", 1164 "START_TAG", 1165 "EMPTY_TAG", 1166 "END_TAG",
1167 mike 1.13 "COMMENT", 1168 "CDATA", 1169 "DOCTYPE",
1170 david.dillard 1.32 "CONTENT"
1171 mike 1.13 }; 1172 1173 void XmlEntry::print() const 1174 { 1175 PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " "; 1176 1177 Boolean needQuotes = type == XmlEntry::CDATA \|\| type == XmlEntry::CONTENT; 1178 1179 if (needQuotes)
1180 david.dillard 1.32 PEGASUS_STD(cout) << "\""; 1181
1182 mike 1.13 _printValue(text); 1183 1184 if (needQuotes)
1185 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
1186 mike 1.13 1187 PEGASUS_STD(cout) << '\n'; 1188
1189 kumpf 1.45 for (Uint32 i = 0, n = attributes.size(); i < n; i++)
1190 mike 1.13 {
1191 david.dillard 1.32 PEGASUS_STD(cout) << " " << attributes[i].name << "=\""; 1192 _printValue(attributes[i].value); 1193 PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
1194 mike 1.13 } 1195 } 1196 1197 const XmlAttribute* XmlEntry::findAttribute( 1198 const char* name) const 1199 {
1200 kumpf 1.45 for (Uint32 i = 0, n = attributes.size(); i < n; i++)
1201 mike 1.13 {
1202 david.dillard 1.32 if (strcmp(attributes[i].name, name) == 0) 1203 return &attributes[i];
1204 mike 1.13 } 1205 1206 return 0; 1207 } 1208
1209 kumpf 1.45 const XmlAttribute* XmlEntry::findAttribute(
1210 kumpf 1.47 int attrNsType,
1211 kumpf 1.45 const char* name) const 1212 { 1213 for (Uint32 i = 0, n = attributes.size(); i < n; i++) 1214 {
1215 kumpf 1.47 if ((attributes[i].nsType == attrNsType) &&
1216 kumpf 1.45 (strcmp(attributes[i].localName, name) == 0)) 1217 { 1218 return &attributes[i]; 1219 } 1220 } 1221 1222 return 0; 1223 } 1224
1225 mike 1.13 // Find first non-whitespace character (set first) and last non-whitespace 1226 // character (set last one past this). For example, consider this string: 1227 //
1228 david.dillard 1.32 // " 87 "
1229 mike 1.13 // 1230 // The first pointer would point to '8' and the last pointer woudl point one 1231 // beyond '7'. 1232 1233 static void _findEnds(
1234 david.dillard 1.32 const char* str, 1235 const char*& first,
1236 mike 1.13 const char*& last) 1237 { 1238 first = str; 1239
1240 chuck 1.26 while (_isspace(*first))
1241 david.dillard 1.32 first++;
1242 mike 1.13 1243 if (!*first) 1244 {
1245 david.dillard 1.32 last = first; 1246 return;
1247 mike 1.13 } 1248 1249 last = first + strlen(first); 1250
1251 chuck 1.26 while (last != first && _isspace(last[-1]))
1252 david.dillard 1.32 last--;
1253 mike 1.13 } 1254 1255 Boolean XmlEntry::getAttributeValue(
1256 david.dillard 1.32 const char* name,
1257 mike 1.13 Uint32& value) const 1258 { 1259 const XmlAttribute* attr = findAttribute(name); 1260 1261 if (!attr)
1262 david.dillard 1.32 return false;
1263 mike 1.13 1264 const char* first; 1265 const char* last; 1266 _findEnds(attr->value, first, last); 1267 1268 char* end = 0; 1269 long tmp = strtol(first, &end, 10); 1270 1271 if (!end \|\| end != last)
1272 david.dillard 1.32 return false;
1273 mike 1.13 1274 value = Uint32(tmp); 1275 return true; 1276 } 1277 1278 Boolean XmlEntry::getAttributeValue(
1279 david.dillard 1.32 const char* name,
1280 mike 1.13 Real32& value) const 1281 { 1282 const XmlAttribute* attr = findAttribute(name); 1283 1284 if (!attr)
1285 david.dillard 1.32 return false;
1286 mike 1.13 1287 const char* first; 1288 const char* last; 1289 _findEnds(attr->value, first, last); 1290 1291 char* end = 0; 1292 double tmp = strtod(first, &end); 1293 1294 if (!end \|\| end != last)
1295 david.dillard 1.32 return false;
1296 mike 1.13
1297 david.dillard 1.32 value = static_cast<Real32>(tmp);
1298 mike 1.13 return true; 1299 } 1300 1301 Boolean XmlEntry::getAttributeValue(
1302 david.dillard 1.32 const char* name,
1303 mike 1.13 const char& value) const 1304 { 1305 const XmlAttribute attr = findAttribute(name); 1306 1307 if (!attr)
1308 david.dillard 1.32 return false;
1309 mike 1.13 1310 value = attr->value; 1311 return true; 1312 } 1313 1314 Boolean XmlEntry::getAttributeValue(const char* name, String& value) const 1315 { 1316 const char* tmp; 1317 1318 if (!getAttributeValue(name, tmp))
1319 david.dillard 1.32 return false;
1320 mike 1.13
1321 chuck 1.28 value = String(tmp);
1322 mike 1.13 return true; 1323 } 1324
1325 mike 1.33 void XmlAppendCString(Buffer& out, const char* str)
1326 mike 1.13 {
1327 david.dillard 1.32 out.append(str, static_cast<Uint32>(strlen(str)));
1328 mike 1.13 } 1329 1330 PEGASUS_NAMESPACE_END

No CVS admin address has been configured