pegasus/src/Pegasus/Common/XmlParser.cpp - annotate

Return to XmlParser.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

1 mike 1.13 //%///////////////////////////////////////////////////////////////////////////// 2 //
3 kumpf 1.16 // Copyright (c) 2000, 2001, 2002 BMC Software, Hewlett-Packard Company, IBM, 4 // The Open Group, Tivoli Systems
5 mike 1.13 // 6 // Permission is hereby granted, free of charge, to any person obtaining a copy
7 kumpf 1.16 // of this software and associated documentation files (the "Software"), to 8 // deal in the Software without restriction, including without limitation the 9 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 mike 1.13 // sell copies of the Software, and to permit persons to whom the Software is 11 // furnished to do so, subject to the following conditions: 12 //
13 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
14 mike 1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED 15 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
16 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 17 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 18 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 mike 1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 // 22 //============================================================================== 23 // 24 // Author: Mike Brasher (mbrasher@bmc.com) 25 // 26 // Modified By: 27 // 28 //%///////////////////////////////////////////////////////////////////////////// 29 30 //////////////////////////////////////////////////////////////////////////////// 31 // 32 // XmlParser 33 // 34 // This file contains a simple non-validating XML parser. Here are 35 // serveral rules for well-formed XML: 36 // 37 // 1. Documents must begin with an XML declaration: 38 // 39 // <?xml version="1.0" standalone="yes"?> 40 mike 1.13 // 41 // 2. Comments have the form: 42 // 43 // <!-- blah blah blah --> 44 // 45 // 3. The following entity references are supported: 46 // 47 // &amp - ampersand 48 // &lt - less-than 49 // &gt - greater-than 50 // &quot - full quote 51 // &apos - apostrophe 52 //
53 kumpf 1.18 // as well as character (numeric) references: 54 55 // &#49; - decimal reference for character '1' 56 // &#x31; - hexadecimal reference for character '1' 57 //
58 mike 1.13 // 4. Element names and attribute names take the following form: 59 // 60 // [A-Za-z_][A-Za-z_0-9-.:] 61 // 62 // 5. Arbitrary data (CDATA) can be enclosed like this: 63 // 64 // <![CDATA[ 65 // ... 66 // ]]> 67 // 68 // 6. Element names and attributes names are case-sensitive. 69 // 70 // 7. XmlAttribute values must be delimited by full or half quotes. 71 // XmlAttribute values must be delimited. 72 // 73 // 8. <!DOCTYPE...> 74 // 75 // TODO: 76 //
77 karl 1.15 // ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is work.
78 mike 1.13 // Handle <!DOCTYPE...> sections which are complicated (containing 79 // rules rather than references to files). 80 // 81 // Remove newlines from string literals: 82 // 83 // Example: <xyz x="hello 84 // world"> 85 // 86 //////////////////////////////////////////////////////////////////////////////// 87
88 sage 1.14 #include <Pegasus/Common/Config.h>
89 mike 1.13 #include <cctype> 90 #include <cstdio> 91 #include <cstdlib> 92 #include <cstring> 93 #include "XmlParser.h" 94 #include "Logger.h"
95 chuck 1.19 #include "ExceptionRep.h"
96 mike 1.13 97 PEGASUS_NAMESPACE_BEGIN 98 99 #define PEGASUS_ARRAY_T XmlEntry 100 # include "ArrayImpl.h" 101 #undef PEGASUS_ARRAY_T 102 103 104 //////////////////////////////////////////////////////////////////////////////// 105 // 106 // Static helper functions 107 // 108 //////////////////////////////////////////////////////////////////////////////// 109 110 static void _printValue(const char* p) 111 { 112 for (; p; p++) 113 { 114 if (p == '\n') 115 PEGASUS_STD(cout) << "\\n"; 116 else if (p == '\r') 117 mike 1.13 PEGASUS_STD(cout) << "\\r"; 118 else if (p == '\t') 119 PEGASUS_STD(cout) << "\\t"; 120 else 121 PEGASUS_STD(cout) << p; 122 } 123 } 124 125 struct EntityReference 126 { 127 const char match; 128 Uint32 length; 129 char replacement; 130 }; 131
132 kumpf 1.18 // ATTN: Add support for more entity references
133 mike 1.13 static EntityReference _references[] = 134 { 135 { "&", 5, '&' }, 136 { "<", 4, '<' }, 137 { ">", 4, '>' }, 138 { """, 6, '"' }, 139 { "'", 6, '\'' } 140 }; 141 142 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0])); 143 144 // Remove all redundant spaces from the given string: 145 146 static void _normalize(char* text) 147 { 148 Uint32 length = strlen(text); 149 char* p = text; 150 char* end = p + length; 151 152 // Remove leading spaces: 153 154 mike 1.13 while (isspace(p)) 155 p++; 156 157 if (p != text) 158 memmove(text, p, end - p + 1); 159 160 p = text; 161 162 // Look for sequences of more than one space and remove all but one. 163 164 for (;;) 165 { 166 // Advance to the next space: 167 168 while (p && !isspace(p)) 169 p++; 170 171 if (!p) 172 break; 173 174 // Advance to the next non-space: 175 mike 1.13 176 char* q = p++; 177 178 while (isspace(p)) 179 p++; 180 181 // Discard trailing spaces (if we are at the end): 182 183 if (!p) 184 { 185 q = '\0'; 186 break; 187 } 188 189 // Remove the redundant spaces: 190 191 Uint32 n = p - q; 192 193 if (n > 1) 194 { 195 q++ = ' '; 196 mike 1.13 memmove(q, p, end - p + 1); 197 p = q; 198 } 199 } 200 } 201 202 //////////////////////////////////////////////////////////////////////////////// 203 // 204 // XmlException 205 // 206 //////////////////////////////////////////////////////////////////////////////// 207 208 static const char* _xmlMessages[] = 209 { 210 "Bad opening element", 211 "Bad closing element", 212 "Bad attribute name", 213 "Exepected equal sign", 214 "Bad attribute value", 215 "A \"--\" sequence found within comment", 216 "Unterminated comment", 217 mike 1.13 "Unterminated CDATA block", 218 "Unterminated DOCTYPE", 219 "Too many attributes: parser only handles 10", 220 "Malformed reference", 221 "Expected a comment or CDATA following \"<!\" sequence", 222 "Closing element does not match opening element", 223 "One or more tags are still open", 224 "More than one 
root element was encountered", 225 "Validation error", 226 "Semantic error" 227 }; 228
229 chuck 1.19 static const char* _xmlKeys[] = 230 {
231 humberto 1.20 "Common.XmlParser.BAD_START_TAG",
232 chuck 1.19 "Common.XmlParser.BAD_END_TAG", 233 "Common.XmlParser.BAD_ATTRIBUTE_NAME", 234 "Common.XmlParser.EXPECTED_EQUAL_SIGN", 235 "Common.XmlParser.BAD_ATTRIBUTE_VALUE", 236 "Common.XmlParser.MINUS_MINUS_IN_COMMENT", 237 "Common.XmlParser.UNTERMINATED_COMMENT", 238 "Common.XmlParser.UNTERMINATED_CDATA", 239 "Common.XmlParser.UNTERMINATED_DOCTYPE", 240 "Common.XmlParser.TOO_MANY_ATTRIBUTES", 241 "Common.XmlParser.MALFORMED_REFERENCE", 242 "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA", 243 "Common.XmlParser.START_END_MISMATCH", 244 "Common.XmlParser.UNCLOSED_TAGS", 245 "Common.XmlParser.MULTIPLE_ROOTS", 246 "Common.XmlParser.VALIDATION_ERROR", 247 "Common.XmlParser.SEMANTIC_ERROR" 248 }; 249 250 // l10n TODO replace _formMessage with the commented one and uncomment 251 // the new constructors 252 /*
253 mike 1.13 static String _formMessage(Uint32 code, Uint32 line, const String& message) 254 { 255 String result = _xmlMessages[Uint32(code) - 1]; 256 257 char buffer[32]; 258 sprintf(buffer, "%d", line); 259 result.append(": on line "); 260 result.append(buffer); 261 262 if (message.size()) 263 { 264 result.append(": "); 265 result.append(message); 266 } 267 268 return result; 269 }
270 chuck 1.19 */ 271 272 static MessageLoaderParms _formMessage(Uint32 code, Uint32 line, const String& message) 273 { 274 String dftMsg = _xmlMessages[Uint32(code) - 1]; 275 String key = _xmlKeys[Uint32(code) - 1]; 276 String msg = message; 277 278 dftMsg.append(": on line $0"); 279 if (message.size()) 280 {
281 humberto 1.20 msg = ": " + msg;
282 chuck 1.19 dftMsg.append("$1"); 283 } 284 285 return MessageLoaderParms(key, dftMsg, line ,msg); 286 } 287 288 static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line) 289 { 290 String dftMsg = _xmlMessages[Uint32(code) - 1]; 291 String key = _xmlKeys[Uint32(code) - 1]; 292 293 dftMsg.append(": on line $0"); 294 295 return MessageLoaderParms(key, dftMsg, line); 296 } 297
298 mike 1.13 299 XmlException::XmlException( 300 XmlException::Code code, 301 Uint32 lineNumber, 302 const String& message) 303 : Exception(_formMessage(code, lineNumber, message)) 304 { 305 306 } 307
308 chuck 1.19 309 XmlException::XmlException( 310 XmlException::Code code, 311 Uint32 lineNumber, 312 MessageLoaderParms& msgParms) 313 : Exception(_formPartialMessage(code, lineNumber)) 314 {
315 humberto 1.21 if (msgParms.default_msg.size()) 316 { 317 msgParms.default_msg = ": " + msgParms.default_msg; 318 }
319 chuck 1.19 _rep->message.append(MessageLoader::getMessage(msgParms)); 320 } 321 322
323 mike 1.13 //////////////////////////////////////////////////////////////////////////////// 324 // 325 // XmlValidationError 326 // 327 //////////////////////////////////////////////////////////////////////////////// 328 329 XmlValidationError::XmlValidationError( 330 Uint32 lineNumber, 331 const String& message) 332 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message) 333 { 334 335 } 336
337 chuck 1.19 338 XmlValidationError::XmlValidationError( 339 Uint32 lineNumber, 340 MessageLoaderParms& msgParms) 341 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms) 342 { 343 344 } 345 346
347 mike 1.13 //////////////////////////////////////////////////////////////////////////////// 348 // 349 // XmlSemanticError 350 // 351 //////////////////////////////////////////////////////////////////////////////// 352 353 XmlSemanticError::XmlSemanticError( 354 Uint32 lineNumber, 355 const String& message) 356 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message) 357 { 358 359 }
360 chuck 1.19 361 362 XmlSemanticError::XmlSemanticError( 363 Uint32 lineNumber, 364 MessageLoaderParms& msgParms) 365 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms) 366 { 367 368 } 369
370 mike 1.13 371 //////////////////////////////////////////////////////////////////////////////// 372 // 373 // XmlParser 374 // 375 //////////////////////////////////////////////////////////////////////////////// 376 377 XmlParser::XmlParser(char* text) : _line(1), _text(text), _current(text), 378 _restoreChar('\0'), _foundRoot(false) 379 { 380 381 } 382 383 Boolean XmlParser::next(XmlEntry& entry) 384 { 385 if (!_putBackStack.isEmpty()) 386 { 387 entry = _putBackStack.top(); 388 _putBackStack.pop(); 389 return true; 390 } 391 mike 1.13 392 // If a character was overwritten with a null-terminator the last 393 // time this routine was called, then put back that character. Before 394 // exiting of course, restore the null-terminator. 395 396 char* nullTerminator = 0; 397 398 if (_restoreChar && !_current) 399 { 400 nullTerminator = _current; 401 _current = _restoreChar; 402 _restoreChar = '\0'; 403 } 404 405 // Skip over any whitespace: 406 407 _skipWhitespace(_current); 408 409 if (!_current) 410 { 411 if (nullTerminator) 412 mike 1.13 nullTerminator = '\0'; 413 414 if (!_stack.isEmpty()) 415 throw XmlException(XmlException::UNCLOSED_TAGS, _line); 416 417 return false; 418 } 419 420 // Either a "<...>" or content begins next: 421 422 if (_current == '<') 423 { 424 _current++; 425 _getElement(_current, entry); 426 427 if (nullTerminator) 428 nullTerminator = '\0'; 429 430 if (entry.type == XmlEntry::START_TAG) 431 { 432 if (_stack.isEmpty() && _foundRoot) 433 mike 1.13 throw XmlException(XmlException::MULTIPLE_ROOTS, _line); 434 435 _foundRoot = true; 436 _stack.push((char)entry.text); 437 } 438 else if (entry.type == XmlEntry::END_TAG) 439 { 440 if (_stack.isEmpty()) 441 throw XmlException(XmlException::START_END_MISMATCH, _line); 442 443 if (strcmp(_stack.top(), entry.text) != 0) 444 throw XmlException(XmlException::START_END_MISMATCH, _line); 445 446 _stack.pop(); 447 } 448 449 return true; 450 } 451 else 452 { 453 entry.type = XmlEntry::CONTENT; 454 mike 1.13 
entry.text = _current; 455 _getContent(_current); 456 _restoreChar = _current; 457 _current = '\0'; 458 459 if (nullTerminator) 460 nullTerminator = '\0'; 461 462 _substituteReferences((char)entry.text); 463 _normalize((char)entry.text); 464 465 return true; 466 } 467 } 468 469 void XmlParser::putBack(XmlEntry& entry) 470 { 471 _putBackStack.push(entry); 472 } 473 474 XmlParser::~XmlParser() 475 mike 1.13 { 476 // Nothing to do! 477 } 478 479 void XmlParser::_skipWhitespace(char& p) 480 { 481 while (p && isspace(p)) 482 { 483 if (p == '\n') 484 _line++; 485 486 p++; 487 } 488 } 489 490 Boolean XmlParser::_getElementName(char*& p) 491 {
492 david 1.22 if (!String::isUTF8(p))
493 mike 1.13 throw XmlException(XmlException::BAD_START_TAG, _line); 494
495 david 1.22 while ((p) && 496 (((p >= 'A') && (p <= 'Z')) \|\| 497 ((p >= 'a') && (p <= 'z')) \|\| 498 p == '_' \|\| p == '-' \|\| p == ':' \|\| *p == '.'))
499 mike 1.13 p++; 500 501 // The next character must be a space: 502 503 if (isspace(p)) 504 { 505 p++ = '\0'; 506 _skipWhitespace(p); 507 } 508 509 if (p == '>') 510 { 511 p++ = '\0'; 512 return true; 513 } 514 515 return false; 516 } 517 518 Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement) 519 { 520 mike 1.13 openCloseElement = false; 521
522 david 1.22 if (!String::isUTF8(p))
523 mike 1.13 throw XmlException(XmlException::BAD_START_TAG, _line); 524
525 david 1.22 while ((p) && 526 (((p >= 'A') && (p <= 'Z')) \|\| 527 ((p >= 'a') && (p <= 'z')) \|\| 528 p == '_' \|\| p == '-' \|\| p == ':' \|\| *p == '.'))
529 mike 1.13 p++; 530 531 // The next character must be a space: 532 533 if (isspace(p)) 534 { 535 p++ = '\0'; 536 _skipWhitespace(p); 537 } 538 539 if (p == '>') 540 { 541 p++ = '\0'; 542 return true; 543 } 544 545 if (p[0] == '/' && p[1] == '>') 546 { 547 openCloseElement = true; 548 p = '\0'; 549 p += 2; 550 mike 1.13 return true; 551 } 552 553 return false; 554 } 555 556 void XmlParser::_getAttributeNameAndEqual(char& p) 557 {
558 david 1.22 if (!String::isUTF8(p))
559 mike 1.13 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line); 560
561 david 1.22 while ((p) && 562 (((p >= 'A') && (p <= 'Z')) \|\| 563 ((p >= 'a') && (p <= 'z')) \|\| 564 p == '_' \|\| p == '-' \|\| p == ':' \|\| *p == '.'))
565 mike 1.13 p++; 566 567 char* term = p; 568 569 _skipWhitespace(p); 570 571 if (p != '=') 572 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line); 573 574 p++; 575 576 _skipWhitespace(p); 577 578 term = '\0'; 579 } 580 581 void XmlParser::_getAttributeValue(char& p) 582 { 583 // ATTN-B: handle values contained in semiquotes: 584 585 if (p != '"' && p != '\'') 586 mike 1.13 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); 587 588 char startChar = p++; 589 590 while (p && p != startChar) 591 p++; 592 593 if (p != startChar) 594 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); 595 596 p++ = '\0'; 597 } 598 599 void XmlParser::_getComment(char& p) 600 { 601 // Now p points to first non-whitespace character beyond "<--" sequence: 602 603 for (; p; p++) 604 { 605 if (p[0] == '-' && p[1] == '-') 606 { 607 mike 1.13 if (p[2] != '>') 608 { 609 throw XmlException( 610 XmlException::MINUS_MINUS_IN_COMMENT, _line); 611 } 612 613 // Find end of comment (excluding whitespace): 614 615 p = '\0'; 616 p += 3; 617 return; 618 } 619 } 620 621 // If it got this far, then the comment is unterminated: 622 623 throw XmlException(XmlException::UNTERMINATED_COMMENT, _line); 624 } 625 626 void XmlParser::_getCData(char& p) 627 { 628 mike 1.13 // At this point p points one past "<![CDATA[" sequence: 629 630 for (; p; p++) 631 { 632 if (p[0] == ']' && p[1] == ']' && p[2] == '>') 633 { 634 p = '\0'; 635 p += 3; 636 return; 637 } 638 else if (p == '\n') 639 _line++; 640 } 641 642 // If it got this far, then the comment is unterminated: 643 644 throw XmlException(XmlException::UNTERMINATED_CDATA, _line); 645 } 646 647 void XmlParser::_getDocType(char& p) 648 { 649 mike 1.13 // Just ignore the DOCTYPE command for now: 650 651 for (; p && p != '>'; p++) 652 { 653 if (p == '\n') 654 _line++; 655 } 656 657 if (p != '>') 658 throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line); 659 660 p++; 661 } 662 663 void XmlParser::_getContent(char& p) 664 { 665 
while (p && p != '<') 666 { 667 if (p == '\n') 668 _line++; 669 670 mike 1.13 p++; 671 } 672 } 673 674 void XmlParser::_substituteReferences(char* text) 675 { 676 Uint32 rem = strlen(text); 677 678 for (char* p = text; p; p++, rem--) 679 { 680 if (p == '&') 681 {
682 kumpf 1.18 // Process character or entity reference
683 mike 1.13
684 kumpf 1.18 Uint16 referenceChar = 0; 685 Uint32 referenceLength = 0; 686 XmlException::Code code = XmlException::MALFORMED_REFERENCE; 687 688 if ((p+1) == '#') 689 { 690 // Found a character (numeric) reference 691 // Determine whether it is decimal or hex 692 if ((p+2) == 'x') 693 { 694 // Decode a hexadecimal character reference 695 char* q = p+3; 696 697 // At most four digits are allowed, plus trailing ';' 698 Uint32 numDigits; 699 for (numDigits = 0; numDigits < 5; numDigits++, q++) 700 { 701 if (isdigit(q)) 702 { 703 referenceChar = (referenceChar << 4); 704 referenceChar += (q - '0'); 705 kumpf 1.18 } 706 else if ((q >= 'A') && (q <= 'F')) 707 { 708 referenceChar = (referenceChar << 4); 709 referenceChar += (q - 'A' + 10); 710 } 711 else if ((q >= 'a') && (q <= 'f')) 712 { 713 referenceChar = (referenceChar << 4); 714 referenceChar += (q - 'a' + 10); 715 } 716 else if (q == ';') 717 { 718 break; 719 } 720 else 721 { 722 throw XmlException(code, _line); 723 } 724 } 725 726 kumpf 1.18 // Hex number must be 1 - 4 digits 727 if ((numDigits == 0) \|\| (numDigits > 4)) 728 { 729 throw XmlException(code, _line); 730 } 731 732 // ATTN: Currently do not support 16-bit characters 733 if (referenceChar > 0xff) 734 { 735 // ATTN: Is there a good way to say "unsupported"? 
736 throw XmlException(code, _line); 737 } 738 739 referenceLength = numDigits + 4; 740 } 741 else 742 { 743 // Decode a decimal character reference 744 Uint32 newChar = 0; 745 char q = p+2; 746 747 kumpf 1.18 // At most five digits are allowed, plus trailing ';' 748 Uint32 numDigits; 749 for (numDigits = 0; numDigits < 6; numDigits++, q++) 750 { 751 if (isdigit(q)) 752 { 753 newChar = (newChar 10); 754 newChar += (q - '0'); 755 } 756 else if (q == ';') 757 { 758 break; 759 } 760 else 761 { 762 throw XmlException(code, _line); 763 } 764 } 765 766 // Decimal number must be 1 - 5 digits and fit in 16 bits 767 if ((numDigits == 0) \|\| (numDigits > 5) \|\| 768 kumpf 1.18 (newChar > 0xffff)) 769 { 770 throw XmlException(code, _line); 771 } 772 773 // ATTN: Currently do not support 16-bit characters 774 if (newChar > 0xff) 775 { 776 // ATTN: Is there a good way to say "unsupported"? 777 throw XmlException(code, _line); 778 } 779 780 referenceChar = Uint16(newChar); 781 referenceLength = numDigits + 3; 782 } 783 } 784 else 785 { 786 // Check for entity reference 787 // ATTN: Inefficient if many entity references are supported 788 Uint32 i; 789 kumpf 1.18 for (i = 0; i < _REFERENCES_SIZE; i++) 790 { 791 Uint32 length = _references[i].length; 792 const char* match = _references[i].match; 793 794 if (strncmp(p, _references[i].match, length) == 0) 795 { 796 referenceChar = _references[i].replacement; 797 referenceLength = length; 798 break; 799 } 800 } 801 802 if (i == _REFERENCES_SIZE) 803 { 804 // Didn't recognize the entity reference 805 // ATTN: Is there a good way to say "unsupported"? 806 throw XmlException(code, _line); 807 } 808 } 809 810 kumpf 1.18 // Replace the reference with the correct character 811 p = (char)referenceChar; 812 char q = p + referenceLength; 813 rem = rem - referenceLength + 1; 814 memmove(p + 1, q, rem);
815 mike 1.13 } 816 } 817 } 818 819 static const char _EMPTY_STRING[] = ""; 820 821 void XmlParser::_getElement(char& p, XmlEntry& entry) 822 { 823 entry.attributeCount = 0; 824 825 //-------------------------------------------------------------------------- 826 // Get the element name (expect one of these: '?', '!', [A-Za-z_]) 827 //-------------------------------------------------------------------------- 828 829 if (p == '?') 830 { 831 entry.type = XmlEntry::XML_DECLARATION; 832 entry.text = ++p; 833 834 Boolean openCloseElement = false; 835 836 mike 1.13 if (_getElementName(p)) 837 return; 838 } 839 else if (p == '!') 840 { 841 p++; 842 843 // Expect a comment or CDATA: 844 845 if (p[0] == '-' && p[1] == '-') 846 { 847 p += 2; 848 entry.type = XmlEntry::COMMENT; 849 entry.text = p; 850 _getComment(p); 851 return; 852 } 853 else if (memcmp(p, "[CDATA[", 7) == 0) 854 { 855 p += 7; 856 entry.type = XmlEntry::CDATA; 857 mike 1.13 entry.text = p; 858 _getCData(p); 859 return; 860 } 861 else if (memcmp(p, "DOCTYPE", 7) == 0) 862 { 863 entry.type = XmlEntry::DOCTYPE; 864 entry.text = _EMPTY_STRING; 865 _getDocType(p); 866 return; 867 } 868 throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line)); 869 } 870 else if (p == '/') 871 { 872 entry.type = XmlEntry::END_TAG; 873 entry.text = ++p; 874 875 if (!_getElementName(p)) 876 throw(XmlException(XmlException::BAD_END_TAG, _line)); 877 878 mike 1.13 return; 879 }
880 david 1.22 else if (String::isUTF8(p))
881 mike 1.13 { 882 entry.type = XmlEntry::START_TAG; 883 entry.text = p; 884 885 Boolean openCloseElement = false; 886 887 if (_getOpenElementName(p, openCloseElement)) 888 { 889 if (openCloseElement) 890 entry.type = XmlEntry::EMPTY_TAG; 891 return; 892 } 893 } 894 else 895 throw XmlException(XmlException::BAD_START_TAG, _line); 896 897 //-------------------------------------------------------------------------- 898 // Grab all the attributes: 899 //-------------------------------------------------------------------------- 900 901 for (;;) 902 mike 1.13 { 903 if (entry.type == XmlEntry::XML_DECLARATION) 904 { 905 if (p[0] == '?' && p[1] == '>') 906 { 907 p += 2; 908 return; 909 } 910 } 911 else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>') 912 { 913 entry.type = XmlEntry::EMPTY_TAG; 914 p += 2; 915 return; 916 } 917 else if (p == '>') 918 { 919 p++; 920 return; 921 } 922 923 mike 1.13 XmlAttribute attr; 924 attr.name = p; 925 _getAttributeNameAndEqual(p); 926 927 if (p != '"' && p != '\'') 928 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); 929 930 attr.value = p + 1; 931 _getAttributeValue(p); 932 933 if (entry.type == XmlEntry::XML_DECLARATION) 934 { 935 // The next thing must a space or a "?>": 936 937 if (!(p[0] == '?' 
&& p[1] == '>') && !isspace(p)) 938 { 939 throw XmlException( 940 XmlException::BAD_ATTRIBUTE_VALUE, _line); 941 } 942 } 943 else if (!(p == '>' \|\| (p[0] == '/' && p[1] == '>') \|\| isspace(p))) 944 mike 1.13 { 945 // The next thing must be a space or a '>': 946 947 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); 948 } 949 950 _skipWhitespace(p); 951 952 if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES) 953 throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line); 954 955 _substituteReferences((char)attr.value); 956 entry.attributes[entry.attributeCount++] = attr; 957 } 958 } 959 960 static const char _typeStrings[] = 961 { 962 "XML_DECLARATION", 963 "START_TAG", 964 "EMPTY_TAG", 965 mike 1.13 "END_TAG", 966 "COMMENT", 967 "CDATA", 968 "DOCTYPE", 969 "CONTENT" 970 }; 971 972 void XmlEntry::print() const 973 { 974 PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " "; 975 976 Boolean needQuotes = type == XmlEntry::CDATA \|\| type == XmlEntry::CONTENT; 977 978 if (needQuotes) 979 PEGASUS_STD(cout) << "\""; 980 981 _printValue(text); 982 983 if (needQuotes) 984 PEGASUS_STD(cout) << "\""; 985 986 mike 1.13 PEGASUS_STD(cout) << '\n'; 987 988 for (Uint32 i = 0; i < attributeCount; i++) 989 { 990 PEGASUS_STD(cout) << " " << attributes[i].name << "=\""; 991 _printValue(attributes[i].value); 992 PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl); 993 } 994 } 995 996 const XmlAttribute* XmlEntry::findAttribute( 997 const char* name) const 998 { 999 for (Uint32 i = 0; i < attributeCount; i++) 1000 { 1001 if (strcmp(attributes[i].name, name) == 0) 1002 return &attributes[i]; 1003 } 1004 1005 return 0; 1006 } 1007 mike 1.13 1008 // Find first non-whitespace character (set first) and last non-whitespace 1009 // character (set last one past this). For example, consider this string: 1010 // 1011 // " 87 " 1012 // 1013 // The first pointer would point to '8' and the last pointer woudl point one 1014 // beyond '7'. 
1015 1016 static void _findEnds( 1017 const char* str, 1018 const char& first, 1019 const char& last) 1020 { 1021 first = str; 1022 1023 while (isspace(first)) 1024 first++; 1025 1026 if (!first) 1027 { 1028 mike 1.13 last = first; 1029 return; 1030 } 1031 1032 last = first + strlen(first); 1033 1034 while (last != first && isspace(last[-1])) 1035 last--; 1036 } 1037 1038 Boolean XmlEntry::getAttributeValue( 1039 const char* name, 1040 Uint32& value) const 1041 { 1042 const XmlAttribute* attr = findAttribute(name); 1043 1044 if (!attr) 1045 return false; 1046 1047 const char* first; 1048 const char* last; 1049 mike 1.13 _findEnds(attr->value, first, last); 1050 1051 char* end = 0; 1052 long tmp = strtol(first, &end, 10); 1053 1054 if (!end \|\| end != last) 1055 return false; 1056 1057 value = Uint32(tmp); 1058 return true; 1059 } 1060 1061 Boolean XmlEntry::getAttributeValue( 1062 const char* name, 1063 Real32& value) const 1064 { 1065 const XmlAttribute* attr = findAttribute(name); 1066 1067 if (!attr) 1068 return false; 1069 1070 mike 1.13 const char* first; 1071 const char* last; 1072 _findEnds(attr->value, first, last); 1073 1074 char* end = 0; 1075 double tmp = strtod(first, &end); 1076 1077 if (!end \|\| end != last) 1078 return false; 1079 1080 value = Uint32(tmp); 1081 return true; 1082 } 1083 1084 Boolean XmlEntry::getAttributeValue( 1085 const char* name, 1086 const char& value) const 1087 { 1088 const XmlAttribute attr = findAttribute(name); 1089 1090 if (!attr) 1091 mike 1.13 return false; 1092 1093 value = attr->value; 1094 return true; 1095 } 1096 1097 Boolean XmlEntry::getAttributeValue(const char* name, String& value) const 1098 { 1099 const char* tmp; 1100 1101 if (!getAttributeValue(name, tmp)) 1102 return false; 1103
1104 david 1.22 value = String(tmp,STRING_FLAG_UTF8);
1105 mike 1.13 return true; 1106 } 1107 1108 void XmlAppendCString(Array<Sint8>& out, const char* str) 1109 { 1110 out.append(str, strlen(str)); 1111 } 1112 1113 PEGASUS_NAMESPACE_END

No CVS admin address has been configured