pegasus/src/Pegasus/Common/XmlParser.cpp - annotate

Return to XmlParser.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

1 mike 1.13 //%///////////////////////////////////////////////////////////////////////////// 2 //
3 kumpf 1.16 // Copyright (c) 2000, 2001, 2002 BMC Software, Hewlett-Packard Company, IBM, 4 // The Open Group, Tivoli Systems
5 mike 1.13 // 6 // Permission is hereby granted, free of charge, to any person obtaining a copy
7 kumpf 1.16 // of this software and associated documentation files (the "Software"), to 8 // deal in the Software without restriction, including without limitation the 9 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 mike 1.13 // sell copies of the Software, and to permit persons to whom the Software is 11 // furnished to do so, subject to the following conditions: 12 //
13 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
14 mike 1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED 15 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
16 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 17 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 18 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 mike 1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 // 22 //============================================================================== 23 // 24 // Author: Mike Brasher (mbrasher@bmc.com) 25 // 26 // Modified By: 27 // 28 //%///////////////////////////////////////////////////////////////////////////// 29 30 //////////////////////////////////////////////////////////////////////////////// 31 // 32 // XmlParser 33 // 34 // This file contains a simple non-validating XML parser. Here are 35 // several rules for well-formed XML: 36 // 37 // 1. Documents must begin with an XML declaration: 38 // 39 // <?xml version="1.0" standalone="yes"?> 40 mike 1.13 // 41 // 2. Comments have the form: 42 // 43 // <!-- blah blah blah --> 44 // 45 // 3. The following entity references are supported: 46 // 47 // &amp - ampersand 48 // &lt - less-than 49 // &gt - greater-than 50 // &quot - full quote 51 // &apos - apostrophe 52 //
53 kumpf 1.18 // as well as character (numeric) references: 54 55 // &#49; - decimal reference for character '1' 56 // &#x31; - hexadecimal reference for character '1' 57 //
58 mike 1.13 // 4. Element names and attribute names take the following form: 59 // 60 // [A-Za-z_][A-Za-z_0-9-.:] 61 // 62 // 5. Arbitrary data (CDATA) can be enclosed like this: 63 // 64 // <![CDATA[ 65 // ... 66 // ]]> 67 // 68 // 6. Element names and attributes names are case-sensitive. 69 // 70 // 7. XmlAttribute values must be delimited by full or half quotes. 71 // XmlAttribute values must be delimited. 72 // 73 // 8. <!DOCTYPE...> 74 // 75 // TODO: 76 //
77 karl 1.15 // ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is work.
78 mike 1.13 // Handle <!DOCTYPE...> sections which are complicated (containing 79 // rules rather than references to files). 80 // 81 // Remove newlines from string literals: 82 // 83 // Example: <xyz x="hello 84 // world"> 85 // 86 //////////////////////////////////////////////////////////////////////////////// 87
88 sage 1.14 #include <Pegasus/Common/Config.h>
89 mike 1.13 #include <cctype> 90 #include <cstdio> 91 #include <cstdlib> 92 #include <cstring> 93 #include "XmlParser.h" 94 #include "Logger.h"
95 chuck 1.19 #include "ExceptionRep.h"
96 mike 1.13 97 PEGASUS_NAMESPACE_BEGIN 98 99 #define PEGASUS_ARRAY_T XmlEntry 100 # include "ArrayImpl.h" 101 #undef PEGASUS_ARRAY_T 102 103 104 //////////////////////////////////////////////////////////////////////////////// 105 // 106 // Static helper functions 107 // 108 //////////////////////////////////////////////////////////////////////////////// 109 110 static void _printValue(const char* p) 111 { 112 for (; p; p++) 113 { 114 if (p == '\n') 115 PEGASUS_STD(cout) << "\\n"; 116 else if (p == '\r') 117 mike 1.13 PEGASUS_STD(cout) << "\\r"; 118 else if (p == '\t') 119 PEGASUS_STD(cout) << "\\t"; 120 else 121 PEGASUS_STD(cout) << p; 122 } 123 } 124 125 struct EntityReference 126 { 127 const char match; 128 Uint32 length; 129 char replacement; 130 }; 131
132 kumpf 1.18 // ATTN: Add support for more entity references
133 mike 1.13 static EntityReference _references[] = 134 { 135 { "&", 5, '&' }, 136 { "<", 4, '<' }, 137 { ">", 4, '>' }, 138 { """, 6, '"' }, 139 { "'", 6, '\'' } 140 }; 141 142 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0])); 143 144 // Remove all redundant spaces from the given string: 145 146 static void _normalize(char* text) 147 { 148 Uint32 length = strlen(text); 149 char* p = text; 150 char* end = p + length; 151 152 // Remove leading spaces: 153 154 mike 1.13 while (isspace(p)) 155 p++; 156 157 if (p != text) 158 memmove(text, p, end - p + 1); 159 160 p = text; 161 162 // Look for sequences of more than one space and remove all but one. 163 164 for (;;) 165 { 166 // Advance to the next space: 167 168 while (p && !isspace(p)) 169 p++; 170 171 if (!p) 172 break; 173 174 // Advance to the next non-space: 175 mike 1.13 176 char* q = p++; 177 178 while (isspace(p)) 179 p++; 180 181 // Discard trailing spaces (if we are at the end): 182 183 if (!p) 184 { 185 q = '\0'; 186 break; 187 } 188 189 // Remove the redundant spaces: 190 191 Uint32 n = p - q; 192 193 if (n > 1) 194 { 195 q++ = ' '; 196 mike 1.13 memmove(q, p, end - p + 1); 197 p = q; 198 } 199 } 200 } 201 202 //////////////////////////////////////////////////////////////////////////////// 203 // 204 // XmlException 205 // 206 //////////////////////////////////////////////////////////////////////////////// 207 208 static const char* _xmlMessages[] = 209 { 210 "Bad opening element", 211 "Bad closing element", 212 "Bad attribute name", 213 "Exepected equal sign", 214 "Bad attribute value", 215 "A \"--\" sequence found within comment", 216 "Unterminated comment", 217 mike 1.13 "Unterminated CDATA block", 218 "Unterminated DOCTYPE", 219 "Too many attributes: parser only handles 10", 220 "Malformed reference", 221 "Expected a comment or CDATA following \"<!\" sequence", 222 "Closing element does not match opening element", 223 "One or more tags are still open", 224 "More than one 
root element was encountered", 225 "Validation error", 226 "Semantic error" 227 }; 228
// Message keys, one per entry of _xmlMessages and in the same order. The
// message helpers in this file index this table with (code - 1) and pass
// the key to MessageLoaderParms.
static const char* _xmlKeys[] =
{
    "Common.XmlParser.BAD_START_TAG",
    "Common.XmlParser.BAD_END_TAG",
    "Common.XmlParser.BAD_ATTRIBUTE_NAME",
    "Common.XmlParser.EXPECTED_EQUAL_SIGN",
    "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
    "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
    "Common.XmlParser.UNTERMINATED_COMMENT",
    "Common.XmlParser.UNTERMINATED_CDATA",
    "Common.XmlParser.UNTERMINATED_DOCTYPE",
    "Common.XmlParser.TOO_MANY_ATTRIBUTES",
    "Common.XmlParser.MALFORMED_REFERENCE",
    "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
    "Common.XmlParser.START_END_MISMATCH",
    "Common.XmlParser.UNCLOSED_TAGS",
    "Common.XmlParser.MULTIPLE_ROOTS",
    "Common.XmlParser.VALIDATION_ERROR",
    "Common.XmlParser.SEMANTIC_ERROR"
};

250 chuck 1.23 // l10n replace _formMessage (comment out the old one)
251 chuck 1.19 /*
252 mike 1.13 static String _formMessage(Uint32 code, Uint32 line, const String& message) 253 { 254 String result = _xmlMessages[Uint32(code) - 1]; 255 256 char buffer[32]; 257 sprintf(buffer, "%d", line); 258 result.append(": on line "); 259 result.append(buffer); 260 261 if (message.size()) 262 { 263 result.append(": "); 264 result.append(message); 265 } 266 267 return result; 268 }
269 chuck 1.19 */ 270 271 static MessageLoaderParms _formMessage(Uint32 code, Uint32 line, const String& message) 272 { 273 String dftMsg = _xmlMessages[Uint32(code) - 1]; 274 String key = _xmlKeys[Uint32(code) - 1]; 275 String msg = message; 276 277 dftMsg.append(": on line $0"); 278 if (message.size()) 279 {
280 humberto 1.20 msg = ": " + msg;
281 chuck 1.19 dftMsg.append("$1"); 282 } 283 284 return MessageLoaderParms(key, dftMsg, line ,msg); 285 } 286 287 static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line) 288 { 289 String dftMsg = _xmlMessages[Uint32(code) - 1]; 290 String key = _xmlKeys[Uint32(code) - 1]; 291 292 dftMsg.append(": on line $0"); 293 294 return MessageLoaderParms(key, dftMsg, line); 295 } 296
297 mike 1.13 298 XmlException::XmlException( 299 XmlException::Code code, 300 Uint32 lineNumber, 301 const String& message) 302 : Exception(_formMessage(code, lineNumber, message)) 303 { 304 305 } 306
307 chuck 1.19 308 XmlException::XmlException( 309 XmlException::Code code, 310 Uint32 lineNumber, 311 MessageLoaderParms& msgParms) 312 : Exception(_formPartialMessage(code, lineNumber)) 313 {
314 humberto 1.21 if (msgParms.default_msg.size()) 315 { 316 msgParms.default_msg = ": " + msgParms.default_msg; 317 }
318 chuck 1.19 _rep->message.append(MessageLoader::getMessage(msgParms)); 319 } 320 321
322 mike 1.13 //////////////////////////////////////////////////////////////////////////////// 323 // 324 // XmlValidationError 325 // 326 //////////////////////////////////////////////////////////////////////////////// 327 328 XmlValidationError::XmlValidationError( 329 Uint32 lineNumber, 330 const String& message) 331 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message) 332 { 333 334 } 335
336 chuck 1.19 337 XmlValidationError::XmlValidationError( 338 Uint32 lineNumber, 339 MessageLoaderParms& msgParms) 340 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms) 341 { 342 343 } 344 345
346 mike 1.13 //////////////////////////////////////////////////////////////////////////////// 347 // 348 // XmlSemanticError 349 // 350 //////////////////////////////////////////////////////////////////////////////// 351 352 XmlSemanticError::XmlSemanticError( 353 Uint32 lineNumber, 354 const String& message) 355 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message) 356 { 357 358 }
359 chuck 1.19 360 361 XmlSemanticError::XmlSemanticError( 362 Uint32 lineNumber, 363 MessageLoaderParms& msgParms) 364 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms) 365 { 366 367 } 368
369 mike 1.13 370 //////////////////////////////////////////////////////////////////////////////// 371 // 372 // XmlParser 373 // 374 //////////////////////////////////////////////////////////////////////////////// 375 376 XmlParser::XmlParser(char* text) : _line(1), _text(text), _current(text), 377 _restoreChar('\0'), _foundRoot(false) 378 { 379 380 } 381 382 Boolean XmlParser::next(XmlEntry& entry) 383 { 384 if (!_putBackStack.isEmpty()) 385 { 386 entry = _putBackStack.top(); 387 _putBackStack.pop(); 388 return true; 389 } 390 mike 1.13 391 // If a character was overwritten with a null-terminator the last 392 // time this routine was called, then put back that character. Before 393 // exiting of course, restore the null-terminator. 394 395 char* nullTerminator = 0; 396 397 if (_restoreChar && !_current) 398 { 399 nullTerminator = _current; 400 _current = _restoreChar; 401 _restoreChar = '\0'; 402 } 403 404 // Skip over any whitespace: 405 406 _skipWhitespace(_current); 407 408 if (!_current) 409 { 410 if (nullTerminator) 411 mike 1.13 nullTerminator = '\0'; 412 413 if (!_stack.isEmpty()) 414 throw XmlException(XmlException::UNCLOSED_TAGS, _line); 415 416 return false; 417 } 418 419 // Either a "<...>" or content begins next: 420 421 if (_current == '<') 422 { 423 _current++; 424 _getElement(_current, entry); 425 426 if (nullTerminator) 427 nullTerminator = '\0'; 428 429 if (entry.type == XmlEntry::START_TAG) 430 { 431 if (_stack.isEmpty() && _foundRoot) 432 mike 1.13 throw XmlException(XmlException::MULTIPLE_ROOTS, _line); 433 434 _foundRoot = true; 435 _stack.push((char)entry.text); 436 } 437 else if (entry.type == XmlEntry::END_TAG) 438 { 439 if (_stack.isEmpty()) 440 throw XmlException(XmlException::START_END_MISMATCH, _line); 441 442 if (strcmp(_stack.top(), entry.text) != 0) 443 throw XmlException(XmlException::START_END_MISMATCH, _line); 444 445 _stack.pop(); 446 } 447 448 return true; 449 } 450 else 451 { 452 entry.type = XmlEntry::CONTENT; 453 mike 1.13 
entry.text = _current; 454 _getContent(_current); 455 _restoreChar = _current; 456 _current = '\0'; 457 458 if (nullTerminator) 459 nullTerminator = '\0'; 460 461 _substituteReferences((char)entry.text); 462 _normalize((char)entry.text); 463 464 return true; 465 } 466 } 467 468 void XmlParser::putBack(XmlEntry& entry) 469 { 470 _putBackStack.push(entry); 471 } 472 473 XmlParser::~XmlParser() 474 mike 1.13 { 475 // Nothing to do! 476 } 477 478 void XmlParser::_skipWhitespace(char& p) 479 { 480 while (p && isspace(p)) 481 { 482 if (p == '\n') 483 _line++; 484 485 p++; 486 } 487 } 488 489 Boolean XmlParser::_getElementName(char*& p) 490 {
491 david 1.25 if (!(((p >= 'A') && (p <= 'Z')) \|\|
492 kumpf 1.24 ((p >= 'a') && (p <= 'z')) \|\| 493 (*p == '_')))
494 mike 1.13 throw XmlException(XmlException::BAD_START_TAG, _line);
495 kumpf 1.24 p++;
496 mike 1.13
497 david 1.22 while ((p) && 498 (((p >= 'A') && (p <= 'Z')) \|\| 499 ((p >= 'a') && (*p <= 'z')) \|\|
500 kumpf 1.24 ((p >= '0') && (p <= '9')) \|\|
501 david 1.22 p == '_' \|\| p == '-' \|\| p == ':' \|\| p == '.'))
502 mike 1.13 p++; 503 504 // The next character must be a space: 505 506 if (isspace(p)) 507 { 508 p++ = '\0'; 509 _skipWhitespace(p); 510 } 511 512 if (p == '>') 513 { 514 p++ = '\0'; 515 return true; 516 } 517 518 return false; 519 } 520 521 Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement) 522 { 523 mike 1.13 openCloseElement = false; 524
525 david 1.25 if (!(((p >= 'A') && (p <= 'Z')) \|\|
526 kumpf 1.24 ((p >= 'a') && (p <= 'z')) \|\| 527 (*p == '_')))
528 mike 1.13 throw XmlException(XmlException::BAD_START_TAG, _line);
529 kumpf 1.24 p++;
530 mike 1.13
531 david 1.22 while ((p) && 532 (((p >= 'A') && (p <= 'Z')) \|\| 533 ((p >= 'a') && (*p <= 'z')) \|\|
534 kumpf 1.24 ((p >= '0') && (p <= '9')) \|\|
535 david 1.22 p == '_' \|\| p == '-' \|\| p == ':' \|\| p == '.'))
536 mike 1.13 p++; 537 538 // The next character must be a space: 539 540 if (isspace(p)) 541 { 542 p++ = '\0'; 543 _skipWhitespace(p); 544 } 545 546 if (p == '>') 547 { 548 p++ = '\0'; 549 return true; 550 } 551 552 if (p[0] == '/' && p[1] == '>') 553 { 554 openCloseElement = true; 555 p = '\0'; 556 p += 2; 557 mike 1.13 return true; 558 } 559 560 return false; 561 } 562 563 void XmlParser::_getAttributeNameAndEqual(char& p) 564 {
565 david 1.25 if (!(((p >= 'A') && (p <= 'Z')) \|\|
566 kumpf 1.24 ((p >= 'a') && (p <= 'z')) \|\| 567 (*p == '_')))
568 mike 1.13 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
569 kumpf 1.24 p++;
570 mike 1.13
571 david 1.22 while ((p) && 572 (((p >= 'A') && (p <= 'Z')) \|\| 573 ((p >= 'a') && (*p <= 'z')) \|\|
574 kumpf 1.24 ((p >= '0') && (p <= '9')) \|\|
575 david 1.22 p == '_' \|\| p == '-' \|\| p == ':' \|\| p == '.'))
576 mike 1.13 p++; 577 578 char* term = p; 579 580 _skipWhitespace(p); 581 582 if (p != '=') 583 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line); 584 585 p++; 586 587 _skipWhitespace(p); 588 589 term = '\0'; 590 } 591 592 void XmlParser::_getAttributeValue(char& p) 593 { 594 // ATTN-B: handle values contained in semiquotes: 595 596 if (p != '"' && p != '\'') 597 mike 1.13 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); 598 599 char startChar = p++; 600 601 while (p && p != startChar) 602 p++; 603 604 if (p != startChar) 605 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); 606 607 p++ = '\0'; 608 } 609 610 void XmlParser::_getComment(char& p) 611 { 612 // Now p points to first non-whitespace character beyond "<--" sequence: 613 614 for (; p; p++) 615 { 616 if (p[0] == '-' && p[1] == '-') 617 { 618 mike 1.13 if (p[2] != '>') 619 { 620 throw XmlException( 621 XmlException::MINUS_MINUS_IN_COMMENT, _line); 622 } 623 624 // Find end of comment (excluding whitespace): 625 626 p = '\0'; 627 p += 3; 628 return; 629 } 630 } 631 632 // If it got this far, then the comment is unterminated: 633 634 throw XmlException(XmlException::UNTERMINATED_COMMENT, _line); 635 } 636 637 void XmlParser::_getCData(char& p) 638 { 639 mike 1.13 // At this point p points one past "<![CDATA[" sequence: 640 641 for (; p; p++) 642 { 643 if (p[0] == ']' && p[1] == ']' && p[2] == '>') 644 { 645 p = '\0'; 646 p += 3; 647 return; 648 } 649 else if (p == '\n') 650 _line++; 651 } 652 653 // If it got this far, then the comment is unterminated: 654 655 throw XmlException(XmlException::UNTERMINATED_CDATA, _line); 656 } 657 658 void XmlParser::_getDocType(char& p) 659 { 660 mike 1.13 // Just ignore the DOCTYPE command for now: 661 662 for (; p && p != '>'; p++) 663 { 664 if (p == '\n') 665 _line++; 666 } 667 668 if (p != '>') 669 throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line); 670 671 p++; 672 } 673 674 void XmlParser::_getContent(char& p) 675 { 676 
while (p && p != '<') 677 { 678 if (p == '\n') 679 _line++; 680 681 mike 1.13 p++; 682 } 683 } 684 685 void XmlParser::_substituteReferences(char* text) 686 { 687 Uint32 rem = strlen(text); 688 689 for (char* p = text; p; p++, rem--) 690 { 691 if (p == '&') 692 {
693 kumpf 1.18 // Process character or entity reference
694 mike 1.13
695 kumpf 1.18 Uint16 referenceChar = 0; 696 Uint32 referenceLength = 0; 697 XmlException::Code code = XmlException::MALFORMED_REFERENCE; 698 699 if ((p+1) == '#') 700 { 701 // Found a character (numeric) reference 702 // Determine whether it is decimal or hex 703 if ((p+2) == 'x') 704 { 705 // Decode a hexadecimal character reference 706 char* q = p+3; 707 708 // At most four digits are allowed, plus trailing ';' 709 Uint32 numDigits; 710 for (numDigits = 0; numDigits < 5; numDigits++, q++) 711 { 712 if (isdigit(q)) 713 { 714 referenceChar = (referenceChar << 4); 715 referenceChar += (q - '0'); 716 kumpf 1.18 } 717 else if ((q >= 'A') && (q <= 'F')) 718 { 719 referenceChar = (referenceChar << 4); 720 referenceChar += (q - 'A' + 10); 721 } 722 else if ((q >= 'a') && (q <= 'f')) 723 { 724 referenceChar = (referenceChar << 4); 725 referenceChar += (q - 'a' + 10); 726 } 727 else if (q == ';') 728 { 729 break; 730 } 731 else 732 { 733 throw XmlException(code, _line); 734 } 735 } 736 737 kumpf 1.18 // Hex number must be 1 - 4 digits 738 if ((numDigits == 0) \|\| (numDigits > 4)) 739 { 740 throw XmlException(code, _line); 741 } 742 743 // ATTN: Currently do not support 16-bit characters 744 if (referenceChar > 0xff) 745 { 746 // ATTN: Is there a good way to say "unsupported"? 
747 throw XmlException(code, _line); 748 } 749 750 referenceLength = numDigits + 4; 751 } 752 else 753 { 754 // Decode a decimal character reference 755 Uint32 newChar = 0; 756 char q = p+2; 757 758 kumpf 1.18 // At most five digits are allowed, plus trailing ';' 759 Uint32 numDigits; 760 for (numDigits = 0; numDigits < 6; numDigits++, q++) 761 { 762 if (isdigit(q)) 763 { 764 newChar = (newChar 10); 765 newChar += (q - '0'); 766 } 767 else if (q == ';') 768 { 769 break; 770 } 771 else 772 { 773 throw XmlException(code, _line); 774 } 775 } 776 777 // Decimal number must be 1 - 5 digits and fit in 16 bits 778 if ((numDigits == 0) \|\| (numDigits > 5) \|\| 779 kumpf 1.18 (newChar > 0xffff)) 780 { 781 throw XmlException(code, _line); 782 } 783 784 // ATTN: Currently do not support 16-bit characters 785 if (newChar > 0xff) 786 { 787 // ATTN: Is there a good way to say "unsupported"? 788 throw XmlException(code, _line); 789 } 790 791 referenceChar = Uint16(newChar); 792 referenceLength = numDigits + 3; 793 } 794 } 795 else 796 { 797 // Check for entity reference 798 // ATTN: Inefficient if many entity references are supported 799 Uint32 i; 800 kumpf 1.18 for (i = 0; i < _REFERENCES_SIZE; i++) 801 { 802 Uint32 length = _references[i].length; 803 const char* match = _references[i].match; 804 805 if (strncmp(p, _references[i].match, length) == 0) 806 { 807 referenceChar = _references[i].replacement; 808 referenceLength = length; 809 break; 810 } 811 } 812 813 if (i == _REFERENCES_SIZE) 814 { 815 // Didn't recognize the entity reference 816 // ATTN: Is there a good way to say "unsupported"? 817 throw XmlException(code, _line); 818 } 819 } 820 821 kumpf 1.18 // Replace the reference with the correct character 822 p = (char)referenceChar; 823 char q = p + referenceLength; 824 rem = rem - referenceLength + 1; 825 memmove(p + 1, q, rem);
826 mike 1.13 } 827 } 828 } 829 830 static const char _EMPTY_STRING[] = ""; 831 832 void XmlParser::_getElement(char& p, XmlEntry& entry) 833 { 834 entry.attributeCount = 0; 835 836 //-------------------------------------------------------------------------- 837 // Get the element name (expect one of these: '?', '!', [A-Za-z_]) 838 //-------------------------------------------------------------------------- 839 840 if (p == '?') 841 { 842 entry.type = XmlEntry::XML_DECLARATION; 843 entry.text = ++p; 844 845 Boolean openCloseElement = false; 846 847 mike 1.13 if (_getElementName(p)) 848 return; 849 } 850 else if (p == '!') 851 { 852 p++; 853 854 // Expect a comment or CDATA: 855 856 if (p[0] == '-' && p[1] == '-') 857 { 858 p += 2; 859 entry.type = XmlEntry::COMMENT; 860 entry.text = p; 861 _getComment(p); 862 return; 863 } 864 else if (memcmp(p, "[CDATA[", 7) == 0) 865 { 866 p += 7; 867 entry.type = XmlEntry::CDATA; 868 mike 1.13 entry.text = p; 869 _getCData(p); 870 return; 871 } 872 else if (memcmp(p, "DOCTYPE", 7) == 0) 873 { 874 entry.type = XmlEntry::DOCTYPE; 875 entry.text = _EMPTY_STRING; 876 _getDocType(p); 877 return; 878 } 879 throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line)); 880 } 881 else if (p == '/') 882 { 883 entry.type = XmlEntry::END_TAG; 884 entry.text = ++p; 885 886 if (!_getElementName(p)) 887 throw(XmlException(XmlException::BAD_END_TAG, _line)); 888 889 mike 1.13 return; 890 }
891 david 1.25 else if ((((p >= 'A') && (p <= 'Z')) \|\|
892 kumpf 1.24 ((p >= 'a') && (p <= 'z')) \|\| 893 (*p == '_')))
894 mike 1.13 { 895 entry.type = XmlEntry::START_TAG; 896 entry.text = p; 897 898 Boolean openCloseElement = false; 899 900 if (_getOpenElementName(p, openCloseElement)) 901 { 902 if (openCloseElement) 903 entry.type = XmlEntry::EMPTY_TAG; 904 return; 905 } 906 } 907 else 908 throw XmlException(XmlException::BAD_START_TAG, _line); 909 910 //-------------------------------------------------------------------------- 911 // Grab all the attributes: 912 //-------------------------------------------------------------------------- 913 914 for (;;) 915 mike 1.13 { 916 if (entry.type == XmlEntry::XML_DECLARATION) 917 { 918 if (p[0] == '?' && p[1] == '>') 919 { 920 p += 2; 921 return; 922 } 923 } 924 else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>') 925 { 926 entry.type = XmlEntry::EMPTY_TAG; 927 p += 2; 928 return; 929 } 930 else if (p == '>') 931 { 932 p++; 933 return; 934 } 935 936 mike 1.13 XmlAttribute attr; 937 attr.name = p; 938 _getAttributeNameAndEqual(p); 939 940 if (p != '"' && p != '\'') 941 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); 942 943 attr.value = p + 1; 944 _getAttributeValue(p); 945 946 if (entry.type == XmlEntry::XML_DECLARATION) 947 { 948 // The next thing must a space or a "?>": 949 950 if (!(p[0] == '?' 
&& p[1] == '>') && !isspace(p)) 951 { 952 throw XmlException( 953 XmlException::BAD_ATTRIBUTE_VALUE, _line); 954 } 955 } 956 else if (!(p == '>' \|\| (p[0] == '/' && p[1] == '>') \|\| isspace(p))) 957 mike 1.13 { 958 // The next thing must be a space or a '>': 959 960 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); 961 } 962 963 _skipWhitespace(p); 964 965 if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES) 966 throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line); 967 968 _substituteReferences((char)attr.value); 969 entry.attributes[entry.attributeCount++] = attr; 970 } 971 } 972 973 static const char _typeStrings[] = 974 { 975 "XML_DECLARATION", 976 "START_TAG", 977 "EMPTY_TAG", 978 mike 1.13 "END_TAG", 979 "COMMENT", 980 "CDATA", 981 "DOCTYPE", 982 "CONTENT" 983 }; 984 985 void XmlEntry::print() const 986 { 987 PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " "; 988 989 Boolean needQuotes = type == XmlEntry::CDATA \|\| type == XmlEntry::CONTENT; 990 991 if (needQuotes) 992 PEGASUS_STD(cout) << "\""; 993 994 _printValue(text); 995 996 if (needQuotes) 997 PEGASUS_STD(cout) << "\""; 998 999 mike 1.13 PEGASUS_STD(cout) << '\n'; 1000 1001 for (Uint32 i = 0; i < attributeCount; i++) 1002 { 1003 PEGASUS_STD(cout) << " " << attributes[i].name << "=\""; 1004 _printValue(attributes[i].value); 1005 PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl); 1006 } 1007 } 1008 1009 const XmlAttribute* XmlEntry::findAttribute( 1010 const char* name) const 1011 { 1012 for (Uint32 i = 0; i < attributeCount; i++) 1013 { 1014 if (strcmp(attributes[i].name, name) == 0) 1015 return &attributes[i]; 1016 } 1017 1018 return 0; 1019 } 1020 mike 1.13 1021 // Find first non-whitespace character (set first) and last non-whitespace 1022 // character (set last one past this). For example, consider this string: 1023 // 1024 // " 87 " 1025 // 1026 // The first pointer would point to '8' and the last pointer woudl point one 1027 // beyond '7'. 
1028 1029 static void _findEnds( 1030 const char* str, 1031 const char& first, 1032 const char& last) 1033 { 1034 first = str; 1035 1036 while (isspace(first)) 1037 first++; 1038 1039 if (!first) 1040 { 1041 mike 1.13 last = first; 1042 return; 1043 } 1044 1045 last = first + strlen(first); 1046 1047 while (last != first && isspace(last[-1])) 1048 last--; 1049 } 1050 1051 Boolean XmlEntry::getAttributeValue( 1052 const char* name, 1053 Uint32& value) const 1054 { 1055 const XmlAttribute* attr = findAttribute(name); 1056 1057 if (!attr) 1058 return false; 1059 1060 const char* first; 1061 const char* last; 1062 mike 1.13 _findEnds(attr->value, first, last); 1063 1064 char* end = 0; 1065 long tmp = strtol(first, &end, 10); 1066 1067 if (!end \|\| end != last) 1068 return false; 1069 1070 value = Uint32(tmp); 1071 return true; 1072 } 1073 1074 Boolean XmlEntry::getAttributeValue( 1075 const char* name, 1076 Real32& value) const 1077 { 1078 const XmlAttribute* attr = findAttribute(name); 1079 1080 if (!attr) 1081 return false; 1082 1083 mike 1.13 const char* first; 1084 const char* last; 1085 _findEnds(attr->value, first, last); 1086 1087 char* end = 0; 1088 double tmp = strtod(first, &end); 1089 1090 if (!end \|\| end != last) 1091 return false; 1092 1093 value = Uint32(tmp); 1094 return true; 1095 } 1096 1097 Boolean XmlEntry::getAttributeValue( 1098 const char* name, 1099 const char& value) const 1100 { 1101 const XmlAttribute attr = findAttribute(name); 1102 1103 if (!attr) 1104 mike 1.13 return false; 1105 1106 value = attr->value; 1107 return true; 1108 } 1109 1110 Boolean XmlEntry::getAttributeValue(const char* name, String& value) const 1111 { 1112 const char* tmp; 1113 1114 if (!getAttributeValue(name, tmp)) 1115 return false; 1116
1117 david 1.22 value = String(tmp,STRING_FLAG_UTF8);
1118 mike 1.13 return true; 1119 } 1120 1121 void XmlAppendCString(Array<Sint8>& out, const char* str) 1122 { 1123 out.append(str, strlen(str)); 1124 } 1125 1126 PEGASUS_NAMESPACE_END

No CVS admin address has been configured