pegasus/src/Pegasus/Common/XmlParser.cpp - annotate

Return to XmlParser.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

1 karl 1.27 //%2003////////////////////////////////////////////////////////////////////////
2 mike 1.13 //
3 karl 1.27 // Copyright (c) 2000, 2001, 2002 BMC Software, Hewlett-Packard Development 4 // Company, L. P., IBM Corp., The Open Group, Tivoli Systems. 5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L. P.; 6 // IBM Corp.; EMC Corporation, The Open Group.
7 mike 1.13 // 8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 kumpf 1.16 // of this software and associated documentation files (the "Software"), to 10 // deal in the Software without restriction, including without limitation the 11 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
12 mike 1.13 // sell copies of the Software, and to permit persons to whom the Software is 13 // furnished to do so, subject to the following conditions: 14 //
15 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
16 mike 1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED 17 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
18 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 19 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 20 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 mike 1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 // 24 //============================================================================== 25 // 26 // Author: Mike Brasher (mbrasher@bmc.com) 27 // 28 // Modified By: 29 // 30 //%///////////////////////////////////////////////////////////////////////////// 31 32 //////////////////////////////////////////////////////////////////////////////// 33 // 34 // XmlParser 35 // 36 // This file contains a simple non-validating XML parser. Here are 37 // serveral rules for well-formed XML: 38 // 39 // 1. Documents must begin with an XML declaration: 40 // 41 // <?xml version="1.0" standalone="yes"?> 42 mike 1.13 // 43 // 2. Comments have the form: 44 // 45 // <!-- blah blah blah --> 46 // 47 // 3. The following entity references are supported: 48 // 49 // &amp - ampersand 50 // &lt - less-than 51 // &gt - greater-than 52 // &quot - full quote 53 // &apos - apostrophe 54 //
55 kumpf 1.18 // as well as character (numeric) references: 56 // 57 // &#49; - decimal reference for character '1' 58 // &#x31; - hexadecimal reference for character '1' 59 //
60 mike 1.13 // 4. Element names and attribute names take the following form: 61 // 62 // [A-Za-z_][A-Za-z_0-9-.:]* 63 // 64 // 5. Arbitrary data (CDATA) can be enclosed like this: 65 // 66 // <![CDATA[ 67 // ... 68 // ]]> 69 // 70 // 6. Element names and attribute names are case-sensitive. 71 // 72 // 7. XmlAttribute values must be delimited by full or half quotes. 73 // XmlAttribute values must be delimited. 74 // 75 // 8. <!DOCTYPE...> 76 // 77 // TODO: 78 //
79 karl 1.15 // ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is work.
80 mike 1.13 // Handle <!DOCTYPE...> sections which are complicated (containing 81 // rules rather than references to files). 82 // 83 // Remove newlines from string literals: 84 // 85 // Example: <xyz x="hello 86 // world"> 87 // 88 //////////////////////////////////////////////////////////////////////////////// 89
90 sage 1.14 #include <Pegasus/Common/Config.h>
91 mike 1.13 #include <cctype> 92 #include <cstdio> 93 #include <cstdlib> 94 #include <cstring> 95 #include "XmlParser.h" 96 #include "Logger.h"
97 chuck 1.19 #include "ExceptionRep.h"
98 mike 1.13 99 PEGASUS_NAMESPACE_BEGIN 100 101 #define PEGASUS_ARRAY_T XmlEntry 102 # include "ArrayImpl.h" 103 #undef PEGASUS_ARRAY_T 104 105 106 //////////////////////////////////////////////////////////////////////////////// 107 // 108 // Static helper functions 109 // 110 //////////////////////////////////////////////////////////////////////////////// 111 112 static void _printValue(const char* p) 113 { 114 for (; p; p++) 115 { 116 if (p == '\n') 117 PEGASUS_STD(cout) << "\\n"; 118 else if (p == '\r') 119 mike 1.13 PEGASUS_STD(cout) << "\\r"; 120 else if (p == '\t') 121 PEGASUS_STD(cout) << "\\t"; 122 else 123 PEGASUS_STD(cout) << p; 124 } 125 } 126 127 struct EntityReference 128 { 129 const char match; 130 Uint32 length; 131 char replacement; 132 }; 133
134 kumpf 1.18 // ATTN: Add support for more entity references
135 mike 1.13 static EntityReference _references[] = 136 { 137 { "&", 5, '&' }, 138 { "<", 4, '<' }, 139 { ">", 4, '>' }, 140 { """, 6, '"' }, 141 { "'", 6, '\'' } 142 }; 143

// Implements a check for a whitespace character, without calling
// isspace( ).  The isspace( ) function is locale-sensitive,
// and incorrectly flags some chars above 0x7f as whitespace.  This
// causes the XmlParser to incorrectly parse UTF-8 data.
//
// Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml)
// defines white space as:
// S    ::=    (#x20 | #x9 | #xD | #xA)+
//
// Returns 1 if c is space, carriage return, tab or newline; 0 otherwise.
static int _isspace(char c)
{
    if (c == ' ' || c == '\r' || c == '\t' || c == '\n')
        return 1;
    return 0;
}


161 mike 1.13 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0])); 162 163 // Remove all redundant spaces from the given string: 164 165 static void _normalize(char* text) 166 { 167 Uint32 length = strlen(text); 168 char* p = text; 169 char* end = p + length; 170 171 // Remove leading spaces: 172
173 chuck 1.26 while (_isspace(*p)) 174 p++;
175 mike 1.13 176 if (p != text) 177 memmove(text, p, end - p + 1); 178 179 p = text; 180 181 // Look for sequences of more than one space and remove all but one. 182 183 for (;;) 184 { 185 // Advance to the next space: 186
187 chuck 1.26 while (p && !_isspace(p))
188 mike 1.13 p++; 189 190 if (!p) 191 break; 192 193 // Advance to the next non-space: 194 195 char q = p++; 196
197 chuck 1.26 while (_isspace(*p))
198 mike 1.13 p++; 199 200 // Discard trailing spaces (if we are at the end): 201 202 if (!p) 203 { 204 q = '\0'; 205 break; 206 } 207 208 // Remove the redundant spaces: 209 210 Uint32 n = p - q; 211 212 if (n > 1) 213 { 214 q++ = ' '; 215 memmove(q, p, end - p + 1); 216 p = q; 217 } 218 } 219 mike 1.13 } 220 221 //////////////////////////////////////////////////////////////////////////////// 222 // 223 // XmlException 224 // 225 //////////////////////////////////////////////////////////////////////////////// 226 227 static const char _xmlMessages[] = 228 { 229 "Bad opening element", 230 "Bad closing element", 231 "Bad attribute name", 232 "Exepected equal sign", 233 "Bad attribute value", 234 "A \"--\" sequence found within comment", 235 "Unterminated comment", 236 "Unterminated CDATA block", 237 "Unterminated DOCTYPE", 238 "Too many attributes: parser only handles 10", 239 "Malformed reference", 240 mike 1.13 "Expected a comment or CDATA following \"<!\" sequence", 241 "Closing element does not match opening element", 242 "One or more tags are still open", 243 "More than one root element was encountered", 244 "Validation error", 245 "Semantic error" 246 }; 247
// Message-bundle keys, indexed by (XmlException::Code - 1).
// Must stay in the same order as _xmlMessages.
static const char* _xmlKeys[] =
{
    "Common.XmlParser.BAD_START_TAG",
    "Common.XmlParser.BAD_END_TAG",
    "Common.XmlParser.BAD_ATTRIBUTE_NAME",
    "Common.XmlParser.EXPECTED_EQUAL_SIGN",
    "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
    "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
    "Common.XmlParser.UNTERMINATED_COMMENT",
    "Common.XmlParser.UNTERMINATED_CDATA",
    "Common.XmlParser.UNTERMINATED_DOCTYPE",
    "Common.XmlParser.TOO_MANY_ATTRIBUTES",
    "Common.XmlParser.MALFORMED_REFERENCE",
    "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
    "Common.XmlParser.START_END_MISMATCH",
    "Common.XmlParser.UNCLOSED_TAGS",
    "Common.XmlParser.MULTIPLE_ROOTS",
    "Common.XmlParser.VALIDATION_ERROR",
    "Common.XmlParser.SEMANTIC_ERROR"
};

269 chuck 1.23 // l10n replace _formMessage (comment out the old one)
270 chuck 1.19 /*
271 mike 1.13 static String _formMessage(Uint32 code, Uint32 line, const String& message) 272 { 273 String result = _xmlMessages[Uint32(code) - 1]; 274 275 char buffer[32]; 276 sprintf(buffer, "%d", line); 277 result.append(": on line "); 278 result.append(buffer); 279 280 if (message.size()) 281 { 282 result.append(": "); 283 result.append(message); 284 } 285 286 return result; 287 }
288 chuck 1.19 */ 289 290 static MessageLoaderParms _formMessage(Uint32 code, Uint32 line, const String& message) 291 { 292 String dftMsg = _xmlMessages[Uint32(code) - 1]; 293 String key = _xmlKeys[Uint32(code) - 1]; 294 String msg = message; 295 296 dftMsg.append(": on line $0"); 297 if (message.size()) 298 {
299 humberto 1.20 msg = ": " + msg;
300 chuck 1.19 dftMsg.append("$1"); 301 } 302 303 return MessageLoaderParms(key, dftMsg, line ,msg); 304 } 305 306 static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line) 307 { 308 String dftMsg = _xmlMessages[Uint32(code) - 1]; 309 String key = _xmlKeys[Uint32(code) - 1]; 310 311 dftMsg.append(": on line $0"); 312 313 return MessageLoaderParms(key, dftMsg, line); 314 } 315
316 mike 1.13 317 XmlException::XmlException( 318 XmlException::Code code, 319 Uint32 lineNumber, 320 const String& message) 321 : Exception(_formMessage(code, lineNumber, message)) 322 { 323 324 } 325
326 chuck 1.19 327 XmlException::XmlException( 328 XmlException::Code code, 329 Uint32 lineNumber, 330 MessageLoaderParms& msgParms) 331 : Exception(_formPartialMessage(code, lineNumber)) 332 {
333 humberto 1.21 if (msgParms.default_msg.size()) 334 { 335 msgParms.default_msg = ": " + msgParms.default_msg; 336 }
337 chuck 1.19 _rep->message.append(MessageLoader::getMessage(msgParms)); 338 } 339 340
341 mike 1.13 //////////////////////////////////////////////////////////////////////////////// 342 // 343 // XmlValidationError 344 // 345 //////////////////////////////////////////////////////////////////////////////// 346 347 XmlValidationError::XmlValidationError( 348 Uint32 lineNumber, 349 const String& message) 350 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message) 351 { 352 353 } 354
355 chuck 1.19 356 XmlValidationError::XmlValidationError( 357 Uint32 lineNumber, 358 MessageLoaderParms& msgParms) 359 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms) 360 { 361 362 } 363 364
365 mike 1.13 //////////////////////////////////////////////////////////////////////////////// 366 // 367 // XmlSemanticError 368 // 369 //////////////////////////////////////////////////////////////////////////////// 370 371 XmlSemanticError::XmlSemanticError( 372 Uint32 lineNumber, 373 const String& message) 374 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message) 375 { 376 377 }
378 chuck 1.19 379 380 XmlSemanticError::XmlSemanticError( 381 Uint32 lineNumber, 382 MessageLoaderParms& msgParms) 383 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms) 384 { 385 386 } 387
388 mike 1.13 389 //////////////////////////////////////////////////////////////////////////////// 390 // 391 // XmlParser 392 // 393 //////////////////////////////////////////////////////////////////////////////// 394 395 XmlParser::XmlParser(char* text) : _line(1), _text(text), _current(text), 396 _restoreChar('\0'), _foundRoot(false) 397 { 398 399 } 400 401 Boolean XmlParser::next(XmlEntry& entry) 402 { 403 if (!_putBackStack.isEmpty()) 404 { 405 entry = _putBackStack.top(); 406 _putBackStack.pop(); 407 return true; 408 } 409 mike 1.13 410 // If a character was overwritten with a null-terminator the last 411 // time this routine was called, then put back that character. Before 412 // exiting of course, restore the null-terminator. 413 414 char* nullTerminator = 0; 415 416 if (_restoreChar && !_current) 417 { 418 nullTerminator = _current; 419 _current = _restoreChar; 420 _restoreChar = '\0'; 421 } 422 423 // Skip over any whitespace: 424 425 _skipWhitespace(_current); 426 427 if (!_current) 428 { 429 if (nullTerminator) 430 mike 1.13 nullTerminator = '\0'; 431 432 if (!_stack.isEmpty()) 433 throw XmlException(XmlException::UNCLOSED_TAGS, _line); 434 435 return false; 436 } 437 438 // Either a "<...>" or content begins next: 439 440 if (_current == '<') 441 { 442 _current++; 443 _getElement(_current, entry); 444 445 if (nullTerminator) 446 nullTerminator = '\0'; 447 448 if (entry.type == XmlEntry::START_TAG) 449 { 450 if (_stack.isEmpty() && _foundRoot) 451 mike 1.13 throw XmlException(XmlException::MULTIPLE_ROOTS, _line); 452 453 _foundRoot = true; 454 _stack.push((char)entry.text); 455 } 456 else if (entry.type == XmlEntry::END_TAG) 457 { 458 if (_stack.isEmpty()) 459 throw XmlException(XmlException::START_END_MISMATCH, _line); 460 461 if (strcmp(_stack.top(), entry.text) != 0) 462 throw XmlException(XmlException::START_END_MISMATCH, _line); 463 464 _stack.pop(); 465 } 466 467 return true; 468 } 469 else 470 { 471 entry.type = XmlEntry::CONTENT; 472 mike 1.13 
entry.text = _current; 473 _getContent(_current); 474 _restoreChar = _current; 475 _current = '\0'; 476 477 if (nullTerminator) 478 nullTerminator = '\0'; 479 480 _substituteReferences((char)entry.text); 481 _normalize((char)entry.text); 482 483 return true; 484 } 485 } 486 487 void XmlParser::putBack(XmlEntry& entry) 488 { 489 _putBackStack.push(entry); 490 } 491 492 XmlParser::~XmlParser() 493 mike 1.13 { 494 // Nothing to do! 495 } 496 497 void XmlParser::_skipWhitespace(char*& p) 498 {
499 chuck 1.26 while (p && _isspace(p))
500 mike 1.13 { 501 if (p == '\n') 502 _line++; 503 504 p++; 505 } 506 } 507 508 Boolean XmlParser::_getElementName(char& p) 509 {
510 david 1.25 if (!(((p >= 'A') && (p <= 'Z')) \|\|
511 kumpf 1.24 ((p >= 'a') && (p <= 'z')) \|\| 512 (*p == '_')))
513 mike 1.13 throw XmlException(XmlException::BAD_START_TAG, _line);
514 kumpf 1.24 p++;
515 mike 1.13
516 david 1.22 while ((p) && 517 (((p >= 'A') && (p <= 'Z')) \|\| 518 ((p >= 'a') && (*p <= 'z')) \|\|
519 kumpf 1.24 ((p >= '0') && (p <= '9')) \|\|
520 david 1.22 p == '_' \|\| p == '-' \|\| p == ':' \|\| p == '.'))
521 mike 1.13 p++; 522 523 // The next character must be a space: 524
525 chuck 1.26 if (_isspace(*p))
526 mike 1.13 { 527 p++ = '\0'; 528 _skipWhitespace(p); 529 } 530 531 if (p == '>') 532 { 533 p++ = '\0'; 534 return true; 535 } 536 537 return false; 538 } 539 540 Boolean XmlParser::_getOpenElementName(char& p, Boolean& openCloseElement) 541 { 542 openCloseElement = false; 543
544 david 1.25 if (!(((p >= 'A') && (p <= 'Z')) \|\|
545 kumpf 1.24 ((p >= 'a') && (p <= 'z')) \|\| 546 (*p == '_')))
547 mike 1.13 throw XmlException(XmlException::BAD_START_TAG, _line);
548 kumpf 1.24 p++;
549 mike 1.13
550 david 1.22 while ((p) && 551 (((p >= 'A') && (p <= 'Z')) \|\| 552 ((p >= 'a') && (*p <= 'z')) \|\|
553 kumpf 1.24 ((p >= '0') && (p <= '9')) \|\|
554 david 1.22 p == '_' \|\| p == '-' \|\| p == ':' \|\| p == '.'))
555 mike 1.13 p++; 556 557 // The next character must be a space: 558
559 chuck 1.26 if (_isspace(*p))
560 mike 1.13 { 561 p++ = '\0'; 562 _skipWhitespace(p); 563 } 564 565 if (p == '>') 566 { 567 p++ = '\0'; 568 return true; 569 } 570 571 if (p[0] == '/' && p[1] == '>') 572 { 573 openCloseElement = true; 574 p = '\0'; 575 p += 2; 576 return true; 577 } 578 579 return false; 580 } 581 mike 1.13 582 void XmlParser::_getAttributeNameAndEqual(char*& p) 583 {
584 david 1.25 if (!(((p >= 'A') && (p <= 'Z')) \|\|
585 kumpf 1.24 ((p >= 'a') && (p <= 'z')) \|\| 586 (*p == '_')))
587 mike 1.13 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
588 kumpf 1.24 p++;
589 mike 1.13
590 david 1.22 while ((p) && 591 (((p >= 'A') && (p <= 'Z')) \|\| 592 ((p >= 'a') && (*p <= 'z')) \|\|
593 kumpf 1.24 ((p >= '0') && (p <= '9')) \|\|
594 david 1.22 p == '_' \|\| p == '-' \|\| p == ':' \|\| p == '.'))
595 mike 1.13 p++; 596 597 char* term = p; 598 599 _skipWhitespace(p); 600 601 if (p != '=') 602 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line); 603 604 p++; 605 606 _skipWhitespace(p); 607 608 term = '\0'; 609 } 610 611 void XmlParser::_getAttributeValue(char& p) 612 { 613 // ATTN-B: handle values contained in semiquotes: 614 615 if (p != '"' && p != '\'') 616 mike 1.13 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); 617 618 char startChar = p++; 619 620 while (p && p != startChar) 621 p++; 622 623 if (p != startChar) 624 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); 625 626 p++ = '\0'; 627 } 628 629 void XmlParser::_getComment(char& p) 630 { 631 // Now p points to first non-whitespace character beyond "<--" sequence: 632 633 for (; p; p++) 634 { 635 if (p[0] == '-' && p[1] == '-') 636 { 637 mike 1.13 if (p[2] != '>') 638 { 639 throw XmlException( 640 XmlException::MINUS_MINUS_IN_COMMENT, _line); 641 } 642 643 // Find end of comment (excluding whitespace): 644 645 p = '\0'; 646 p += 3; 647 return; 648 } 649 } 650 651 // If it got this far, then the comment is unterminated: 652 653 throw XmlException(XmlException::UNTERMINATED_COMMENT, _line); 654 } 655 656 void XmlParser::_getCData(char& p) 657 { 658 mike 1.13 // At this point p points one past "<![CDATA[" sequence: 659 660 for (; p; p++) 661 { 662 if (p[0] == ']' && p[1] == ']' && p[2] == '>') 663 { 664 p = '\0'; 665 p += 3; 666 return; 667 } 668 else if (p == '\n') 669 _line++; 670 } 671 672 // If it got this far, then the comment is unterminated: 673 674 throw XmlException(XmlException::UNTERMINATED_CDATA, _line); 675 } 676 677 void XmlParser::_getDocType(char& p) 678 { 679 mike 1.13 // Just ignore the DOCTYPE command for now: 680 681 for (; p && p != '>'; p++) 682 { 683 if (p == '\n') 684 _line++; 685 } 686 687 if (p != '>') 688 throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line); 689 690 p++; 691 } 692 693 void XmlParser::_getContent(char& p) 694 { 695 
while (p && p != '<') 696 { 697 if (p == '\n') 698 _line++; 699 700 mike 1.13 p++; 701 } 702 } 703 704 void XmlParser::_substituteReferences(char* text) 705 { 706 Uint32 rem = strlen(text); 707 708 for (char* p = text; p; p++, rem--) 709 { 710 if (p == '&') 711 {
712 kumpf 1.18 // Process character or entity reference
713 mike 1.13
714 kumpf 1.18 Uint16 referenceChar = 0; 715 Uint32 referenceLength = 0; 716 XmlException::Code code = XmlException::MALFORMED_REFERENCE; 717 718 if ((p+1) == '#') 719 { 720 // Found a character (numeric) reference 721 // Determine whether it is decimal or hex 722 if ((p+2) == 'x') 723 { 724 // Decode a hexadecimal character reference 725 char* q = p+3; 726 727 // At most four digits are allowed, plus trailing ';' 728 Uint32 numDigits; 729 for (numDigits = 0; numDigits < 5; numDigits++, q++) 730 { 731 if (isdigit(q)) 732 { 733 referenceChar = (referenceChar << 4); 734 referenceChar += (q - '0'); 735 kumpf 1.18 } 736 else if ((q >= 'A') && (q <= 'F')) 737 { 738 referenceChar = (referenceChar << 4); 739 referenceChar += (q - 'A' + 10); 740 } 741 else if ((q >= 'a') && (q <= 'f')) 742 { 743 referenceChar = (referenceChar << 4); 744 referenceChar += (q - 'a' + 10); 745 } 746 else if (q == ';') 747 { 748 break; 749 } 750 else 751 { 752 throw XmlException(code, _line); 753 } 754 } 755 756 kumpf 1.18 // Hex number must be 1 - 4 digits 757 if ((numDigits == 0) \|\| (numDigits > 4)) 758 { 759 throw XmlException(code, _line); 760 } 761 762 // ATTN: Currently do not support 16-bit characters 763 if (referenceChar > 0xff) 764 { 765 // ATTN: Is there a good way to say "unsupported"? 
766 throw XmlException(code, _line); 767 } 768 769 referenceLength = numDigits + 4; 770 } 771 else 772 { 773 // Decode a decimal character reference 774 Uint32 newChar = 0; 775 char q = p+2; 776 777 kumpf 1.18 // At most five digits are allowed, plus trailing ';' 778 Uint32 numDigits; 779 for (numDigits = 0; numDigits < 6; numDigits++, q++) 780 { 781 if (isdigit(q)) 782 { 783 newChar = (newChar 10); 784 newChar += (q - '0'); 785 } 786 else if (q == ';') 787 { 788 break; 789 } 790 else 791 { 792 throw XmlException(code, _line); 793 } 794 } 795 796 // Decimal number must be 1 - 5 digits and fit in 16 bits 797 if ((numDigits == 0) \|\| (numDigits > 5) \|\| 798 kumpf 1.18 (newChar > 0xffff)) 799 { 800 throw XmlException(code, _line); 801 } 802 803 // ATTN: Currently do not support 16-bit characters 804 if (newChar > 0xff) 805 { 806 // ATTN: Is there a good way to say "unsupported"? 807 throw XmlException(code, _line); 808 } 809 810 referenceChar = Uint16(newChar); 811 referenceLength = numDigits + 3; 812 } 813 } 814 else 815 { 816 // Check for entity reference 817 // ATTN: Inefficient if many entity references are supported 818 Uint32 i; 819 kumpf 1.18 for (i = 0; i < _REFERENCES_SIZE; i++) 820 { 821 Uint32 length = _references[i].length; 822 const char* match = _references[i].match; 823 824 if (strncmp(p, _references[i].match, length) == 0) 825 { 826 referenceChar = _references[i].replacement; 827 referenceLength = length; 828 break; 829 } 830 } 831 832 if (i == _REFERENCES_SIZE) 833 { 834 // Didn't recognize the entity reference 835 // ATTN: Is there a good way to say "unsupported"? 836 throw XmlException(code, _line); 837 } 838 } 839 840 kumpf 1.18 // Replace the reference with the correct character 841 p = (char)referenceChar; 842 char q = p + referenceLength; 843 rem = rem - referenceLength + 1; 844 memmove(p + 1, q, rem);
845 mike 1.13 } 846 } 847 } 848 849 static const char _EMPTY_STRING[] = ""; 850 851 void XmlParser::_getElement(char& p, XmlEntry& entry) 852 { 853 entry.attributeCount = 0; 854 855 //-------------------------------------------------------------------------- 856 // Get the element name (expect one of these: '?', '!', [A-Za-z_]) 857 //-------------------------------------------------------------------------- 858 859 if (p == '?') 860 { 861 entry.type = XmlEntry::XML_DECLARATION; 862 entry.text = ++p; 863 864 Boolean openCloseElement = false; 865 866 mike 1.13 if (_getElementName(p)) 867 return; 868 } 869 else if (p == '!') 870 { 871 p++; 872 873 // Expect a comment or CDATA: 874 875 if (p[0] == '-' && p[1] == '-') 876 { 877 p += 2; 878 entry.type = XmlEntry::COMMENT; 879 entry.text = p; 880 _getComment(p); 881 return; 882 } 883 else if (memcmp(p, "[CDATA[", 7) == 0) 884 { 885 p += 7; 886 entry.type = XmlEntry::CDATA; 887 mike 1.13 entry.text = p; 888 _getCData(p); 889 return; 890 } 891 else if (memcmp(p, "DOCTYPE", 7) == 0) 892 { 893 entry.type = XmlEntry::DOCTYPE; 894 entry.text = _EMPTY_STRING; 895 _getDocType(p); 896 return; 897 } 898 throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line)); 899 } 900 else if (p == '/') 901 { 902 entry.type = XmlEntry::END_TAG; 903 entry.text = ++p; 904 905 if (!_getElementName(p)) 906 throw(XmlException(XmlException::BAD_END_TAG, _line)); 907 908 mike 1.13 return; 909 }
910 david 1.25 else if ((((p >= 'A') && (p <= 'Z')) \|\|
911 kumpf 1.24 ((p >= 'a') && (p <= 'z')) \|\| 912 (*p == '_')))
913 mike 1.13 { 914 entry.type = XmlEntry::START_TAG; 915 entry.text = p; 916 917 Boolean openCloseElement = false; 918 919 if (_getOpenElementName(p, openCloseElement)) 920 { 921 if (openCloseElement) 922 entry.type = XmlEntry::EMPTY_TAG; 923 return; 924 } 925 } 926 else 927 throw XmlException(XmlException::BAD_START_TAG, _line); 928 929 //-------------------------------------------------------------------------- 930 // Grab all the attributes: 931 //-------------------------------------------------------------------------- 932 933 for (;;) 934 mike 1.13 { 935 if (entry.type == XmlEntry::XML_DECLARATION) 936 { 937 if (p[0] == '?' && p[1] == '>') 938 { 939 p += 2; 940 return; 941 } 942 } 943 else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>') 944 { 945 entry.type = XmlEntry::EMPTY_TAG; 946 p += 2; 947 return; 948 } 949 else if (p == '>') 950 { 951 p++; 952 return; 953 } 954 955 mike 1.13 XmlAttribute attr; 956 attr.name = p; 957 _getAttributeNameAndEqual(p); 958 959 if (p != '"' && *p != '\'') 960 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); 961 962 attr.value = p + 1; 963 _getAttributeValue(p); 964 965 if (entry.type == XmlEntry::XML_DECLARATION) 966 { 967 // The next thing must a space or a "?>": 968
969 chuck 1.26 if (!(p[0] == '?' && p[1] == '>') && !_isspace(*p))
970 mike 1.13 { 971 throw XmlException( 972 XmlException::BAD_ATTRIBUTE_VALUE, _line); 973 } 974 }
975 chuck 1.26 else if (!(p == '>' \|\| (p[0] == '/' && p[1] == '>') \|\| _isspace(p)))
976 mike 1.13 { 977 // The next thing must be a space or a '>': 978 979 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); 980 } 981 982 _skipWhitespace(p); 983 984 if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES) 985 throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line); 986 987 _substituteReferences((char)attr.value); 988 entry.attributes[entry.attributeCount++] = attr; 989 } 990 } 991 992 static const char _typeStrings[] = 993 { 994 "XML_DECLARATION", 995 "START_TAG", 996 "EMPTY_TAG", 997 mike 1.13 "END_TAG", 998 "COMMENT", 999 "CDATA", 1000 "DOCTYPE", 1001 "CONTENT" 1002 }; 1003 1004 void XmlEntry::print() const 1005 { 1006 PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " "; 1007 1008 Boolean needQuotes = type == XmlEntry::CDATA \|\| type == XmlEntry::CONTENT; 1009 1010 if (needQuotes) 1011 PEGASUS_STD(cout) << "\""; 1012 1013 _printValue(text); 1014 1015 if (needQuotes) 1016 PEGASUS_STD(cout) << "\""; 1017 1018 mike 1.13 PEGASUS_STD(cout) << '\n'; 1019 1020 for (Uint32 i = 0; i < attributeCount; i++) 1021 { 1022 PEGASUS_STD(cout) << " " << attributes[i].name << "=\""; 1023 _printValue(attributes[i].value); 1024 PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl); 1025 } 1026 } 1027 1028 const XmlAttribute* XmlEntry::findAttribute( 1029 const char* name) const 1030 { 1031 for (Uint32 i = 0; i < attributeCount; i++) 1032 { 1033 if (strcmp(attributes[i].name, name) == 0) 1034 return &attributes[i]; 1035 } 1036 1037 return 0; 1038 } 1039 mike 1.13 1040 // Find first non-whitespace character (set first) and last non-whitespace 1041 // character (set last one past this). For example, consider this string: 1042 // 1043 // " 87 " 1044 // 1045 // The first pointer would point to '8' and the last pointer woudl point one 1046 // beyond '7'. 1047 1048 static void _findEnds( 1049 const char* str, 1050 const char& first, 1051 const char& last) 1052 { 1053 first = str; 1054
1055 chuck 1.26 while (_isspace(*first))
1056 mike 1.13 first++; 1057 1058 if (!*first) 1059 { 1060 last = first; 1061 return; 1062 } 1063 1064 last = first + strlen(first); 1065
1066 chuck 1.26 while (last != first && _isspace(last[-1]))
1067 mike 1.13 last--; 1068 } 1069 1070 Boolean XmlEntry::getAttributeValue( 1071 const char* name, 1072 Uint32& value) const 1073 { 1074 const XmlAttribute* attr = findAttribute(name); 1075 1076 if (!attr) 1077 return false; 1078 1079 const char* first; 1080 const char* last; 1081 _findEnds(attr->value, first, last); 1082 1083 char* end = 0; 1084 long tmp = strtol(first, &end, 10); 1085 1086 if (!end \|\| end != last) 1087 return false; 1088 mike 1.13 1089 value = Uint32(tmp); 1090 return true; 1091 } 1092 1093 Boolean XmlEntry::getAttributeValue( 1094 const char* name, 1095 Real32& value) const 1096 { 1097 const XmlAttribute* attr = findAttribute(name); 1098 1099 if (!attr) 1100 return false; 1101 1102 const char* first; 1103 const char* last; 1104 _findEnds(attr->value, first, last); 1105 1106 char* end = 0; 1107 double tmp = strtod(first, &end); 1108 1109 mike 1.13 if (!end \|\| end != last) 1110 return false; 1111 1112 value = Uint32(tmp); 1113 return true; 1114 } 1115 1116 Boolean XmlEntry::getAttributeValue( 1117 const char* name, 1118 const char& value) const 1119 { 1120 const XmlAttribute attr = findAttribute(name); 1121 1122 if (!attr) 1123 return false; 1124 1125 value = attr->value; 1126 return true; 1127 } 1128 1129 Boolean XmlEntry::getAttributeValue(const char* name, String& value) const 1130 mike 1.13 { 1131 const char* tmp; 1132 1133 if (!getAttributeValue(name, tmp)) 1134 return false; 1135
1136 chuck 1.28 value = String(tmp);
1137 mike 1.13 return true; 1138 } 1139 1140 void XmlAppendCString(Array<Sint8>& out, const char* str) 1141 { 1142 out.append(str, strlen(str)); 1143 } 1144 1145 PEGASUS_NAMESPACE_END

No CVS admin address has been configured