pegasus/src/Pegasus/Common/XmlParser.cpp - annotate

Return to XmlParser.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

1 karl 1.29 //%2004////////////////////////////////////////////////////////////////////////
2 mike 1.13 //
3 karl 1.29 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development 4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems. 5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
6 karl 1.27 // IBM Corp.; EMC Corporation, The Open Group.
7 karl 1.29 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.; 8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
9 mike 1.13 // 10 // Permission is hereby granted, free of charge, to any person obtaining a copy
11 kumpf 1.16 // of this software and associated documentation files (the "Software"), to 12 // deal in the Software without restriction, including without limitation the 13 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
14 mike 1.13 // sell copies of the Software, and to permit persons to whom the Software is 15 // furnished to do so, subject to the following conditions: 16 //
17 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
18 mike 1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED 19 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
20 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 21 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 22 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
23 mike 1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 // 26 //============================================================================== 27 // 28 // Author: Mike Brasher (mbrasher@bmc.com) 29 // 30 // Modified By: 31 // 32 //%///////////////////////////////////////////////////////////////////////////// 33 34 //////////////////////////////////////////////////////////////////////////////// 35 // 36 // XmlParser 37 // 38 // This file contains a simple non-validating XML parser. Here are 39 // serveral rules for well-formed XML: 40 // 41 // 1. Documents must begin with an XML declaration: 42 // 43 // <?xml version="1.0" standalone="yes"?> 44 mike 1.13 // 45 // 2. Comments have the form: 46 // 47 // <!-- blah blah blah --> 48 // 49 // 3. The following entity references are supported: 50 // 51 // &amp - ampersand 52 // &lt - less-than 53 // &gt - greater-than 54 // &quot - full quote 55 // &apos - apostrophe 56 //
//              as well as character (numeric) references:
//
//              &#49; - decimal reference for character '1'
//              &#x31; - hexadecimal reference for character '1'
//
62 mike 1.13 // 4. Element names and attribute names take the following form: 63 // 64 // [A-Za-z_][A-Za-z_0-9-.:] 65 // 66 // 5. Arbitrary data (CDATA) can be enclosed like this: 67 // 68 // <![CDATA[ 69 // ... 70 // ]]> 71 // 72 // 6. Element names and attributes names are case-sensitive. 73 // 74 // 7. XmlAttribute values must be delimited by full or half quotes. 75 // XmlAttribute values must be delimited. 76 // 77 // 8. <!DOCTYPE...> 78 // 79 // TODO: 80 //
81 karl 1.15 // ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is work.
82 mike 1.13 // Handle <!DOCTYPE...> sections which are complicated (containing 83 // rules rather than references to files). 84 // 85 // Remove newlines from string literals: 86 // 87 // Example: <xyz x="hello 88 // world"> 89 // 90 //////////////////////////////////////////////////////////////////////////////// 91
92 sage 1.14 #include <Pegasus/Common/Config.h>
93 mike 1.13 #include <cctype> 94 #include <cstdio> 95 #include <cstdlib> 96 #include <cstring> 97 #include "XmlParser.h" 98 #include "Logger.h"
99 chuck 1.19 #include "ExceptionRep.h"
100 mike 1.13 101 PEGASUS_NAMESPACE_BEGIN 102 103 #define PEGASUS_ARRAY_T XmlEntry 104 # include "ArrayImpl.h" 105 #undef PEGASUS_ARRAY_T 106 107 108 //////////////////////////////////////////////////////////////////////////////// 109 // 110 // Static helper functions 111 // 112 //////////////////////////////////////////////////////////////////////////////// 113 114 static void _printValue(const char* p) 115 { 116 for (; p; p++) 117 { 118 if (p == '\n') 119 PEGASUS_STD(cout) << "\\n"; 120 else if (p == '\r') 121 mike 1.13 PEGASUS_STD(cout) << "\\r"; 122 else if (p == '\t') 123 PEGASUS_STD(cout) << "\\t"; 124 else 125 PEGASUS_STD(cout) << p; 126 } 127 } 128 129 struct EntityReference 130 { 131 const char match; 132 Uint32 length; 133 char replacement; 134 }; 135
136 kumpf 1.18 // ATTN: Add support for more entity references
137 mike 1.13 static EntityReference _references[] = 138 { 139 { "&", 5, '&' }, 140 { "<", 4, '<' }, 141 { ">", 4, '>' }, 142 { """, 6, '"' }, 143 { "'", 6, '\'' } 144 }; 145

// Implements a check for a whitespace character, without calling
// isspace( ).  The isspace( ) function is locale-sensitive,
// and incorrectly flags some chars above 0x7f as whitespace.  This
// causes the XmlParser to incorrectly parse UTF-8 data.
//
// Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml)
// defines white space as:
// S    ::=    (#x20 | #x9 | #xD | #xA)+
//
// Returns 1 if c is a space, tab, carriage return or line feed, else 0.
static int _isspace(char c)
{
    switch (c)
    {
        case ' ':
        case '\r':
        case '\t':
        case '\n':
            return 1;

        default:
            return 0;
    }
}


163 mike 1.13 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0])); 164 165 // Remove all redundant spaces from the given string: 166 167 static void _normalize(char* text) 168 { 169 Uint32 length = strlen(text); 170 char* p = text; 171 char* end = p + length; 172 173 // Remove leading spaces: 174
175 chuck 1.26 while (_isspace(*p)) 176 p++;
177 mike 1.13 178 if (p != text) 179 memmove(text, p, end - p + 1); 180 181 p = text; 182 183 // Look for sequences of more than one space and remove all but one. 184 185 for (;;) 186 { 187 // Advance to the next space: 188
189 chuck 1.26 while (p && !_isspace(p))
190 mike 1.13 p++; 191 192 if (!p) 193 break; 194 195 // Advance to the next non-space: 196 197 char q = p++; 198
199 chuck 1.26 while (_isspace(*p))
200 mike 1.13 p++; 201 202 // Discard trailing spaces (if we are at the end): 203 204 if (!p) 205 { 206 q = '\0'; 207 break; 208 } 209 210 // Remove the redundant spaces: 211 212 Uint32 n = p - q; 213 214 if (n > 1) 215 { 216 q++ = ' '; 217 memmove(q, p, end - p + 1); 218 p = q; 219 } 220 } 221 mike 1.13 } 222 223 //////////////////////////////////////////////////////////////////////////////// 224 // 225 // XmlException 226 // 227 //////////////////////////////////////////////////////////////////////////////// 228 229 static const char _xmlMessages[] = 230 { 231 "Bad opening element", 232 "Bad closing element", 233 "Bad attribute name", 234 "Exepected equal sign", 235 "Bad attribute value", 236 "A \"--\" sequence found within comment", 237 "Unterminated comment", 238 "Unterminated CDATA block", 239 "Unterminated DOCTYPE", 240 "Too many attributes: parser only handles 10", 241 "Malformed reference", 242 mike 1.13 "Expected a comment or CDATA following \"<!\" sequence", 243 "Closing element does not match opening element", 244 "One or more tags are still open", 245 "More than one root element was encountered", 246 "Validation error", 247 "Semantic error" 248 }; 249
// Message keys used to look up the localized form of each parser error.
// The functions that read this table index it with (code - 1), in the same
// way as the default message table.
static const char* _xmlKeys[] =
{
    "Common.XmlParser.BAD_START_TAG",
    "Common.XmlParser.BAD_END_TAG",
    "Common.XmlParser.BAD_ATTRIBUTE_NAME",
    "Common.XmlParser.EXPECTED_EQUAL_SIGN",
    "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
    "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
    "Common.XmlParser.UNTERMINATED_COMMENT",
    "Common.XmlParser.UNTERMINATED_CDATA",
    "Common.XmlParser.UNTERMINATED_DOCTYPE",
    "Common.XmlParser.TOO_MANY_ATTRIBUTES",
    "Common.XmlParser.MALFORMED_REFERENCE",
    "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
    "Common.XmlParser.START_END_MISMATCH",
    "Common.XmlParser.UNCLOSED_TAGS",
    "Common.XmlParser.MULTIPLE_ROOTS",
    "Common.XmlParser.VALIDATION_ERROR",
    "Common.XmlParser.SEMANTIC_ERROR"
};

271 chuck 1.23 // l10n replace _formMessage (comment out the old one)
272 chuck 1.19 /*
273 mike 1.13 static String _formMessage(Uint32 code, Uint32 line, const String& message) 274 { 275 String result = _xmlMessages[Uint32(code) - 1]; 276 277 char buffer[32]; 278 sprintf(buffer, "%d", line); 279 result.append(": on line "); 280 result.append(buffer); 281 282 if (message.size()) 283 { 284 result.append(": "); 285 result.append(message); 286 } 287 288 return result; 289 }
290 chuck 1.19 */ 291 292 static MessageLoaderParms _formMessage(Uint32 code, Uint32 line, const String& message) 293 { 294 String dftMsg = _xmlMessages[Uint32(code) - 1]; 295 String key = _xmlKeys[Uint32(code) - 1]; 296 String msg = message; 297 298 dftMsg.append(": on line $0"); 299 if (message.size()) 300 {
301 humberto 1.20 msg = ": " + msg;
302 chuck 1.19 dftMsg.append("$1"); 303 } 304 305 return MessageLoaderParms(key, dftMsg, line ,msg); 306 } 307 308 static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line) 309 { 310 String dftMsg = _xmlMessages[Uint32(code) - 1]; 311 String key = _xmlKeys[Uint32(code) - 1]; 312 313 dftMsg.append(": on line $0"); 314 315 return MessageLoaderParms(key, dftMsg, line); 316 } 317
318 mike 1.13 319 XmlException::XmlException( 320 XmlException::Code code, 321 Uint32 lineNumber, 322 const String& message) 323 : Exception(_formMessage(code, lineNumber, message)) 324 { 325 326 } 327
328 chuck 1.19 329 XmlException::XmlException( 330 XmlException::Code code, 331 Uint32 lineNumber, 332 MessageLoaderParms& msgParms) 333 : Exception(_formPartialMessage(code, lineNumber)) 334 {
335 humberto 1.21 if (msgParms.default_msg.size()) 336 { 337 msgParms.default_msg = ": " + msgParms.default_msg; 338 }
339 chuck 1.19 _rep->message.append(MessageLoader::getMessage(msgParms)); 340 } 341 342
343 mike 1.13 //////////////////////////////////////////////////////////////////////////////// 344 // 345 // XmlValidationError 346 // 347 //////////////////////////////////////////////////////////////////////////////// 348 349 XmlValidationError::XmlValidationError( 350 Uint32 lineNumber, 351 const String& message) 352 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message) 353 { 354 355 } 356
357 chuck 1.19 358 XmlValidationError::XmlValidationError( 359 Uint32 lineNumber, 360 MessageLoaderParms& msgParms) 361 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms) 362 { 363 364 } 365 366
367 mike 1.13 //////////////////////////////////////////////////////////////////////////////// 368 // 369 // XmlSemanticError 370 // 371 //////////////////////////////////////////////////////////////////////////////// 372 373 XmlSemanticError::XmlSemanticError( 374 Uint32 lineNumber, 375 const String& message) 376 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message) 377 { 378 379 }
380 chuck 1.19 381 382 XmlSemanticError::XmlSemanticError( 383 Uint32 lineNumber, 384 MessageLoaderParms& msgParms) 385 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms) 386 { 387 388 } 389
390 mike 1.13 391 //////////////////////////////////////////////////////////////////////////////// 392 // 393 // XmlParser 394 // 395 //////////////////////////////////////////////////////////////////////////////// 396 397 XmlParser::XmlParser(char* text) : _line(1), _text(text), _current(text), 398 _restoreChar('\0'), _foundRoot(false) 399 { 400 401 } 402 403 Boolean XmlParser::next(XmlEntry& entry) 404 { 405 if (!_putBackStack.isEmpty()) 406 { 407 entry = _putBackStack.top(); 408 _putBackStack.pop(); 409 return true; 410 } 411 mike 1.13 412 // If a character was overwritten with a null-terminator the last 413 // time this routine was called, then put back that character. Before 414 // exiting of course, restore the null-terminator. 415 416 char* nullTerminator = 0; 417 418 if (_restoreChar && !_current) 419 { 420 nullTerminator = _current; 421 _current = _restoreChar; 422 _restoreChar = '\0'; 423 } 424 425 // Skip over any whitespace: 426 427 _skipWhitespace(_current); 428 429 if (!_current) 430 { 431 if (nullTerminator) 432 mike 1.13 nullTerminator = '\0'; 433 434 if (!_stack.isEmpty()) 435 throw XmlException(XmlException::UNCLOSED_TAGS, _line); 436 437 return false; 438 } 439 440 // Either a "<...>" or content begins next: 441 442 if (_current == '<') 443 { 444 _current++; 445 _getElement(_current, entry); 446 447 if (nullTerminator) 448 nullTerminator = '\0'; 449 450 if (entry.type == XmlEntry::START_TAG) 451 { 452 if (_stack.isEmpty() && _foundRoot) 453 mike 1.13 throw XmlException(XmlException::MULTIPLE_ROOTS, _line); 454 455 _foundRoot = true; 456 _stack.push((char)entry.text); 457 } 458 else if (entry.type == XmlEntry::END_TAG) 459 { 460 if (_stack.isEmpty()) 461 throw XmlException(XmlException::START_END_MISMATCH, _line); 462 463 if (strcmp(_stack.top(), entry.text) != 0) 464 throw XmlException(XmlException::START_END_MISMATCH, _line); 465 466 _stack.pop(); 467 } 468 469 return true; 470 } 471 else 472 { 473 entry.type = XmlEntry::CONTENT; 474 mike 1.13 
entry.text = _current; 475 _getContent(_current); 476 _restoreChar = _current; 477 _current = '\0'; 478 479 if (nullTerminator) 480 nullTerminator = '\0'; 481 482 _substituteReferences((char)entry.text); 483 _normalize((char)entry.text); 484 485 return true; 486 } 487 } 488 489 void XmlParser::putBack(XmlEntry& entry) 490 { 491 _putBackStack.push(entry); 492 } 493 494 XmlParser::~XmlParser() 495 mike 1.13 { 496 // Nothing to do! 497 } 498 499 void XmlParser::_skipWhitespace(char*& p) 500 {
501 chuck 1.26 while (p && _isspace(p))
502 mike 1.13 { 503 if (p == '\n') 504 _line++; 505 506 p++; 507 } 508 } 509 510 Boolean XmlParser::_getElementName(char& p) 511 {
512 david 1.25 if (!(((p >= 'A') && (p <= 'Z')) \|\|
513 kumpf 1.24 ((p >= 'a') && (p <= 'z')) \|\| 514 (*p == '_')))
515 mike 1.13 throw XmlException(XmlException::BAD_START_TAG, _line);
516 kumpf 1.24 p++;
517 mike 1.13
518 david 1.22 while ((p) && 519 (((p >= 'A') && (p <= 'Z')) \|\| 520 ((p >= 'a') && (*p <= 'z')) \|\|
521 kumpf 1.24 ((p >= '0') && (p <= '9')) \|\|
522 david 1.22 p == '_' \|\| p == '-' \|\| p == ':' \|\| p == '.'))
523 mike 1.13 p++; 524 525 // The next character must be a space: 526
527 chuck 1.26 if (_isspace(*p))
528 mike 1.13 { 529 p++ = '\0'; 530 _skipWhitespace(p); 531 } 532 533 if (p == '>') 534 { 535 p++ = '\0'; 536 return true; 537 } 538 539 return false; 540 } 541 542 Boolean XmlParser::_getOpenElementName(char& p, Boolean& openCloseElement) 543 { 544 openCloseElement = false; 545
546 david 1.25 if (!(((p >= 'A') && (p <= 'Z')) \|\|
547 kumpf 1.24 ((p >= 'a') && (p <= 'z')) \|\| 548 (*p == '_')))
549 mike 1.13 throw XmlException(XmlException::BAD_START_TAG, _line);
550 kumpf 1.24 p++;
551 mike 1.13
552 david 1.22 while ((p) && 553 (((p >= 'A') && (p <= 'Z')) \|\| 554 ((p >= 'a') && (*p <= 'z')) \|\|
555 kumpf 1.24 ((p >= '0') && (p <= '9')) \|\|
556 david 1.22 p == '_' \|\| p == '-' \|\| p == ':' \|\| p == '.'))
557 mike 1.13 p++; 558 559 // The next character must be a space: 560
561 chuck 1.26 if (_isspace(*p))
562 mike 1.13 { 563 p++ = '\0'; 564 _skipWhitespace(p); 565 } 566 567 if (p == '>') 568 { 569 p++ = '\0'; 570 return true; 571 } 572 573 if (p[0] == '/' && p[1] == '>') 574 { 575 openCloseElement = true; 576 p = '\0'; 577 p += 2; 578 return true; 579 } 580 581 return false; 582 } 583 mike 1.13 584 void XmlParser::_getAttributeNameAndEqual(char*& p) 585 {
586 david 1.25 if (!(((p >= 'A') && (p <= 'Z')) \|\|
587 kumpf 1.24 ((p >= 'a') && (p <= 'z')) \|\| 588 (*p == '_')))
589 mike 1.13 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
590 kumpf 1.24 p++;
591 mike 1.13
592 david 1.22 while ((p) && 593 (((p >= 'A') && (p <= 'Z')) \|\| 594 ((p >= 'a') && (*p <= 'z')) \|\|
595 kumpf 1.24 ((p >= '0') && (p <= '9')) \|\|
596 david 1.22 p == '_' \|\| p == '-' \|\| p == ':' \|\| p == '.'))
597 mike 1.13 p++; 598 599 char* term = p; 600 601 _skipWhitespace(p); 602 603 if (p != '=') 604 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line); 605 606 p++; 607 608 _skipWhitespace(p); 609 610 term = '\0'; 611 } 612 613 void XmlParser::_getAttributeValue(char& p) 614 { 615 // ATTN-B: handle values contained in semiquotes: 616 617 if (p != '"' && p != '\'') 618 mike 1.13 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); 619 620 char startChar = p++; 621 622 while (p && p != startChar) 623 p++; 624 625 if (p != startChar) 626 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); 627 628 p++ = '\0'; 629 } 630 631 void XmlParser::_getComment(char& p) 632 { 633 // Now p points to first non-whitespace character beyond "<--" sequence: 634 635 for (; p; p++) 636 { 637 if (p[0] == '-' && p[1] == '-') 638 { 639 mike 1.13 if (p[2] != '>') 640 { 641 throw XmlException( 642 XmlException::MINUS_MINUS_IN_COMMENT, _line); 643 } 644 645 // Find end of comment (excluding whitespace): 646 647 p = '\0'; 648 p += 3; 649 return; 650 } 651 } 652 653 // If it got this far, then the comment is unterminated: 654 655 throw XmlException(XmlException::UNTERMINATED_COMMENT, _line); 656 } 657 658 void XmlParser::_getCData(char& p) 659 { 660 mike 1.13 // At this point p points one past "<![CDATA[" sequence: 661 662 for (; p; p++) 663 { 664 if (p[0] == ']' && p[1] == ']' && p[2] == '>') 665 { 666 p = '\0'; 667 p += 3; 668 return; 669 } 670 else if (p == '\n') 671 _line++; 672 } 673 674 // If it got this far, then the comment is unterminated: 675 676 throw XmlException(XmlException::UNTERMINATED_CDATA, _line); 677 } 678 679 void XmlParser::_getDocType(char& p) 680 { 681 mike 1.13 // Just ignore the DOCTYPE command for now: 682 683 for (; p && p != '>'; p++) 684 { 685 if (p == '\n') 686 _line++; 687 } 688 689 if (p != '>') 690 throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line); 691 692 p++; 693 } 694 695 void XmlParser::_getContent(char& p) 696 { 697 
while (p && p != '<') 698 { 699 if (p == '\n') 700 _line++; 701 702 mike 1.13 p++; 703 } 704 } 705 706 void XmlParser::_substituteReferences(char* text) 707 { 708 Uint32 rem = strlen(text); 709 710 for (char* p = text; p; p++, rem--) 711 { 712 if (p == '&') 713 {
714 kumpf 1.18 // Process character or entity reference
715 mike 1.13
716 kumpf 1.18 Uint16 referenceChar = 0; 717 Uint32 referenceLength = 0; 718 XmlException::Code code = XmlException::MALFORMED_REFERENCE; 719 720 if ((p+1) == '#') 721 { 722 // Found a character (numeric) reference 723 // Determine whether it is decimal or hex 724 if ((p+2) == 'x') 725 { 726 // Decode a hexadecimal character reference 727 char* q = p+3; 728 729 // At most four digits are allowed, plus trailing ';' 730 Uint32 numDigits; 731 for (numDigits = 0; numDigits < 5; numDigits++, q++) 732 { 733 if (isdigit(q)) 734 { 735 referenceChar = (referenceChar << 4); 736 referenceChar += (q - '0'); 737 kumpf 1.18 } 738 else if ((q >= 'A') && (q <= 'F')) 739 { 740 referenceChar = (referenceChar << 4); 741 referenceChar += (q - 'A' + 10); 742 } 743 else if ((q >= 'a') && (q <= 'f')) 744 { 745 referenceChar = (referenceChar << 4); 746 referenceChar += (q - 'a' + 10); 747 } 748 else if (q == ';') 749 { 750 break; 751 } 752 else 753 { 754 throw XmlException(code, _line); 755 } 756 } 757 758 kumpf 1.18 // Hex number must be 1 - 4 digits 759 if ((numDigits == 0) \|\| (numDigits > 4)) 760 { 761 throw XmlException(code, _line); 762 } 763 764 // ATTN: Currently do not support 16-bit characters 765 if (referenceChar > 0xff) 766 { 767 // ATTN: Is there a good way to say "unsupported"? 
768 throw XmlException(code, _line); 769 } 770 771 referenceLength = numDigits + 4; 772 } 773 else 774 { 775 // Decode a decimal character reference 776 Uint32 newChar = 0; 777 char q = p+2; 778 779 kumpf 1.18 // At most five digits are allowed, plus trailing ';' 780 Uint32 numDigits; 781 for (numDigits = 0; numDigits < 6; numDigits++, q++) 782 { 783 if (isdigit(q)) 784 { 785 newChar = (newChar 10); 786 newChar += (q - '0'); 787 } 788 else if (q == ';') 789 { 790 break; 791 } 792 else 793 { 794 throw XmlException(code, _line); 795 } 796 } 797 798 // Decimal number must be 1 - 5 digits and fit in 16 bits 799 if ((numDigits == 0) \|\| (numDigits > 5) \|\| 800 kumpf 1.18 (newChar > 0xffff)) 801 { 802 throw XmlException(code, _line); 803 } 804 805 // ATTN: Currently do not support 16-bit characters 806 if (newChar > 0xff) 807 { 808 // ATTN: Is there a good way to say "unsupported"? 809 throw XmlException(code, _line); 810 } 811 812 referenceChar = Uint16(newChar); 813 referenceLength = numDigits + 3; 814 } 815 } 816 else 817 { 818 // Check for entity reference 819 // ATTN: Inefficient if many entity references are supported 820 Uint32 i; 821 kumpf 1.18 for (i = 0; i < _REFERENCES_SIZE; i++) 822 { 823 Uint32 length = _references[i].length; 824 const char* match = _references[i].match; 825 826 if (strncmp(p, _references[i].match, length) == 0) 827 { 828 referenceChar = _references[i].replacement; 829 referenceLength = length; 830 break; 831 } 832 } 833 834 if (i == _REFERENCES_SIZE) 835 { 836 // Didn't recognize the entity reference 837 // ATTN: Is there a good way to say "unsupported"? 838 throw XmlException(code, _line); 839 } 840 } 841 842 kumpf 1.18 // Replace the reference with the correct character 843 p = (char)referenceChar; 844 char q = p + referenceLength; 845 rem = rem - referenceLength + 1; 846 memmove(p + 1, q, rem);
847 mike 1.13 } 848 } 849 } 850 851 static const char _EMPTY_STRING[] = ""; 852 853 void XmlParser::_getElement(char& p, XmlEntry& entry) 854 { 855 entry.attributeCount = 0; 856 857 //-------------------------------------------------------------------------- 858 // Get the element name (expect one of these: '?', '!', [A-Za-z_]) 859 //-------------------------------------------------------------------------- 860 861 if (p == '?') 862 { 863 entry.type = XmlEntry::XML_DECLARATION; 864 entry.text = ++p; 865 866 Boolean openCloseElement = false; 867 868 mike 1.13 if (_getElementName(p)) 869 return; 870 } 871 else if (p == '!') 872 { 873 p++; 874 875 // Expect a comment or CDATA: 876 877 if (p[0] == '-' && p[1] == '-') 878 { 879 p += 2; 880 entry.type = XmlEntry::COMMENT; 881 entry.text = p; 882 _getComment(p); 883 return; 884 } 885 else if (memcmp(p, "[CDATA[", 7) == 0) 886 { 887 p += 7; 888 entry.type = XmlEntry::CDATA; 889 mike 1.13 entry.text = p; 890 _getCData(p); 891 return; 892 } 893 else if (memcmp(p, "DOCTYPE", 7) == 0) 894 { 895 entry.type = XmlEntry::DOCTYPE; 896 entry.text = _EMPTY_STRING; 897 _getDocType(p); 898 return; 899 } 900 throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line)); 901 } 902 else if (p == '/') 903 { 904 entry.type = XmlEntry::END_TAG; 905 entry.text = ++p; 906 907 if (!_getElementName(p)) 908 throw(XmlException(XmlException::BAD_END_TAG, _line)); 909 910 mike 1.13 return; 911 }
912 david 1.25 else if ((((p >= 'A') && (p <= 'Z')) \|\|
913 kumpf 1.24 ((p >= 'a') && (p <= 'z')) \|\| 914 (*p == '_')))
915 mike 1.13 { 916 entry.type = XmlEntry::START_TAG; 917 entry.text = p; 918 919 Boolean openCloseElement = false; 920 921 if (_getOpenElementName(p, openCloseElement)) 922 { 923 if (openCloseElement) 924 entry.type = XmlEntry::EMPTY_TAG; 925 return; 926 } 927 } 928 else 929 throw XmlException(XmlException::BAD_START_TAG, _line); 930 931 //-------------------------------------------------------------------------- 932 // Grab all the attributes: 933 //-------------------------------------------------------------------------- 934 935 for (;;) 936 mike 1.13 { 937 if (entry.type == XmlEntry::XML_DECLARATION) 938 { 939 if (p[0] == '?' && p[1] == '>') 940 { 941 p += 2; 942 return; 943 } 944 } 945 else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>') 946 { 947 entry.type = XmlEntry::EMPTY_TAG; 948 p += 2; 949 return; 950 } 951 else if (p == '>') 952 { 953 p++; 954 return; 955 } 956 957 mike 1.13 XmlAttribute attr; 958 attr.name = p; 959 _getAttributeNameAndEqual(p); 960 961 if (p != '"' && *p != '\'') 962 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); 963 964 attr.value = p + 1; 965 _getAttributeValue(p); 966 967 if (entry.type == XmlEntry::XML_DECLARATION) 968 { 969 // The next thing must a space or a "?>": 970
971 chuck 1.26 if (!(p[0] == '?' && p[1] == '>') && !_isspace(*p))
972 mike 1.13 { 973 throw XmlException( 974 XmlException::BAD_ATTRIBUTE_VALUE, _line); 975 } 976 }
977 chuck 1.26 else if (!(p == '>' \|\| (p[0] == '/' && p[1] == '>') \|\| _isspace(p)))
978 mike 1.13 { 979 // The next thing must be a space or a '>': 980 981 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); 982 } 983 984 _skipWhitespace(p); 985 986 if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES) 987 throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line); 988 989 _substituteReferences((char)attr.value); 990 entry.attributes[entry.attributeCount++] = attr; 991 } 992 } 993 994 static const char _typeStrings[] = 995 { 996 "XML_DECLARATION", 997 "START_TAG", 998 "EMPTY_TAG", 999 mike 1.13 "END_TAG", 1000 "COMMENT", 1001 "CDATA", 1002 "DOCTYPE", 1003 "CONTENT" 1004 }; 1005 1006 void XmlEntry::print() const 1007 { 1008 PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " "; 1009 1010 Boolean needQuotes = type == XmlEntry::CDATA \|\| type == XmlEntry::CONTENT; 1011 1012 if (needQuotes) 1013 PEGASUS_STD(cout) << "\""; 1014 1015 _printValue(text); 1016 1017 if (needQuotes) 1018 PEGASUS_STD(cout) << "\""; 1019 1020 mike 1.13 PEGASUS_STD(cout) << '\n'; 1021 1022 for (Uint32 i = 0; i < attributeCount; i++) 1023 { 1024 PEGASUS_STD(cout) << " " << attributes[i].name << "=\""; 1025 _printValue(attributes[i].value); 1026 PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl); 1027 } 1028 } 1029 1030 const XmlAttribute* XmlEntry::findAttribute( 1031 const char* name) const 1032 { 1033 for (Uint32 i = 0; i < attributeCount; i++) 1034 { 1035 if (strcmp(attributes[i].name, name) == 0) 1036 return &attributes[i]; 1037 } 1038 1039 return 0; 1040 } 1041 mike 1.13 1042 // Find first non-whitespace character (set first) and last non-whitespace 1043 // character (set last one past this). For example, consider this string: 1044 // 1045 // " 87 " 1046 // 1047 // The first pointer would point to '8' and the last pointer woudl point one 1048 // beyond '7'. 1049 1050 static void _findEnds( 1051 const char* str, 1052 const char& first, 1053 const char& last) 1054 { 1055 first = str; 1056
1057 chuck 1.26 while (_isspace(*first))
1058 mike 1.13 first++; 1059 1060 if (!*first) 1061 { 1062 last = first; 1063 return; 1064 } 1065 1066 last = first + strlen(first); 1067
1068 chuck 1.26 while (last != first && _isspace(last[-1]))
1069 mike 1.13 last--; 1070 } 1071 1072 Boolean XmlEntry::getAttributeValue( 1073 const char* name, 1074 Uint32& value) const 1075 { 1076 const XmlAttribute* attr = findAttribute(name); 1077 1078 if (!attr) 1079 return false; 1080 1081 const char* first; 1082 const char* last; 1083 _findEnds(attr->value, first, last); 1084 1085 char* end = 0; 1086 long tmp = strtol(first, &end, 10); 1087 1088 if (!end \|\| end != last) 1089 return false; 1090 mike 1.13 1091 value = Uint32(tmp); 1092 return true; 1093 } 1094 1095 Boolean XmlEntry::getAttributeValue( 1096 const char* name, 1097 Real32& value) const 1098 { 1099 const XmlAttribute* attr = findAttribute(name); 1100 1101 if (!attr) 1102 return false; 1103 1104 const char* first; 1105 const char* last; 1106 _findEnds(attr->value, first, last); 1107 1108 char* end = 0; 1109 double tmp = strtod(first, &end); 1110 1111 mike 1.13 if (!end \|\| end != last) 1112 return false; 1113 1114 value = Uint32(tmp); 1115 return true; 1116 } 1117 1118 Boolean XmlEntry::getAttributeValue( 1119 const char* name, 1120 const char& value) const 1121 { 1122 const XmlAttribute attr = findAttribute(name); 1123 1124 if (!attr) 1125 return false; 1126 1127 value = attr->value; 1128 return true; 1129 } 1130 1131 Boolean XmlEntry::getAttributeValue(const char* name, String& value) const 1132 mike 1.13 { 1133 const char* tmp; 1134 1135 if (!getAttributeValue(name, tmp)) 1136 return false; 1137
1138 chuck 1.28 value = String(tmp);
1139 mike 1.13 return true; 1140 } 1141 1142 void XmlAppendCString(Array<Sint8>& out, const char* str) 1143 { 1144 out.append(str, strlen(str)); 1145 } 1146 1147 PEGASUS_NAMESPACE_END

No CVS admin address has been configured