pegasus/src/Pegasus/Common/XmlParser.cpp - annotate

Return to XmlParser.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

1 martin 1.51 //%LICENSE////////////////////////////////////////////////////////////////
2 martin 1.52 //
3 martin 1.51 // Licensed to The Open Group (TOG) under one or more contributor license 4 // agreements. Refer to the OpenPegasusNOTICE.txt file distributed with 5 // this work for additional information regarding copyright ownership. 6 // Each contributor licenses this file to you under the OpenPegasus Open 7 // Source License; you may not use this file except in compliance with the 8 // License.
9 martin 1.52 //
10 martin 1.51 // Permission is hereby granted, free of charge, to any person obtaining a 11 // copy of this software and associated documentation files (the "Software"), 12 // to deal in the Software without restriction, including without limitation 13 // the rights to use, copy, modify, merge, publish, distribute, sublicense, 14 // and/or sell copies of the Software, and to permit persons to whom the 15 // Software is furnished to do so, subject to the following conditions:
16 martin 1.52 //
17 martin 1.51 // The above copyright notice and this permission notice shall be included 18 // in all copies or substantial portions of the Software.
19 martin 1.52 //
20 martin 1.51 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21 martin 1.52 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 martin 1.51 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 23 // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 24 // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 25 // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 26 // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 martin 1.52 //
28 martin 1.51 //////////////////////////////////////////////////////////////////////////
29 mike 1.13 // 30 //%///////////////////////////////////////////////////////////////////////////// 31 32 //////////////////////////////////////////////////////////////////////////////// 33 // 34 // XmlParser 35 //
36 david.dillard 1.32 // This file contains a simple non-validating XML parser. Here are 37 // several rules for well-formed XML:
38 mike 1.13 //
39 david.dillard 1.32 // 1. Documents must begin with an XML declaration:
40 mike 1.13 //
41 david.dillard 1.32 // <?xml version="1.0" standalone="yes"?>
42 mike 1.13 //
43 david.dillard 1.32 // 2. Comments have the form:
44 mike 1.13 //
45 david.dillard 1.32 // <!-- blah blah blah -->
46 mike 1.13 //
47 david.dillard 1.32 // 3. The following entity references are supported:
48 mike 1.13 //
49 david.dillard 1.32 // &amp - ampersand 50 // &lt - less-than 51 // &gt - greater-than 52 // &quot - full quote 53 // &apos - apostrophe
54 mike 1.13 //
55 kumpf 1.18 // as well as character (numeric) references:
56 mike 1.35 //
57 kumpf 1.18 // &#49; - decimal reference for character '1' 58 // &#x31; - hexadecimal reference for character '1' 59 //
60 david.dillard 1.32 // 4. Element names and attribute names take the following form:
61 mike 1.13 //
62 david.dillard 1.32 // [A-Za-z_][A-Za-z_0-9-.:]
63 mike 1.13 //
64 david.dillard 1.32 // 5. Arbitrary data (CDATA) can be enclosed like this:
65 mike 1.13 //
66 david.dillard 1.32 // <![CDATA[ 67 // ... 68 // ]]>
69 mike 1.13 //
70 david.dillard 1.32 // 6. Element names and attributes names are case-sensitive.
71 mike 1.13 //
72 david.dillard 1.32 // 7. XmlAttribute values must be delimited by full or half quotes. 73 // XmlAttribute values must be delimited.
74 mike 1.13 //
75 david.dillard 1.32 // 8. <!DOCTYPE...>
76 mike 1.13 // 77 // TODO: 78 //
79 kumpf 1.40 // ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is
80 mike 1.35 // work. Handle <!DOCTYPE...> sections which are complicated (containing
81 mike 1.13 // rules rather than references to files). 82 //
83 david.dillard 1.32 // Remove newlines from string literals:
84 mike 1.13 // 85 // Example: <xyz x="hello
86 david.dillard 1.32 // world">
87 mike 1.13 // 88 //////////////////////////////////////////////////////////////////////////////// 89
90 sage 1.14 #include <Pegasus/Common/Config.h>
91 mike 1.13 #include <cctype> 92 #include <cstdio> 93 #include <cstdlib> 94 #include <cstring> 95 #include "XmlParser.h" 96 #include "Logger.h"
97 chuck 1.19 #include "ExceptionRep.h"
98 mike 1.34 #include "CharSet.h"
99 mike 1.13 100 PEGASUS_NAMESPACE_BEGIN 101 102 //////////////////////////////////////////////////////////////////////////////// 103 // 104 // Static helper functions 105 // 106 //////////////////////////////////////////////////////////////////////////////// 107 108 static void _printValue(const char* p) 109 { 110 for (; *p; p++) 111 {
112 david.dillard 1.32 if (p == '\n') 113 PEGASUS_STD(cout) << "\\n"; 114 else if (p == '\r') 115 PEGASUS_STD(cout) << "\\r"; 116 else if (p == '\t') 117 PEGASUS_STD(cout) << "\\t"; 118 else 119 PEGASUS_STD(cout) << p;
120 mike 1.13 } 121 } 122 123 struct EntityReference 124 { 125 const char* match; 126 Uint32 length; 127 char replacement; 128 }; 129
130 kumpf 1.18 // ATTN: Add support for more entity references
131 mike 1.13 static EntityReference _references[] = 132 { 133 { "&", 5, '&' }, 134 { "<", 4, '<' }, 135 { ">", 4, '>' }, 136 { """, 6, '"' }, 137 { "'", 6, '\'' } 138 }; 139
140 chuck 1.26 141 // Implements a check for a whitespace character, without calling 142 // isspace( ). The isspace( ) function is locale-sensitive, 143 // and incorrectly flags some chars above 0x7f as whitespace. This 144 // causes the XmlParser to incorrectly parse UTF-8 data. 145 // 146 // Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml) 147 // defines white space as:
148 david.dillard 1.32 // S ::= (#x20 \| #x9 \| #xD \| #xA)+
149 mike 1.34 static inline int _isspace(char c)
150 chuck 1.26 {
151 kumpf 1.36 return CharSet::isXmlWhiteSpace((Uint8)c);
152 chuck 1.26 } 153
154 mike 1.13 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0])); 155 156 //////////////////////////////////////////////////////////////////////////////// 157 // 158 // XmlException 159 // 160 //////////////////////////////////////////////////////////////////////////////// 161 162 static const char* _xmlMessages[] = 163 { 164 "Bad opening element", 165 "Bad closing element", 166 "Bad attribute name", 167 "Exepected equal sign", 168 "Bad attribute value", 169 "A \"--\" sequence found within comment", 170 "Unterminated comment", 171 "Unterminated CDATA block", 172 "Unterminated DOCTYPE", 173 "Malformed reference", 174 "Expected a comment or CDATA following \"<!\" sequence", 175 mike 1.13 "Closing element does not match opening element", 176 "One or more tags are still open", 177 "More than one root element was encountered", 178 "Validation error",
179 kumpf 1.45 "Semantic error", 180 "Namespace not declared"
181 mike 1.13 }; 182
// Message keys for each XmlException code. The table is indexed with
// (code - 1) and must stay parallel to _xmlMessages.
static const char* _xmlKeys[] =
{
    "Common.XmlParser.BAD_START_TAG",
    "Common.XmlParser.BAD_END_TAG",
    "Common.XmlParser.BAD_ATTRIBUTE_NAME",
    "Common.XmlParser.EXPECTED_EQUAL_SIGN",
    "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
    "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
    "Common.XmlParser.UNTERMINATED_COMMENT",
    "Common.XmlParser.UNTERMINATED_CDATA",
    "Common.XmlParser.UNTERMINATED_DOCTYPE",
    "Common.XmlParser.MALFORMED_REFERENCE",
    "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
    "Common.XmlParser.START_END_MISMATCH",
    "Common.XmlParser.UNCLOSED_TAGS",
    "Common.XmlParser.MULTIPLE_ROOTS",
    "Common.XmlParser.VALIDATION_ERROR",
    "Common.XmlParser.SEMANTIC_ERROR",
    "Common.XmlParser.UNDECLARED_NAMESPACE"
};


205 kumpf 1.40 static MessageLoaderParms _formMessage( 206 Uint32 code, 207 Uint32 line, 208 const String& message)
209 chuck 1.19 { 210 String dftMsg = _xmlMessages[Uint32(code) - 1];
211 kumpf 1.48 const char* key = _xmlKeys[Uint32(code) - 1];
212 kumpf 1.45 String msg = message;
213 chuck 1.19 214 dftMsg.append(": on line $0"); 215 if (message.size()) 216 {
217 david.dillard 1.32 msg = ": " + msg; 218 dftMsg.append("$1"); 219 }
220 chuck 1.19
221 kumpf 1.48 return MessageLoaderParms(key, dftMsg.getCString(), line ,msg);
222 chuck 1.19 } 223 224 static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line) 225 { 226 String dftMsg = _xmlMessages[Uint32(code) - 1];
227 kumpf 1.48 const char* key = _xmlKeys[Uint32(code) - 1];
228 chuck 1.19 229 dftMsg.append(": on line $0");
230 david.dillard 1.32
231 kumpf 1.48 return MessageLoaderParms(key, dftMsg.getCString(), line);
232 chuck 1.19 } 233
234 mike 1.13 235 XmlException::XmlException(
236 david.dillard 1.32 XmlException::Code code,
237 mike 1.13 Uint32 lineNumber,
238 david.dillard 1.32 const String& message)
239 mike 1.13 : Exception(_formMessage(code, lineNumber, message)) 240 { 241 242 } 243
244 chuck 1.19 245 XmlException::XmlException(
246 david.dillard 1.32 XmlException::Code code,
247 chuck 1.19 Uint32 lineNumber,
248 david.dillard 1.32 MessageLoaderParms& msgParms)
249 chuck 1.19 : Exception(_formPartialMessage(code, lineNumber)) 250 {
251 david.dillard 1.32 if (msgParms.default_msg.size())
252 humberto 1.21 {
253 david.dillard 1.32 msgParms.default_msg = ": " + msgParms.default_msg; 254 } 255 _rep->message.append(MessageLoader::getMessage(msgParms));
256 chuck 1.19 } 257 258
259 mike 1.13 //////////////////////////////////////////////////////////////////////////////// 260 // 261 // XmlValidationError 262 // 263 //////////////////////////////////////////////////////////////////////////////// 264 265 XmlValidationError::XmlValidationError( 266 Uint32 lineNumber, 267 const String& message) 268 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message) 269 { 270 } 271
272 chuck 1.19 273 XmlValidationError::XmlValidationError( 274 Uint32 lineNumber, 275 MessageLoaderParms& msgParms) 276 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms) 277 { 278 } 279 280
281 mike 1.13 //////////////////////////////////////////////////////////////////////////////// 282 // 283 // XmlSemanticError 284 // 285 //////////////////////////////////////////////////////////////////////////////// 286 287 XmlSemanticError::XmlSemanticError( 288 Uint32 lineNumber, 289 const String& message) 290 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message) 291 { 292 }
293 chuck 1.19 294 295 XmlSemanticError::XmlSemanticError( 296 Uint32 lineNumber, 297 MessageLoaderParms& msgParms) 298 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms) 299 { 300 } 301
302 mike 1.13 303 //////////////////////////////////////////////////////////////////////////////// 304 // 305 // XmlParser 306 // 307 //////////////////////////////////////////////////////////////////////////////// 308
309 mike 1.55 XmlParser::XmlParser(char* text, XmlNamespace* ns, Boolean hideEmptyTags)
310 kumpf 1.40 : _line(1), 311 _current(text), 312 _restoreChar('\0'),
313 kumpf 1.45 _foundRoot(false), 314 _supportedNamespaces(ns), 315 // Start valid indexes with -2. -1 is reserved for not found.
316 mike 1.55 _currentUnsupportedNSType(-2), 317 _hideEmptyTags(hideEmptyTags)
318 mike 1.13 { 319 } 320
321 mike 1.34 inline void _skipWhitespace(Uint32& line, char& p) 322 { 323 while (p && _isspace(p)) 324 { 325 if (p == '\n') 326 line++; 327 328 p++; 329 } 330 } 331
#if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \
    defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC)
#pragma optimize( "", off )
#endif
// Parses the name part of an entity reference. On entry p points just past
// the '&'. On success p is advanced past the terminating ';' and the
// replacement character is returned. If the text is not one of the five
// predefined entities (gt, lt, apos, quot, amp), p is left unchanged and
// -1 is returned.
//
// The && chains short-circuit on the first mismatch, so no character
// beyond the NUL terminator is ever read.
static int _getEntityRef(char*& p)
{
    if ((p[0] == 'g') && (p[1] == 't') && (p[2] == ';'))
    {
        p += 3;
        return '>';
    }

    if ((p[0] == 'l') && (p[1] == 't') && (p[2] == ';'))
    {
        p += 3;
        return '<';
    }

    if ((p[0] == 'a') && (p[1] == 'p') && (p[2] == 'o') && (p[3] == 's') &&
        (p[4] == ';'))
    {
        p += 5;
        return '\'';
    }

    if ((p[0] == 'q') && (p[1] == 'u') && (p[2] == 'o') && (p[3] == 't') &&
        (p[4] == ';'))
    {
        p += 5;
        return '"';
    }

    if ((p[0] == 'a') && (p[1] == 'm') && (p[2] == 'p') && (p[3] == ';'))
    {
        p += 4;
        return '&';
    }

    return -1;
}
#if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \
    defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC)
#pragma optimize( "", on )
#endif

// Parses the numeric part of a character reference. On entry p points just
// past the "&#" sequence. Accepted forms are
//
//     [0-9]+ ';'            (decimal, at most 5 digits)
//     'x' [0-9a-fA-F]+ ';'  (hexadecimal, at most 4 digits)
//
// with a value no greater than 255. On success p is advanced past the
// terminating ';' and the character value is returned; otherwise -1 is
// returned.
//
// The digits are parsed by hand rather than with strtoul(), because
// strtoul() also accepts leading whitespace, a sign and (for base 16) a
// "0x" prefix, none of which may appear in a character reference.
//
// NOTE(review): a reference to character 0 is accepted and returns 0, as
// before; the caller stores it in the text buffer where it acts as a
// terminator. Confirm whether it should be rejected.
static inline int _getCharRef(char*& p)
{
    const bool hex = (*p == 'x');

    if (hex)
    {
        ++p;
    }

    const unsigned long base = hex ? 16 : 10;
    const int maxDigits = hex ? 4 : 5;

    unsigned long ch = 0;
    int numDigits = 0;
    char* end = p;

    for (;; ++end)
    {
        const char c = *end;
        unsigned long digit;

        if (c >= '0' && c <= '9')
            digit = c - '0';
        else if (hex && c >= 'a' && c <= 'f')
            digit = c - 'a' + 10;
        else if (hex && c >= 'A' && c <= 'F')
            digit = c - 'A' + 10;
        else
            break;

        // The digit limit also keeps ch far below the overflow range.
        if (++numDigits > maxDigits)
        {
            return -1;
        }

        ch = ch * base + digit;
    }

    if ((numDigits == 0) || (*end != ';') || (ch > 255))
    {
        return -1;
    }

    p = end + 1;

    return static_cast<int>(ch);
}

408 kumpf 1.50 // Parse an entity reference or a character reference 409 static inline int _getRef(Uint32 line, char& p) 410 { 411 int ch; 412 413 if (p == '#') 414 { 415 ch = _getCharRef(++p); 416 } 417 else 418 { 419 ch = _getEntityRef(p); 420 } 421 422 if (ch == -1) 423 { 424 throw XmlException(XmlException::MALFORMED_REFERENCE, line); 425 } 426 427 return ch; 428 } 429 kumpf 1.50 430 static inline void _normalizeElementValue( 431 Uint32& line,
432 venkat.puvvada 1.54 char*& p, 433 Uint32 &textLen)
434 kumpf 1.50 { 435 // Process one character at a time: 436 437 char* q = p;
438 venkat.puvvada 1.54 char *start = p;
439 kumpf 1.50 440 while (p && (p != '<')) 441 { 442 if (_isspace(p)) 443 { 444 // Trim whitespace from the end of the value, but do not compress 445 // whitespace within the value. 446 447 const char start = p; 448 449 if (p++ == '\n') 450 { 451 line++; 452 } 453 454 _skipWhitespace(line, p); 455 456 if (p && (p != '<')) 457 { 458 // Transfer internal whitespace to q without compressing it. 459 const char i = start; 460 kumpf 1.50 while (i < p) 461 { 462 q++ = i++; 463 } 464 } 465 else 466 { 467 // Do not transfer trailing whitespace to q. 468 break; 469 } 470 } 471 else if (p == '&') 472 { 473 // Process an entity reference or a character reference. 474 475 q++ = _getRef(line, ++p); 476 } 477 else 478 { 479 q++ = p++; 480 } 481 kumpf 1.50 } 482 483 // If q got behind p, it is safe and necessary to null-terminate q 484 485 if (q != p) 486 { 487 *q = '\0'; 488 }
489 venkat.puvvada 1.54 textLen = (Uint32)(q - start);
490 kumpf 1.50 } 491 492 static inline void _normalizeAttributeValue( 493 Uint32& line, 494 char& p, 495 char end_char, 496 char& start)
497 kumpf 1.37 { 498 // Skip over leading whitespace: 499 500 _skipWhitespace(line, p); 501 start = p; 502 503 // Process one character at a time: 504 505 char* q = p; 506 507 while (p && (p != end_char)) 508 { 509 if (_isspace(p)) 510 { 511 // Compress sequences of whitespace characters to a single space 512 // character. Update line number when newlines encountered. 513 514 if (p++ == '\n') 515 { 516 line++; 517 } 518 kumpf 1.37 519 q++ = ' '; 520 521 _skipWhitespace(line, p); 522 } 523 else if (p == '&') 524 {
525 kumpf 1.50 // Process an entity reference or a character reference.
526 kumpf 1.37
527 kumpf 1.50 *q++ = _getRef(line, ++p);
528 kumpf 1.37 } 529 else 530 { 531 q++ = p++; 532 } 533 } 534 535 // Remove single trailing whitespace (consecutive whitespaces already 536 // compressed above). Since p >= q, we can tell if we need to strip a 537 // trailing space from q by looking at the end of p. We must not look at 538 // the last character of p, though, if p is an empty string.
539 dmitry.mikulin 1.44 Boolean adjust_q = (p != start) && _isspace(p[-1]); 540 541 // We encountered a the end_char or a zero-terminator. 542 543 q = p;
544 kumpf 1.37
545 dmitry.mikulin 1.44 if (adjust_q)
546 kumpf 1.37 { 547 q--; 548 } 549 550 // If q got behind p, it is safe and necessary to null-terminate q 551 552 if (q != p) 553 { 554 *q = '\0'; 555 } 556 } 557
558 mike 1.55 Boolean XmlParser::_next(
559 kumpf 1.45 XmlEntry& entry, 560 Boolean includeComment)
561 mike 1.13 { 562 if (!_putBackStack.isEmpty()) 563 {
564 david.dillard 1.32 entry = _putBackStack.top(); 565 _putBackStack.pop(); 566 return true;
567 mike 1.13 } 568 569 // If a character was overwritten with a null-terminator the last 570 // time this routine was called, then put back that character. Before 571 // exiting of course, restore the null-terminator. 572 573 char* nullTerminator = 0; 574 575 if (_restoreChar && !*_current) 576 {
577 david.dillard 1.32 nullTerminator = _current; 578 *_current = _restoreChar; 579 _restoreChar = '\0';
580 mike 1.13 } 581
582 kumpf 1.45 entry.attributes.clear(); 583 584 if (_supportedNamespaces) 585 { 586 // Remove namespaces of a deeper scope level from the stack. 587 while (!_nameSpaces.isEmpty() && 588 _nameSpaces.top().scopeLevel > _stack.size()) 589 { 590 _nameSpaces.pop(); 591 } 592 } 593
594 venkat.puvvada 1.41 // Loop until we are done with comments if includeComment is false. 595 do 596 { 597 // Skip over any whitespace: 598 _skipWhitespace(_line, _current); 599 600 if (!_current) 601 { 602 if (nullTerminator) 603 nullTerminator = '\0';
604 mike 1.13
605 venkat.puvvada 1.41 if (!_stack.isEmpty()) 606 throw XmlException(XmlException::UNCLOSED_TAGS, _line);
607 mike 1.13
608 venkat.puvvada 1.41 return false; 609 }
610 mike 1.13
611 venkat.puvvada 1.41 // Either a "<...>" or content begins next:
612 mike 1.13
613 venkat.puvvada 1.41 if (*_current == '<') 614 { 615 _current++; 616 _getElement(_current, entry);
617 mike 1.13
618 venkat.puvvada 1.41 if (nullTerminator) 619 *nullTerminator = '\0';
620 mike 1.13
621 venkat.puvvada 1.41 if (entry.type == XmlEntry::START_TAG) 622 { 623 if (_stack.isEmpty() && _foundRoot) 624 throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
625 mike 1.13
626 venkat.puvvada 1.41 _foundRoot = true; 627 _stack.push((char*)entry.text); 628 } 629 else if (entry.type == XmlEntry::END_TAG) 630 { 631 if (_stack.isEmpty()) 632 throw XmlException(XmlException::START_END_MISMATCH, _line);
633 mike 1.13
634 venkat.puvvada 1.41 if (strcmp(_stack.top(), entry.text) != 0) 635 throw XmlException(XmlException::START_END_MISMATCH, _line);
636 david.dillard 1.32
637 venkat.puvvada 1.41 _stack.pop(); 638 }
639 david.dillard 1.32 }
640 venkat.puvvada 1.41 else
641 david.dillard 1.32 {
642 venkat.puvvada 1.41 // Normalize the content:
643 mike 1.13
644 kumpf 1.50 char* start = _current;
645 venkat.puvvada 1.54 Uint32 textLen; 646 _normalizeElementValue(_line, _current, textLen);
647 mike 1.13
648 venkat.puvvada 1.41 // Get the content:
649 mike 1.13
650 venkat.puvvada 1.41 entry.type = XmlEntry::CONTENT; 651 entry.text = start;
652 venkat.puvvada 1.54 entry.textLen = textLen;
653 kumpf 1.37
654 venkat.puvvada 1.41 // Overwrite '<' with a null character (temporarily).
655 kumpf 1.37
656 venkat.puvvada 1.41 _restoreChar = _current; 657 _current = '\0';
658 kumpf 1.37
659 venkat.puvvada 1.41 if (nullTerminator) 660 *nullTerminator = '\0'; 661 }
662 kumpf 1.45 } while (!includeComment && entry.type == XmlEntry::COMMENT); 663 664 if (_supportedNamespaces && 665 (entry.type == XmlEntry::START_TAG \|\| 666 entry.type == XmlEntry::EMPTY_TAG \|\| 667 entry.type == XmlEntry::END_TAG)) 668 { 669 // Determine the namespace type for this entry 670 671 if (entry.type == XmlEntry::START_TAG \|\| 672 entry.type == XmlEntry::EMPTY_TAG) 673 { 674 // Process namespace declarations and determine the namespace type 675 // for the attributes. 676 677 Uint32 scopeLevel = _stack.size(); 678 if (entry.type == XmlEntry::EMPTY_TAG) 679 { 680 // Empty tags are deeper scope, but not pushed onto the stack 681 scopeLevel++; 682 } 683 kumpf 1.45 684 for (Uint32 i = 0, n = entry.attributes.size(); i < n; i++) 685 { 686 XmlAttribute& attr = entry.attributes[i]; 687 if ((strncmp(attr.name, "xmlns:", 6) == 0) \|\| 688 (strcmp(attr.name, "xmlns") == 0)) 689 { 690 // Process a namespace declaration 691 XmlNamespace ns; 692 if (attr.name[5] == ':') 693 { 694 ns.localName = attr.localName; 695 } 696 else 697 { 698 // Default name space has no local name 699 ns.localName = 0; 700 } 701 ns.extendedName = attr.value; 702 ns.scopeLevel = scopeLevel; 703 ns.type = _getSupportedNamespaceType(ns.extendedName); 704 kumpf 1.45 705 // If the namespace is not supported, assign it a unique 706 // negative identifier. 707 if (ns.type == -1) 708 { 709 ns.type = _currentUnsupportedNSType--; 710 } 711 712 _nameSpaces.push(ns); 713 } 714 else 715 { 716 // Get the namespace type for this attribute. 717 attr.nsType = _getNamespaceType(attr.name); 718 } 719 } 720 } 721 722 entry.nsType = _getNamespaceType(entry.text); 723 } 724 else 725 kumpf 1.45 { 726 entry.nsType = -1; 727 }
728 kumpf 1.37
729 venkat.puvvada 1.41 return true;
730 mike 1.13 } 731
732 mike 1.55 Boolean XmlParser::next(XmlEntry& entry, Boolean includeComment) 733 { 734 if (_hideEmptyTags) 735 { 736 // Get the next tag. 737 738 if (!_next(entry, includeComment)) 739 return false; 740 741 // If an EMPTY_TAG is encountered, then convert it to a START_TAG and 742 // push a matching END_TAG on the put-back stack. This hides every 743 // EMPTY_TAG from the caller. 744 745 if (entry.type == XmlEntry::EMPTY_TAG) 746 { 747 entry.type = XmlEntry::START_TAG; 748 749 XmlEntry tmp; 750 tmp.type = XmlEntry::END_TAG; 751 tmp.text = entry.text; 752 tmp.nsType = entry.nsType; 753 mike 1.55 tmp.localName = entry.localName; 754 755 _putBackStack.push(tmp); 756 } 757 758 return true; 759 } 760 else 761 return _next(entry, includeComment); 762 } 763
764 kumpf 1.45 // Get the namespace type of the given tag 765 int XmlParser::_getNamespaceType(const char* tag) 766 { 767 const char* pos = strchr(tag, ':'); 768 769 // If ':' is not found, the tag is not namespace qualified and we 770 // need to look for the default name space. 771 772 // Search the namespace stack from the top 773 for (Sint32 i = _nameSpaces.size() - 1; i >=0; i--) 774 { 775 // If ':' is found, look for the name space with the matching 776 // local name... 777 if ((pos && _nameSpaces[i].localName && 778 !strncmp(_nameSpaces[i].localName, tag, pos - tag)) \|\| 779 // ... otherwise look for the default name space. It's the 780 // one with localName set to NULL 781 (!pos && !_nameSpaces[i].localName)) 782 { 783 return _nameSpaces[i].type; 784 } 785 kumpf 1.45 } 786 787 // If the tag is namespace qualified, but the name space has not been 788 // declared, it's malformed XML and we must throw an exception. 789 // Note: The "xml" namespace is specifically defined by the W3C as a 790 // reserved prefix ("http://www.w3.org/XML/1998/namespace"). 791 if (pos && (strncmp(tag, "xml:", 4) != 0)) 792 { 793 throw XmlException(XmlException::UNDECLARED_NAMESPACE, _line); 794 } 795 796 // Otherwise it's OK not to have a name space. 797 return -1; 798 } 799 800 // Given the extended namespace name, find it in the table of supported 801 // namespaces and return its type. 
802 int XmlParser::_getSupportedNamespaceType(const char* extendedName) 803 { 804 for (Sint32 i = 0; 805 _supportedNamespaces[i].localName != 0; 806 kumpf 1.45 i++) 807 { 808 PEGASUS_ASSERT(_supportedNamespaces[i].type == i); 809 if (!strcmp(_supportedNamespaces[i].extendedName, extendedName)) 810 { 811 return _supportedNamespaces[i].type; 812 } 813 } 814 return -1; 815 } 816 817 XmlNamespace* XmlParser::getNamespace(int nsType) 818 { 819 for (Sint32 i = _nameSpaces.size() - 1; i >=0; i--) 820 { 821 if (_nameSpaces[i].type == nsType) 822 { 823 return &_nameSpaces[i]; 824 } 825 } 826 return 0; 827 kumpf 1.45 } 828
829 mike 1.13 void XmlParser::putBack(XmlEntry& entry) 830 { 831 _putBackStack.push(entry); 832 } 833 834 XmlParser::~XmlParser() 835 { 836 // Nothing to do! 837 } 838
// A-Za-z0-9_-. (Note that ':' is not included and must be checked separately)
//
// Lookup table indexed by an unsigned 8-bit character value: 1 when the
// character may appear inside a name (after the first character), 0
// otherwise. Entry 0 is 0, so scanning with this table stops at the NUL
// terminator without an explicit test.
static const unsigned char _isInnerElementChar[] =
{
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x00 - 0x0F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x10 - 0x1F
    0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0, // 0x20 - 0x2F  '-' '.'
    1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, // 0x30 - 0x3F  '0'-'9'
    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x40 - 0x4F  'A'-'O'
    1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, // 0x50 - 0x5F  'P'-'Z' '_'
    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x60 - 0x6F  'a'-'o'
    1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0, // 0x70 - 0x7F  'p'-'z'
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x80 - 0x8F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x90 - 0x9F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xA0 - 0xAF
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xB0 - 0xBF
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xC0 - 0xCF
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xD0 - 0xDF
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xE0 - 0xEF
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xF0 - 0xFF
};

851 kumpf 1.45 inline Boolean _getQName(char& p, const char& localName)
852 mike 1.13 {
853 kumpf 1.45 localName = p; 854
855 mike 1.35 if (!CharSet::isAlNumUnder(Uint8(*p)))
856 kumpf 1.45 return false;
857 mike 1.35
858 kumpf 1.24 p++;
859 mike 1.13
860 kumpf 1.53 // No explicit test for NULL termination is needed. 861 // On position 0 of the array false is returned.
862 thilo.boehm 1.49 while (_isInnerElementChar[Uint8(*p)])
863 david.dillard 1.32 p++;
864 mike 1.13
865 kumpf 1.45 // We've validated the prefix, now validate the local name 866 if (p == ':') 867 { 868 localName = ++p; 869 870 if (!CharSet::isAlNumUnder(Uint8(p))) 871 return false; 872 873 p++;
874 kumpf 1.53 // No explicit test for NULL termination is needed. 875 // On position 0 of the array false is returned.
876 thilo.boehm 1.49 while (_isInnerElementChar[Uint8(*p)])
877 kumpf 1.45 p++; 878 } 879 880 return true; 881 } 882 883 Boolean XmlParser::_getElementName(char& p, const char& localName) 884 { 885 if (!_getQName(p, localName)) 886 throw XmlException(XmlException::BAD_START_TAG, _line); 887
888 mike 1.13 // The next character must be a space: 889
890 chuck 1.26 if (_isspace(*p))
891 mike 1.13 {
892 david.dillard 1.32 *p++ = '\0';
893 mike 1.34 _skipWhitespace(_line, p);
894 mike 1.13 } 895 896 if (*p == '>') 897 {
898 david.dillard 1.32 *p++ = '\0'; 899 return true;
900 mike 1.13 } 901 902 return false; 903 } 904
905 kumpf 1.45 Boolean XmlParser::_getOpenElementName( 906 char& p, 907 const char& localName, 908 Boolean& openCloseElement)
909 mike 1.13 { 910 openCloseElement = false; 911
912 kumpf 1.45 if (!_getQName(p, localName))
913 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
914 mike 1.35
915 mike 1.13 // The next character must be a space: 916
917 chuck 1.26 if (_isspace(*p))
918 mike 1.13 {
919 david.dillard 1.32 *p++ = '\0';
920 mike 1.34 _skipWhitespace(_line, p);
921 mike 1.13 } 922 923 if (*p == '>') 924 {
925 david.dillard 1.32 *p++ = '\0'; 926 return true;
927 mike 1.13 } 928 929 if (p[0] == '/' && p[1] == '>') 930 {
931 david.dillard 1.32 openCloseElement = true; 932 *p = '\0'; 933 p += 2; 934 return true;
935 mike 1.13 } 936 937 return false; 938 } 939
940 kumpf 1.45 void XmlParser::_getAttributeNameAndEqual(char& p, const char& localName)
941 mike 1.13 {
942 kumpf 1.45 if (!_getQName(p, localName))
943 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
944 mike 1.35
945 mike 1.13 char* term = p; 946
947 mike 1.34 _skipWhitespace(_line, p);
948 mike 1.13 949 if (*p != '=')
950 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
951 mike 1.13 952 p++; 953
954 mike 1.34 _skipWhitespace(_line, p);
955 mike 1.13 956 term = '\0'; 957 } 958 959 void XmlParser::_getComment(char& p) 960 { 961 // Now p points to first non-whitespace character beyond "<--" sequence: 962 963 for (; *p; p++) 964 {
965 david.dillard 1.32 if (p[0] == '-' && p[1] == '-') 966 { 967 if (p[2] != '>') 968 { 969 throw XmlException( 970 XmlException::MINUS_MINUS_IN_COMMENT, _line); 971 } 972 973 // Find end of comment (excluding whitespace): 974 975 *p = '\0'; 976 p += 3; 977 return; 978 }
979 mike 1.13 } 980 981 // If it got this far, then the comment is unterminated: 982 983 throw XmlException(XmlException::UNTERMINATED_COMMENT, _line); 984 } 985 986 void XmlParser::_getCData(char& p) 987 { 988 // At this point p points one past "<![CDATA[" sequence: 989 990 for (; p; p++) 991 {
992 david.dillard 1.32 if (p[0] == ']' && p[1] == ']' && p[2] == '>') 993 { 994 p = '\0'; 995 p += 3; 996 return; 997 } 998 else if (p == '\n') 999 _line++;
1000 mike 1.13 } 1001 1002 // If it got this far, then the comment is unterminated: 1003 1004 throw XmlException(XmlException::UNTERMINATED_CDATA, _line); 1005 } 1006 1007 void XmlParser::_getDocType(char& p) 1008 { 1009 // Just ignore the DOCTYPE command for now: 1010 1011 for (; p && *p != '>'; p++) 1012 {
1013 david.dillard 1.32 if (*p == '\n') 1014 _line++;
1015 mike 1.13 } 1016 1017 if (*p != '>')
1018 david.dillard 1.32 throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
1019 mike 1.13 1020 p++; 1021 } 1022 1023 void XmlParser::_getElement(char& p, XmlEntry& entry) 1024 { 1025 //-------------------------------------------------------------------------- 1026 // Get the element name (expect one of these: '?', '!', [A-Za-z_]) 1027 //-------------------------------------------------------------------------- 1028 1029 if (p == '?') 1030 {
1031 david.dillard 1.32 entry.type = XmlEntry::XML_DECLARATION; 1032 entry.text = ++p;
1033 mike 1.13
1034 kumpf 1.45 if (_getElementName(p, entry.localName))
1035 david.dillard 1.32 return;
1036 mike 1.13 } 1037 else if (*p == '!') 1038 {
1039 david.dillard 1.32 p++;
1040 mike 1.13
1041 david.dillard 1.32 // Expect a comment or CDATA:
1042 mike 1.13
1043 david.dillard 1.32 if (p[0] == '-' && p[1] == '-') 1044 { 1045 p += 2; 1046 entry.type = XmlEntry::COMMENT; 1047 entry.text = p; 1048 _getComment(p); 1049 return; 1050 } 1051 else if (memcmp(p, "[CDATA[", 7) == 0) 1052 { 1053 p += 7; 1054 entry.type = XmlEntry::CDATA; 1055 entry.text = p; 1056 _getCData(p);
1057 venkat.puvvada 1.54 entry.textLen = strlen(entry.text);
1058 david.dillard 1.32 return; 1059 } 1060 else if (memcmp(p, "DOCTYPE", 7) == 0) 1061 { 1062 entry.type = XmlEntry::DOCTYPE;
1063 kumpf 1.37 entry.text = "";
1064 david.dillard 1.32 _getDocType(p); 1065 return; 1066 } 1067 throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
1068 mike 1.13 } 1069 else if (*p == '/') 1070 {
1071 david.dillard 1.32 entry.type = XmlEntry::END_TAG; 1072 entry.text = ++p;
1073 mike 1.13
1074 kumpf 1.45 if (!_getElementName(p, entry.localName))
1075 david.dillard 1.32 throw(XmlException(XmlException::BAD_END_TAG, _line));
1076 mike 1.13
1077 david.dillard 1.32 return;
1078 mike 1.13 }
1079 thilo.boehm 1.49 else if (CharSet::isAlphaUnder(Uint8(*p)))
1080 mike 1.13 {
1081 david.dillard 1.32 entry.type = XmlEntry::START_TAG; 1082 entry.text = p;
1083 mike 1.13
1084 david.dillard 1.32 Boolean openCloseElement = false;
1085 mike 1.13
1086 kumpf 1.45 if (_getOpenElementName(p, entry.localName, openCloseElement))
1087 david.dillard 1.32 { 1088 if (openCloseElement) 1089 entry.type = XmlEntry::EMPTY_TAG; 1090 return; 1091 }
1092 mike 1.13 } 1093 else
1094 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
1095 mike 1.13 1096 //-------------------------------------------------------------------------- 1097 // Grab all the attributes: 1098 //-------------------------------------------------------------------------- 1099 1100 for (;;) 1101 {
1102 david.dillard 1.32 if (entry.type == XmlEntry::XML_DECLARATION) 1103 { 1104 if (p[0] == '?' && p[1] == '>') 1105 { 1106 p += 2; 1107 return; 1108 } 1109 } 1110 else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>') 1111 { 1112 entry.type = XmlEntry::EMPTY_TAG; 1113 p += 2; 1114 return; 1115 } 1116 else if (*p == '>') 1117 { 1118 p++; 1119 return; 1120 } 1121 1122 XmlAttribute attr;
1123 kumpf 1.45 attr.nsType = -1;
1124 david.dillard 1.32 attr.name = p;
1125 kumpf 1.45 _getAttributeNameAndEqual(p, attr.localName);
1126 david.dillard 1.32
1127 kumpf 1.37 // Get the attribute value (e.g., "some value") 1128 { 1129 if ((p != '"') && (p != '\'')) 1130 { 1131 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); 1132 } 1133 1134 char quote = p++; 1135 1136 char start;
1137 kumpf 1.50 _normalizeAttributeValue(_line, p, quote, start);
1138 kumpf 1.37 attr.value = start; 1139 1140 if (*p != quote) 1141 { 1142 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); 1143 } 1144 1145 // Overwrite the closing quote with a null-terminator:
1146 david.dillard 1.32
1147 kumpf 1.37 *p++ = '\0'; 1148 }
1149 david.dillard 1.32 1150 if (entry.type == XmlEntry::XML_DECLARATION) 1151 { 1152 // The next thing must a space or a "?>":
1153 mike 1.13
1154 david.dillard 1.32 if (!(p[0] == '?' && p[1] == '>') && !_isspace(p)) 1155 { 1156 throw XmlException( 1157 XmlException::BAD_ATTRIBUTE_VALUE, _line); 1158 } 1159 } 1160 else if (!(p == '>' \|\| (p[0] == '/' && p[1] == '>') \|\| _isspace(*p))) 1161 { 1162 // The next thing must be a space or a '>':
1163 mike 1.13
1164 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); 1165 }
1166 mike 1.13
1167 mike 1.34 _skipWhitespace(_line, p);
1168 david.dillard 1.32
1169 kumpf 1.45 entry.attributes.append(attr);
1170 mike 1.13 } 1171 } 1172 1173 static const char* _typeStrings[] = 1174 {
1175 david.dillard 1.32 "XML_DECLARATION", 1176 "START_TAG", 1177 "EMPTY_TAG", 1178 "END_TAG",
1179 mike 1.13 "COMMENT", 1180 "CDATA", 1181 "DOCTYPE",
1182 david.dillard 1.32 "CONTENT"
1183 mike 1.13 }; 1184 1185 void XmlEntry::print() const 1186 { 1187 PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " "; 1188 1189 Boolean needQuotes = type == XmlEntry::CDATA \|\| type == XmlEntry::CONTENT; 1190 1191 if (needQuotes)
1192 david.dillard 1.32 PEGASUS_STD(cout) << "\""; 1193
1194 mike 1.13 _printValue(text); 1195 1196 if (needQuotes)
1197 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
1198 mike 1.13 1199 PEGASUS_STD(cout) << '\n'; 1200
1201 kumpf 1.45 for (Uint32 i = 0, n = attributes.size(); i < n; i++)
1202 mike 1.13 {
1203 david.dillard 1.32 PEGASUS_STD(cout) << " " << attributes[i].name << "=\""; 1204 _printValue(attributes[i].value); 1205 PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
1206 mike 1.13 } 1207 } 1208 1209 const XmlAttribute* XmlEntry::findAttribute( 1210 const char* name) const 1211 {
1212 kumpf 1.45 for (Uint32 i = 0, n = attributes.size(); i < n; i++)
1213 mike 1.13 {
1214 david.dillard 1.32 if (strcmp(attributes[i].name, name) == 0) 1215 return &attributes[i];
1216 mike 1.13 } 1217 1218 return 0; 1219 } 1220
1221 kumpf 1.45 const XmlAttribute* XmlEntry::findAttribute(
1222 kumpf 1.47 int attrNsType,
1223 kumpf 1.45 const char* name) const 1224 { 1225 for (Uint32 i = 0, n = attributes.size(); i < n; i++) 1226 {
1227 kumpf 1.47 if ((attributes[i].nsType == attrNsType) &&
1228 kumpf 1.45 (strcmp(attributes[i].localName, name) == 0)) 1229 { 1230 return &attributes[i]; 1231 } 1232 } 1233 1234 return 0; 1235 } 1236
1237 mike 1.13 // Find first non-whitespace character (set first) and last non-whitespace 1238 // character (set last one past this). For example, consider this string: 1239 //
1240 david.dillard 1.32 // " 87 "
1241 mike 1.13 // 1242 // The first pointer would point to '8' and the last pointer woudl point one 1243 // beyond '7'. 1244 1245 static void _findEnds(
1246 david.dillard 1.32 const char* str, 1247 const char*& first,
1248 mike 1.13 const char*& last) 1249 { 1250 first = str; 1251
1252 chuck 1.26 while (_isspace(*first))
1253 david.dillard 1.32 first++;
1254 mike 1.13 1255 if (!*first) 1256 {
1257 david.dillard 1.32 last = first; 1258 return;
1259 mike 1.13 } 1260 1261 last = first + strlen(first); 1262
1263 chuck 1.26 while (last != first && _isspace(last[-1]))
1264 david.dillard 1.32 last--;
1265 mike 1.13 } 1266 1267 Boolean XmlEntry::getAttributeValue(
1268 david.dillard 1.32 const char* name,
1269 mike 1.13 Uint32& value) const 1270 { 1271 const XmlAttribute* attr = findAttribute(name); 1272 1273 if (!attr)
1274 david.dillard 1.32 return false;
1275 mike 1.13 1276 const char* first; 1277 const char* last; 1278 _findEnds(attr->value, first, last); 1279 1280 char* end = 0; 1281 long tmp = strtol(first, &end, 10); 1282 1283 if (!end \|\| end != last)
1284 david.dillard 1.32 return false;
1285 mike 1.13 1286 value = Uint32(tmp); 1287 return true; 1288 } 1289 1290 Boolean XmlEntry::getAttributeValue(
1291 david.dillard 1.32 const char* name,
1292 mike 1.13 Real32& value) const 1293 { 1294 const XmlAttribute* attr = findAttribute(name); 1295 1296 if (!attr)
1297 david.dillard 1.32 return false;
1298 mike 1.13 1299 const char* first; 1300 const char* last; 1301 _findEnds(attr->value, first, last); 1302 1303 char* end = 0; 1304 double tmp = strtod(first, &end); 1305 1306 if (!end \|\| end != last)
1307 david.dillard 1.32 return false;
1308 mike 1.13
1309 david.dillard 1.32 value = static_cast<Real32>(tmp);
1310 mike 1.13 return true; 1311 } 1312 1313 Boolean XmlEntry::getAttributeValue(
1314 david.dillard 1.32 const char* name,
1315 mike 1.13 const char& value) const 1316 { 1317 const XmlAttribute attr = findAttribute(name); 1318 1319 if (!attr)
1320 david.dillard 1.32 return false;
1321 mike 1.13 1322 value = attr->value; 1323 return true; 1324 } 1325 1326 Boolean XmlEntry::getAttributeValue(const char* name, String& value) const 1327 { 1328 const char* tmp; 1329 1330 if (!getAttributeValue(name, tmp))
1331 david.dillard 1.32 return false;
1332 mike 1.13
1333 chuck 1.28 value = String(tmp);
1334 mike 1.13 return true; 1335 } 1336
1337 mike 1.33 void XmlAppendCString(Buffer& out, const char* str)
1338 mike 1.13 {
1339 david.dillard 1.32 out.append(str, static_cast<Uint32>(strlen(str)));
1340 mike 1.13 } 1341 1342 PEGASUS_NAMESPACE_END

No CVS admin address has been configured