pegasus/src/Pegasus/Common/XmlParser.cpp - annotate

Return to XmlParser.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

1 karl 1.38 //%2006////////////////////////////////////////////////////////////////////////
2 mike 1.13 //
3 karl 1.29 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development 4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems. 5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
6 karl 1.27 // IBM Corp.; EMC Corporation, The Open Group.
7 karl 1.29 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.; 8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
9 karl 1.31 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.; 10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
11 karl 1.38 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.; 12 // EMC Corporation; Symantec Corporation; The Open Group.
13 mike 1.13 // 14 // Permission is hereby granted, free of charge, to any person obtaining a copy
15 kumpf 1.16 // of this software and associated documentation files (the "Software"), to 16 // deal in the Software without restriction, including without limitation the 17 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
18 mike 1.13 // sell copies of the Software, and to permit persons to whom the Software is 19 // furnished to do so, subject to the following conditions:
20 karl 1.38 //
21 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
22 mike 1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED 23 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
24 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 25 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 26 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
27 mike 1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 29 // 30 //============================================================================== 31 // 32 //%///////////////////////////////////////////////////////////////////////////// 33 34 //////////////////////////////////////////////////////////////////////////////// 35 // 36 // XmlParser 37 //
38 david.dillard 1.32 // This file contains a simple non-validating XML parser. Here are 39 // several rules for well-formed XML:
40 mike 1.13 //
41 david.dillard 1.32 // 1. Documents must begin with an XML declaration:
42 mike 1.13 //
43 david.dillard 1.32 // <?xml version="1.0" standalone="yes"?>
44 mike 1.13 //
45 david.dillard 1.32 // 2. Comments have the form:
46 mike 1.13 //
47 david.dillard 1.32 // <!-- blah blah blah -->
48 mike 1.13 //
49 david.dillard 1.32 // 3. The following entity references are supported:
50 mike 1.13 //
51 david.dillard 1.32 // &amp - ampersand 52 // &lt - less-than 53 // &gt - greater-than 54 // &quot - full quote 55 // &apos - apostrophe
56 mike 1.13 //
57 kumpf 1.18 // as well as character (numeric) references:
58 mike 1.35 //
59 kumpf 1.18 // &#49; - decimal reference for character '1' 60 // &#x31; - hexadecimal reference for character '1' 61 //
62 david.dillard 1.32 // 4. Element names and attribute names take the following form:
63 mike 1.13 //
64 david.dillard 1.32 // [A-Za-z_][A-Za-z_0-9-.:]
65 mike 1.13 //
66 david.dillard 1.32 // 5. Arbitrary data (CDATA) can be enclosed like this:
67 mike 1.13 //
68 david.dillard 1.32 // <![CDATA[ 69 // ... 70 // ]]>
71 mike 1.13 //
72 david.dillard 1.32 // 6. Element names and attributes names are case-sensitive.
73 mike 1.13 //
74 david.dillard 1.32 // 7. XmlAttribute values must be delimited by full or half quotes. 75 // XmlAttribute values must be delimited.
76 mike 1.13 //
77 david.dillard 1.32 // 8. <!DOCTYPE...>
78 mike 1.13 // 79 // TODO: 80 //
81 kumpf 1.40 // ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is
82 mike 1.35 // work. Handle <!DOCTYPE...> sections which are complicated (containing
83 mike 1.13 // rules rather than references to files). 84 //
85 david.dillard 1.32 // Remove newlines from string literals:
86 mike 1.13 // 87 // Example: <xyz x="hello
88 david.dillard 1.32 // world">
89 mike 1.13 // 90 //////////////////////////////////////////////////////////////////////////////// 91
92 sage 1.14 #include <Pegasus/Common/Config.h>
93 mike 1.13 #include <cctype> 94 #include <cstdio> 95 #include <cstdlib> 96 #include <cstring> 97 #include "XmlParser.h" 98 #include "Logger.h"
99 chuck 1.19 #include "ExceptionRep.h"
100 mike 1.34 #include "CharSet.h"
101 mike 1.13 102 PEGASUS_NAMESPACE_BEGIN 103 104 //////////////////////////////////////////////////////////////////////////////// 105 // 106 // Static helper functions 107 // 108 //////////////////////////////////////////////////////////////////////////////// 109 110 static void _printValue(const char* p) 111 { 112 for (; *p; p++) 113 {
114 david.dillard 1.32 if (p == '\n') 115 PEGASUS_STD(cout) << "\\n"; 116 else if (p == '\r') 117 PEGASUS_STD(cout) << "\\r"; 118 else if (p == '\t') 119 PEGASUS_STD(cout) << "\\t"; 120 else 121 PEGASUS_STD(cout) << p;
122 mike 1.13 } 123 } 124 125 struct EntityReference 126 { 127 const char* match; 128 Uint32 length; 129 char replacement; 130 }; 131
132 kumpf 1.18 // ATTN: Add support for more entity references
133 mike 1.13 static EntityReference _references[] = 134 { 135 { "&", 5, '&' }, 136 { "<", 4, '<' }, 137 { ">", 4, '>' }, 138 { """, 6, '"' }, 139 { "'", 6, '\'' } 140 }; 141
142 chuck 1.26 143 // Implements a check for a whitespace character, without calling 144 // isspace( ). The isspace( ) function is locale-sensitive, 145 // and incorrectly flags some chars above 0x7f as whitespace. This 146 // causes the XmlParser to incorrectly parse UTF-8 data. 147 // 148 // Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml) 149 // defines white space as:
150 david.dillard 1.32 // S ::= (#x20 \| #x9 \| #xD \| #xA)+
151 mike 1.34 static inline int _isspace(char c)
152 chuck 1.26 {
153 kumpf 1.36 return CharSet::isXmlWhiteSpace((Uint8)c);
154 chuck 1.26 } 155
156 mike 1.13 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0])); 157 158 //////////////////////////////////////////////////////////////////////////////// 159 // 160 // XmlException 161 // 162 //////////////////////////////////////////////////////////////////////////////// 163 164 static const char* _xmlMessages[] = 165 { 166 "Bad opening element", 167 "Bad closing element", 168 "Bad attribute name", 169 "Exepected equal sign", 170 "Bad attribute value", 171 "A \"--\" sequence found within comment", 172 "Unterminated comment", 173 "Unterminated CDATA block", 174 "Unterminated DOCTYPE", 175 "Too many attributes: parser only handles 10", 176 "Malformed reference", 177 mike 1.13 "Expected a comment or CDATA following \"<!\" sequence", 178 "Closing element does not match opening element", 179 "One or more tags are still open", 180 "More than one root element was encountered", 181 "Validation error",
182 dmitry.mikulin 1.43.2.4 "Semantic error", 183 "Malformed namespace declaration", 184 "Namespace not supported", 185 "Namespace not declared"
186 mike 1.13 }; 187
// Message keys passed to MessageLoaderParms for each exception code. The
// table is indexed with (code - 1), in parallel with _xmlMessages.
static const char* _xmlKeys[] =
{
    "Common.XmlParser.BAD_START_TAG",
    "Common.XmlParser.BAD_END_TAG",
    "Common.XmlParser.BAD_ATTRIBUTE_NAME",
    "Common.XmlParser.EXPECTED_EQUAL_SIGN",
    "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
    "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
    "Common.XmlParser.UNTERMINATED_COMMENT",
    "Common.XmlParser.UNTERMINATED_CDATA",
    "Common.XmlParser.UNTERMINATED_DOCTYPE",
    "Common.XmlParser.MALFORMED_REFERENCE",
    "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
    "Common.XmlParser.START_END_MISMATCH",
    "Common.XmlParser.UNCLOSED_TAGS",
    "Common.XmlParser.MULTIPLE_ROOTS",
    "Common.XmlParser.VALIDATION_ERROR",
    "Common.XmlParser.SEMANTIC_ERROR",
    "Common.XmlParser.MALFORMED_NAMESPACE_DECL",
    "Common.XmlParser.UNSUPPORTED_NAMESPACE",
    "Common.XmlParser.UNDECLARED_NAMESPACE"
};


212 kumpf 1.40 static MessageLoaderParms _formMessage( 213 Uint32 code, 214 Uint32 line, 215 const String& message)
216 chuck 1.19 { 217 String dftMsg = _xmlMessages[Uint32(code) - 1]; 218 String key = _xmlKeys[Uint32(code) - 1];
219 dmitry.mikulin 1.43.2.5 String msg = message;
220 chuck 1.19 221 dftMsg.append(": on line $0"); 222 if (message.size()) 223 {
224 david.dillard 1.32 msg = ": " + msg; 225 dftMsg.append("$1"); 226 }
227 chuck 1.19 228 return MessageLoaderParms(key, dftMsg, line ,msg); 229 } 230 231 static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line) 232 { 233 String dftMsg = _xmlMessages[Uint32(code) - 1]; 234 String key = _xmlKeys[Uint32(code) - 1]; 235 236 dftMsg.append(": on line $0");
237 david.dillard 1.32
238 chuck 1.19 return MessageLoaderParms(key, dftMsg, line); 239 } 240
241 mike 1.13 242 XmlException::XmlException(
243 david.dillard 1.32 XmlException::Code code,
244 mike 1.13 Uint32 lineNumber,
245 david.dillard 1.32 const String& message)
246 mike 1.13 : Exception(_formMessage(code, lineNumber, message)) 247 { 248 249 } 250
251 chuck 1.19 252 XmlException::XmlException(
253 david.dillard 1.32 XmlException::Code code,
254 chuck 1.19 Uint32 lineNumber,
255 david.dillard 1.32 MessageLoaderParms& msgParms)
256 chuck 1.19 : Exception(_formPartialMessage(code, lineNumber)) 257 {
258 david.dillard 1.32 if (msgParms.default_msg.size())
259 humberto 1.21 {
260 david.dillard 1.32 msgParms.default_msg = ": " + msgParms.default_msg; 261 } 262 _rep->message.append(MessageLoader::getMessage(msgParms));
263 chuck 1.19 } 264 265
266 mike 1.13 //////////////////////////////////////////////////////////////////////////////// 267 // 268 // XmlValidationError 269 // 270 //////////////////////////////////////////////////////////////////////////////// 271 272 XmlValidationError::XmlValidationError( 273 Uint32 lineNumber, 274 const String& message) 275 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message) 276 { 277 } 278
279 chuck 1.19 280 XmlValidationError::XmlValidationError( 281 Uint32 lineNumber, 282 MessageLoaderParms& msgParms) 283 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms) 284 { 285 } 286 287
288 mike 1.13 //////////////////////////////////////////////////////////////////////////////// 289 // 290 // XmlSemanticError 291 // 292 //////////////////////////////////////////////////////////////////////////////// 293 294 XmlSemanticError::XmlSemanticError( 295 Uint32 lineNumber, 296 const String& message) 297 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message) 298 { 299 }
300 chuck 1.19 301 302 XmlSemanticError::XmlSemanticError( 303 Uint32 lineNumber, 304 MessageLoaderParms& msgParms) 305 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms) 306 { 307 } 308
309 mike 1.13 310 //////////////////////////////////////////////////////////////////////////////// 311 // 312 // XmlParser 313 // 314 //////////////////////////////////////////////////////////////////////////////// 315
316 dmitry.mikulin 1.43.2.2 XmlParser::XmlParser(char* text, XmlNamespace* ns)
317 kumpf 1.40 : _line(1), 318 _current(text), 319 _restoreChar('\0'),
320 dmitry.mikulin 1.43.2.2 _foundRoot(false), 321 _scopeLevel(0), 322 _supportedNamespaces(ns)
323 mike 1.13 { 324 } 325
326 mike 1.34 inline void _skipWhitespace(Uint32& line, char& p) 327 { 328 while (p && _isspace(p)) 329 { 330 if (p == '\n') 331 line++; 332 333 p++; 334 } 335 } 336
// NOTE(review): optimization is switched off around _getEntityRef() on the
// 64-bit MSVC platforms; presumably this works around a compiler problem,
// but the reason is not recorded here.
#if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \
    defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC)
#pragma optimize( "", off )
#endif
// Decodes a named entity reference. On entry p points just past the '&'.
//
// If the text at p is one of "gt;", "lt;", "apos;", "quot;" or "amp;", p is
// advanced past the ';' and the corresponding character is returned.
// Otherwise p is left unchanged and -1 is returned.
static int _getEntityRef(char*& p)
{
    struct NamedEntity
    {
        const char* name;   // entity name including the terminating ';'
        size_t length;      // number of characters in name
        char value;         // character the reference stands for
    };

    static const NamedEntity entities[] =
    {
        { "gt;", 3, '>' },
        { "lt;", 3, '<' },
        { "apos;", 5, '\'' },
        { "quot;", 5, '"' },
        { "amp;", 4, '&' }
    };

    const size_t count = sizeof(entities) / sizeof(entities[0]);

    for (size_t i = 0; i < count; i++)
    {
        // strncmp stops at a null terminator, so a short input is never
        // read past its end.
        if (strncmp(p, entities[i].name, entities[i].length) == 0)
        {
            p += entities[i].length;
            return entities[i].value;
        }
    }

    return -1;
}
#if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \
    defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC)
#pragma optimize( "", on )
#endif
381 kumpf 1.37 382 static inline int _getCharRef(char& p, bool hex) 383 { 384 char end; 385 unsigned long ch; 386 387 if (hex) 388 { 389 ch = strtoul(p, &end, 16); 390 } 391 else 392 { 393 ch = strtoul(p, &end, 10); 394 } 395 396 if ((end == p) \|\| (end != ';') \|\| (ch > 255)) 397 { 398 return -1; 399 } 400 401 if ((hex && (end - p > 4)) \|\| (!hex && (end - p > 5))) 402 kumpf 1.37 { 403 return -1; 404 } 405 406 p = end + 1; 407 408 return ch; 409 } 410 411 static void _normalize(Uint32& line, char& p, char end_char, char& start) 412 { 413 // Skip over leading whitespace: 414 415 _skipWhitespace(line, p); 416 start = p; 417 418 // Process one character at a time: 419 420 char q = p; 421 422 while (p && (p != end_char)) 423 kumpf 1.37 { 424 if (_isspace(p)) 425 { 426 // Compress sequences of whitespace characters to a single space 427 // character. Update line number when newlines encountered. 428 429 if (p++ == '\n') 430 { 431 line++; 432 } 433 434 q++ = ' '; 435 436 _skipWhitespace(line, p); 437 } 438 else if (p == '&') 439 { 440 // Process entity characters and entity references: 441 442 p++; 443 int ch; 444 kumpf 1.37 445 if (p == '#') 446 { 447 p++; 448 449 if (p == 'x') 450 { 451 p++; 452 ch = _getCharRef(p, true); 453 } 454 else 455 { 456 ch = _getCharRef(p, false); 457 } 458 } 459 else 460 { 461 ch = _getEntityRef(p); 462 } 463 464 if (ch == -1) 465 kumpf 1.37 { 466 throw XmlException(XmlException::MALFORMED_REFERENCE, line); 467 } 468 469 q++ = ch; 470 } 471 else 472 { 473 q++ = p++; 474 } 475 } 476
477 kumpf 1.40 // We encountered a the end_char or a zero-terminator.
478 kumpf 1.37 479 q = p; 480 481 // Remove single trailing whitespace (consecutive whitespaces already 482 // compressed above). Since p >= q, we can tell if we need to strip a 483 // trailing space from q by looking at the end of p. We must not look at 484 // the last character of p, though, if p is an empty string. 485 486 if ((p != start) && _isspace(p[-1])) 487 { 488 q--; 489 } 490 491 // If q got behind p, it is safe and necessary to null-terminate q 492 493 if (q != p) 494 { 495 *q = '\0'; 496 } 497 } 498
499 kumpf 1.43.2.1 Boolean XmlParser::next( 500 XmlEntry& entry,
501 dmitry.mikulin 1.43.2.2 Boolean includeComment)
502 mike 1.13 {
503 kumpf 1.43.2.1 entry.attributes.clear(); 504
505 mike 1.13 if (!_putBackStack.isEmpty()) 506 {
507 david.dillard 1.32 entry = _putBackStack.top(); 508 _putBackStack.pop(); 509 return true;
510 mike 1.13 } 511 512 // If a character was overwritten with a null-terminator the last 513 // time this routine was called, then put back that character. Before 514 // exiting of course, restore the null-terminator. 515 516 char* nullTerminator = 0; 517 518 if (_restoreChar && !*_current) 519 {
520 david.dillard 1.32 nullTerminator = _current; 521 *_current = _restoreChar; 522 _restoreChar = '\0';
523 mike 1.13 } 524
525 venkat.puvvada 1.41 // Loop until we are done with comments if includeComment is false. 526 do 527 { 528 // Skip over any whitespace: 529 _skipWhitespace(_line, _current); 530 531 if (!_current) 532 { 533 if (nullTerminator) 534 nullTerminator = '\0';
535 mike 1.13
536 venkat.puvvada 1.41 if (!_stack.isEmpty()) 537 throw XmlException(XmlException::UNCLOSED_TAGS, _line);
538 mike 1.13
539 venkat.puvvada 1.41 return false; 540 }
541 mike 1.13
542 venkat.puvvada 1.41 // Either a "<...>" or content begins next:
543 mike 1.13
544 venkat.puvvada 1.41 if (*_current == '<') 545 { 546 _current++; 547 _getElement(_current, entry);
548 mike 1.13
549 venkat.puvvada 1.41 if (nullTerminator) 550 *nullTerminator = '\0';
551 mike 1.13
552 venkat.puvvada 1.41 if (entry.type == XmlEntry::START_TAG) 553 { 554 if (_stack.isEmpty() && _foundRoot) 555 throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
556 mike 1.13
557 venkat.puvvada 1.41 _foundRoot = true; 558 _stack.push((char*)entry.text); 559 } 560 else if (entry.type == XmlEntry::END_TAG) 561 { 562 if (_stack.isEmpty()) 563 throw XmlException(XmlException::START_END_MISMATCH, _line);
564 mike 1.13
565 venkat.puvvada 1.41 if (strcmp(_stack.top(), entry.text) != 0) 566 throw XmlException(XmlException::START_END_MISMATCH, _line);
567 david.dillard 1.32
568 venkat.puvvada 1.41 _stack.pop(); 569 }
570 david.dillard 1.32 }
571 venkat.puvvada 1.41 else
572 david.dillard 1.32 {
573 venkat.puvvada 1.41 // Normalize the content:
574 mike 1.13
575 venkat.puvvada 1.41 char* start; 576 _normalize(_line, _current, '<', start);
577 mike 1.13
578 venkat.puvvada 1.41 // Get the content:
579 mike 1.13
580 venkat.puvvada 1.41 entry.type = XmlEntry::CONTENT; 581 entry.text = start;
582 kumpf 1.37
583 venkat.puvvada 1.41 // Overwrite '<' with a null character (temporarily).
584 kumpf 1.37
585 venkat.puvvada 1.41 _restoreChar = _current; 586 _current = '\0';
587 kumpf 1.37
588 venkat.puvvada 1.41 if (nullTerminator) 589 *nullTerminator = '\0'; 590 }
591 dmitry.mikulin 1.43.2.2 } while (!includeComment && entry.type == XmlEntry::COMMENT); 592
593 kumpf 1.43.2.3 if (_supportedNamespaces && 594 (entry.type == XmlEntry::START_TAG \|\| 595 entry.type == XmlEntry::EMPTY_TAG \|\| 596 entry.type == XmlEntry::END_TAG))
597 dmitry.mikulin 1.43.2.2 { 598 // Process attributes and enter namespaces into the table 599 if (entry.type == XmlEntry::START_TAG \|\| 600 entry.type == XmlEntry::EMPTY_TAG) 601 { 602 _scopeLevel++; 603 for (unsigned int i = 0; i < entry.attributes.size(); i++) 604 {
605 kumpf 1.43.2.3 XmlAttribute& attr = entry.attributes[i];
606 dmitry.mikulin 1.43.2.4 if (strncmp(attr.name, "xmlns", 5) == 0)
607 dmitry.mikulin 1.43.2.2 { 608 XmlNamespace ns;
609 dmitry.mikulin 1.43.2.4 if (attr.name[5] == ':')
610 dmitry.mikulin 1.43.2.2 {
611 dmitry.mikulin 1.43.2.4 ns.localName = attr.name + 6; 612 613 // Check if we have malformed XML of the form: 614 // "xmlns:=URI". In this case attr.name will be set 615 // to "xmlns:" and ns.localName will point to '\0' 616 if (ns.localName[0] == '\0') 617 { 618 throw XmlException( 619 XmlException::MALFORMED_NAMESPACE_DECL, 620 _line); 621 }
622 dmitry.mikulin 1.43.2.2 } 623 else 624 {
625 dmitry.mikulin 1.43.2.4 // Default name space has no local name 626 ns.localName = 0;
627 dmitry.mikulin 1.43.2.2 }
628 dmitry.mikulin 1.43.2.4 ns.extendedName = attr.value; 629 ns.scopeLevel = _scopeLevel; 630 ns.type = getSupportedNamespaceType(ns.extendedName); 631 632 // Even unsupported namespaces get pushed onto the stack. 633 // We will throw an exception of there is an attempt to 634 // reference an unsupported namespace later. 635 _nameSpaces.push(ns); 636 } 637 else 638 { 639 // Attribute names may also be namespace qualified. 640 attr.nsType = _getNamespaceType(attr.name);
641 dmitry.mikulin 1.43.2.2 } 642 } 643 }
644 kumpf 1.43.2.3
645 dmitry.mikulin 1.43.2.4 // Get the namespace type for this tag. 646 entry.nsType = _getNamespaceType(entry.text);
647 dmitry.mikulin 1.43.2.2
648 kumpf 1.43.2.3 if (entry.type == XmlEntry::END_TAG \|\| 649 entry.type == XmlEntry::EMPTY_TAG) 650 {
651 dmitry.mikulin 1.43.2.2 // Remove any namespaces of the current scope level from 652 // the scope stack. 653 while (!_nameSpaces.isEmpty() && 654 _scopeLevel <= _nameSpaces.top().scopeLevel) 655 { 656 _nameSpaces.pop(); 657 } 658 659 PEGASUS_ASSERT(_scopeLevel > 0); 660 _scopeLevel--; 661 } 662 } 663 else 664 { 665 entry.nsType = -1; 666 }
667 kumpf 1.37
668 venkat.puvvada 1.41 return true;
669 mike 1.13 } 670
671 dmitry.mikulin 1.43.2.2 // Get the namespace type of the given tag 672 int XmlParser::_getNamespaceType(const char* tag) 673 { 674 const char* pos = strchr(tag, ':'); 675
676 dmitry.mikulin 1.43.2.4 // If ":" is not found, the tag is not namespace qualified and we 677 // need to look for the default name space.
678 dmitry.mikulin 1.43.2.2 679 // Search the namespace stack from the top 680 for (int i = _nameSpaces.size() - 1; i >=0; i--) 681 {
682 dmitry.mikulin 1.43.2.4 // If ":" is found, look for the name space with the matching 683 // local name... 684 if ((pos && _nameSpaces[i].localName && 685 !strncmp(_nameSpaces[i].localName, tag, pos - tag)) \|\| 686 // ... otherwise look for the default name space. It's the 687 // one with localName set to NULL 688 (!pos && !_nameSpaces[i].localName)) 689 { 690 // If it's a reference to an unsupported namespace, 691 // throw an exception 692 if (_nameSpaces[i].type == -1) 693 { 694 throw XmlException(XmlException::UNSUPPORTED_NAMESPACE, _line); 695 }
696 dmitry.mikulin 1.43.2.2 return _nameSpaces[i].type; 697 } 698 }
699 dmitry.mikulin 1.43.2.4 700 // If the tag is namespace qualified, but the name space has not been
701 kumpf 1.43.2.7 // declared, it's malformed XML and we must throw an exception. 702 // Note: The "xml" namespace is specifically defined by the W3C as a 703 // reserved prefix ("http://www.w3.org/XML/1998/namespace"). 704 if (pos && (strncmp(tag, "xml:", 4) != 0))
705 dmitry.mikulin 1.43.2.4 { 706 throw XmlException(XmlException::UNDECLARED_NAMESPACE, _line); 707 } 708 709 // Otherwise it's OK not to have a name space.
710 dmitry.mikulin 1.43.2.2 return -1; 711 } 712 713 // Gived the extended namespace name, find it in the table of supported 714 // namespaces and return its type. 715 int XmlParser::getSupportedNamespaceType(const char* extendedName) 716 { 717 for (int i = 0; 718 _supportedNamespaces[i].localName != 0; 719 i++) 720 { 721 PEGASUS_ASSERT(_supportedNamespaces[i].type == i); 722 if (!strcmp(_supportedNamespaces[i].extendedName, extendedName)) 723 { 724 return _supportedNamespaces[i].type; 725 } 726 } 727 return -1; 728 } 729 730 XmlNamespace* XmlParser::getNamespace(int nsType) 731 dmitry.mikulin 1.43.2.2 { 732 for (int i = _nameSpaces.size() - 1; i >=0; i--) 733 { 734 if (_nameSpaces[i].type == nsType) 735 { 736 return &_nameSpaces[i]; 737 } 738 } 739 return 0; 740 } 741
742 mike 1.13 void XmlParser::putBack(XmlEntry& entry) 743 { 744 _putBackStack.push(entry); 745 } 746 747 XmlParser::~XmlParser() 748 { 749 // Nothing to do! 750 } 751
// Lookup table, indexed by character code, that is non-zero for characters
// allowed after the first character of an element or attribute name:
// A-Za-z0-9_-:.
// Entries 0x80-0xFF are not listed and are therefore zero-initialized.
static unsigned char _isInnerElementChar[256] =
{
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    // 0x00 - 0x0F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    // 0x10 - 0x1F
    0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,    // 0x20 - 0x2F: '-' '.'
    1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,    // 0x30 - 0x3F: '0'-'9' ':'
    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,    // 0x40 - 0x4F: 'A'-'O'
    1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,    // 0x50 - 0x5F: 'P'-'Z' '_'
    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,    // 0x60 - 0x6F: 'a'-'o'
    1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0     // 0x70 - 0x7F: 'p'-'z'
};

764 mike 1.13 Boolean XmlParser::_getElementName(char*& p) 765 {
766 mike 1.35 if (!CharSet::isAlNumUnder(Uint8(*p)))
767 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
768 mike 1.35
769 kumpf 1.24 p++;
770 mike 1.13
771 mike 1.35 while (p && _isInnerElementChar[Uint8(p)])
772 david.dillard 1.32 p++;
773 mike 1.13 774 // The next character must be a space: 775
776 chuck 1.26 if (_isspace(*p))
777 mike 1.13 {
778 david.dillard 1.32 *p++ = '\0';
779 mike 1.34 _skipWhitespace(_line, p);
780 mike 1.13 } 781 782 if (*p == '>') 783 {
784 david.dillard 1.32 *p++ = '\0'; 785 return true;
786 mike 1.13 } 787 788 return false; 789 } 790 791 Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement) 792 { 793 openCloseElement = false; 794
795 mike 1.35 if (!CharSet::isAlNumUnder(Uint8(*p)))
796 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
797 mike 1.35
798 kumpf 1.24 p++;
799 mike 1.13
800 mike 1.35 while (p && _isInnerElementChar[Uint8(p)])
801 david.dillard 1.32 p++;
802 mike 1.13 803 // The next character must be a space: 804
805 chuck 1.26 if (_isspace(*p))
806 mike 1.13 {
807 david.dillard 1.32 *p++ = '\0';
808 mike 1.34 _skipWhitespace(_line, p);
809 mike 1.13 } 810 811 if (*p == '>') 812 {
813 david.dillard 1.32 *p++ = '\0'; 814 return true;
815 mike 1.13 } 816 817 if (p[0] == '/' && p[1] == '>') 818 {
819 david.dillard 1.32 openCloseElement = true; 820 *p = '\0'; 821 p += 2; 822 return true;
823 mike 1.13 } 824 825 return false; 826 } 827 828 void XmlParser::_getAttributeNameAndEqual(char*& p) 829 {
830 mike 1.35 if (!CharSet::isAlNumUnder((Uint8)*p))
831 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
832 mike 1.35
833 kumpf 1.24 p++;
834 mike 1.13
835 mike 1.35 while (p && _isInnerElementChar[Uint8(p)])
836 david.dillard 1.32 p++;
837 mike 1.13 838 char* term = p; 839
840 mike 1.34 _skipWhitespace(_line, p);
841 mike 1.13 842 if (*p != '=')
843 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
844 mike 1.13 845 p++; 846
847 mike 1.34 _skipWhitespace(_line, p);
848 mike 1.13 849 term = '\0'; 850 } 851 852 void XmlParser::_getComment(char& p) 853 { 854 // Now p points to first non-whitespace character beyond "<--" sequence: 855 856 for (; *p; p++) 857 {
858 david.dillard 1.32 if (p[0] == '-' && p[1] == '-') 859 { 860 if (p[2] != '>') 861 { 862 throw XmlException( 863 XmlException::MINUS_MINUS_IN_COMMENT, _line); 864 } 865 866 // Find end of comment (excluding whitespace): 867 868 *p = '\0'; 869 p += 3; 870 return; 871 }
872 mike 1.13 } 873 874 // If it got this far, then the comment is unterminated: 875 876 throw XmlException(XmlException::UNTERMINATED_COMMENT, _line); 877 } 878 879 void XmlParser::_getCData(char& p) 880 { 881 // At this point p points one past "<![CDATA[" sequence: 882 883 for (; p; p++) 884 {
885 david.dillard 1.32 if (p[0] == ']' && p[1] == ']' && p[2] == '>') 886 { 887 p = '\0'; 888 p += 3; 889 return; 890 } 891 else if (p == '\n') 892 _line++;
893 mike 1.13 } 894 895 // If it got this far, then the comment is unterminated: 896 897 throw XmlException(XmlException::UNTERMINATED_CDATA, _line); 898 } 899 900 void XmlParser::_getDocType(char& p) 901 { 902 // Just ignore the DOCTYPE command for now: 903 904 for (; p && *p != '>'; p++) 905 {
906 david.dillard 1.32 if (*p == '\n') 907 _line++;
908 mike 1.13 } 909 910 if (*p != '>')
911 david.dillard 1.32 throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
912 mike 1.13 913 p++; 914 } 915 916 void XmlParser::_getElement(char& p, XmlEntry& entry) 917 { 918 //-------------------------------------------------------------------------- 919 // Get the element name (expect one of these: '?', '!', [A-Za-z_]) 920 //-------------------------------------------------------------------------- 921 922 if (p == '?') 923 {
924 david.dillard 1.32 entry.type = XmlEntry::XML_DECLARATION; 925 entry.text = ++p;
926 mike 1.13
927 david.dillard 1.32 Boolean openCloseElement = false;
928 mike 1.13
929 david.dillard 1.32 if (_getElementName(p)) 930 return;
931 mike 1.13 } 932 else if (*p == '!') 933 {
934 david.dillard 1.32 p++;
935 mike 1.13
936 david.dillard 1.32 // Expect a comment or CDATA:
937 mike 1.13
938 david.dillard 1.32 if (p[0] == '-' && p[1] == '-') 939 { 940 p += 2; 941 entry.type = XmlEntry::COMMENT; 942 entry.text = p; 943 _getComment(p); 944 return; 945 } 946 else if (memcmp(p, "[CDATA[", 7) == 0) 947 { 948 p += 7; 949 entry.type = XmlEntry::CDATA; 950 entry.text = p; 951 _getCData(p); 952 return; 953 } 954 else if (memcmp(p, "DOCTYPE", 7) == 0) 955 { 956 entry.type = XmlEntry::DOCTYPE;
957 kumpf 1.37 entry.text = "";
958 david.dillard 1.32 _getDocType(p); 959 return; 960 } 961 throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
962 mike 1.13 } 963 else if (*p == '/') 964 {
965 david.dillard 1.32 entry.type = XmlEntry::END_TAG; 966 entry.text = ++p;
967 mike 1.13
968 david.dillard 1.32 if (!_getElementName(p)) 969 throw(XmlException(XmlException::BAD_END_TAG, _line));
970 mike 1.13
971 david.dillard 1.32 return;
972 mike 1.13 }
973 david 1.25 else if ((((p >= 'A') && (p <= 'Z')) \|\|
974 kumpf 1.24 ((p >= 'a') && (p <= 'z')) \|\| 975 (*p == '_')))
976 mike 1.13 {
977 david.dillard 1.32 entry.type = XmlEntry::START_TAG; 978 entry.text = p;
979 mike 1.13
980 david.dillard 1.32 Boolean openCloseElement = false;
981 mike 1.13
982 david.dillard 1.32 if (_getOpenElementName(p, openCloseElement)) 983 { 984 if (openCloseElement) 985 entry.type = XmlEntry::EMPTY_TAG; 986 return; 987 }
988 mike 1.13 } 989 else
990 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
991 mike 1.13 992 //-------------------------------------------------------------------------- 993 // Grab all the attributes: 994 //-------------------------------------------------------------------------- 995 996 for (;;) 997 {
998 david.dillard 1.32 if (entry.type == XmlEntry::XML_DECLARATION) 999 { 1000 if (p[0] == '?' && p[1] == '>') 1001 { 1002 p += 2; 1003 return; 1004 } 1005 } 1006 else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>') 1007 { 1008 entry.type = XmlEntry::EMPTY_TAG; 1009 p += 2; 1010 return; 1011 } 1012 else if (*p == '>') 1013 { 1014 p++; 1015 return; 1016 } 1017 1018 XmlAttribute attr;
1019 dmitry.mikulin 1.43.2.4 attr.nsType = -1;
1020 david.dillard 1.32 attr.name = p; 1021 _getAttributeNameAndEqual(p); 1022
1023 kumpf 1.37 // Get the attribute value (e.g., "some value") 1024 { 1025 if ((p != '"') && (p != '\'')) 1026 { 1027 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); 1028 } 1029 1030 char quote = p++; 1031 1032 char start; 1033 _normalize(_line, p, quote, start); 1034 attr.value = start; 1035 1036 if (*p != quote) 1037 { 1038 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); 1039 } 1040 1041 // Overwrite the closing quote with a null-terminator:
1042 david.dillard 1.32
1043 kumpf 1.37 *p++ = '\0'; 1044 }
1045 david.dillard 1.32 1046 if (entry.type == XmlEntry::XML_DECLARATION) 1047 { 1048 // The next thing must a space or a "?>":
1049 mike 1.13
1050 david.dillard 1.32 if (!(p[0] == '?' && p[1] == '>') && !_isspace(p)) 1051 { 1052 throw XmlException( 1053 XmlException::BAD_ATTRIBUTE_VALUE, _line); 1054 } 1055 } 1056 else if (!(p == '>' \|\| (p[0] == '/' && p[1] == '>') \|\| _isspace(*p))) 1057 { 1058 // The next thing must be a space or a '>':
1059 mike 1.13
1060 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); 1061 }
1062 mike 1.13
1063 mike 1.34 _skipWhitespace(_line, p);
1064 david.dillard 1.32
1065 kumpf 1.43.2.1 entry.attributes.append(attr);
1066 mike 1.13 } 1067 } 1068 1069 static const char* _typeStrings[] = 1070 {
1071 david.dillard 1.32 "XML_DECLARATION", 1072 "START_TAG", 1073 "EMPTY_TAG", 1074 "END_TAG",
1075 mike 1.13 "COMMENT", 1076 "CDATA", 1077 "DOCTYPE",
1078 david.dillard 1.32 "CONTENT"
1079 mike 1.13 }; 1080
1081 kumpf 1.43.2.6 const char* XmlEntry::getUnqualifiedName() const 1082 { 1083 PEGASUS_ASSERT( 1084 (type == XmlEntry::START_TAG) \|\| 1085 (type == XmlEntry::EMPTY_TAG) \|\| 1086 (type == XmlEntry::END_TAG)); 1087 1088 const char* colonPos = strchr(text, ':'); 1089 if (colonPos == NULL) 1090 { 1091 return text; 1092 } 1093 1094 return colonPos + 1; 1095 } 1096
1097 mike 1.13 void XmlEntry::print() const 1098 { 1099 PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " "; 1100 1101 Boolean needQuotes = type == XmlEntry::CDATA \|\| type == XmlEntry::CONTENT; 1102 1103 if (needQuotes)
1104 david.dillard 1.32 PEGASUS_STD(cout) << "\""; 1105
1106 mike 1.13 _printValue(text); 1107 1108 if (needQuotes)
1109 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
1110 mike 1.13 1111 PEGASUS_STD(cout) << '\n'; 1112
1113 kumpf 1.43.2.1 for (Uint32 i = 0; i < attributes.size(); i++)
1114 mike 1.13 {
1115 david.dillard 1.32 PEGASUS_STD(cout) << " " << attributes[i].name << "=\""; 1116 _printValue(attributes[i].value); 1117 PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
1118 mike 1.13 } 1119 } 1120 1121 const XmlAttribute* XmlEntry::findAttribute( 1122 const char* name) const 1123 {
1124 kumpf 1.43.2.1 for (Uint32 i = 0; i < attributes.size(); i++)
1125 mike 1.13 {
1126 david.dillard 1.32 if (strcmp(attributes[i].name, name) == 0) 1127 return &attributes[i];
1128 mike 1.13 } 1129 1130 return 0; 1131 } 1132
1133 kumpf 1.43.2.6 const XmlAttribute* XmlEntry::findAttribute( 1134 int nsType, 1135 const char* name) const 1136 { 1137 for (Uint32 i = 0; i < attributes.size(); i++) 1138 { 1139 if ((attributes[i].nsType == nsType) && 1140 (strcmp(attributes[i].name, name) == 0)) 1141 { 1142 return &attributes[i]; 1143 } 1144 } 1145 1146 return 0; 1147 } 1148
1149 mike 1.13 // Find first non-whitespace character (set first) and last non-whitespace 1150 // character (set last one past this). For example, consider this string: 1151 //
1152 david.dillard 1.32 // " 87 "
1153 mike 1.13 // 1154 // The first pointer would point to '8' and the last pointer woudl point one 1155 // beyond '7'. 1156 1157 static void _findEnds(
1158 david.dillard 1.32 const char* str, 1159 const char*& first,
1160 mike 1.13 const char*& last) 1161 { 1162 first = str; 1163
1164 chuck 1.26 while (_isspace(*first))
1165 david.dillard 1.32 first++;
1166 mike 1.13 1167 if (!*first) 1168 {
1169 david.dillard 1.32 last = first; 1170 return;
1171 mike 1.13 } 1172 1173 last = first + strlen(first); 1174
1175 chuck 1.26 while (last != first && _isspace(last[-1]))
1176 david.dillard 1.32 last--;
1177 mike 1.13 } 1178 1179 Boolean XmlEntry::getAttributeValue(
1180 david.dillard 1.32 const char* name,
1181 mike 1.13 Uint32& value) const 1182 { 1183 const XmlAttribute* attr = findAttribute(name); 1184 1185 if (!attr)
1186 david.dillard 1.32 return false;
1187 mike 1.13 1188 const char* first; 1189 const char* last; 1190 _findEnds(attr->value, first, last); 1191 1192 char* end = 0; 1193 long tmp = strtol(first, &end, 10); 1194 1195 if (!end \|\| end != last)
1196 david.dillard 1.32 return false;
1197 mike 1.13 1198 value = Uint32(tmp); 1199 return true; 1200 } 1201 1202 Boolean XmlEntry::getAttributeValue(
1203 david.dillard 1.32 const char* name,
1204 mike 1.13 Real32& value) const 1205 { 1206 const XmlAttribute* attr = findAttribute(name); 1207 1208 if (!attr)
1209 david.dillard 1.32 return false;
1210 mike 1.13 1211 const char* first; 1212 const char* last; 1213 _findEnds(attr->value, first, last); 1214 1215 char* end = 0; 1216 double tmp = strtod(first, &end); 1217 1218 if (!end \|\| end != last)
1219 david.dillard 1.32 return false;
1220 mike 1.13
1221 david.dillard 1.32 value = static_cast<Real32>(tmp);
1222 mike 1.13 return true; 1223 } 1224 1225 Boolean XmlEntry::getAttributeValue(
1226 david.dillard 1.32 const char* name,
1227 mike 1.13 const char& value) const 1228 { 1229 const XmlAttribute attr = findAttribute(name); 1230 1231 if (!attr)
1232 david.dillard 1.32 return false;
1233 mike 1.13 1234 value = attr->value; 1235 return true; 1236 } 1237 1238 Boolean XmlEntry::getAttributeValue(const char* name, String& value) const 1239 { 1240 const char* tmp; 1241 1242 if (!getAttributeValue(name, tmp))
1243 david.dillard 1.32 return false;
1244 mike 1.13
1245 chuck 1.28 value = String(tmp);
1246 mike 1.13 return true; 1247 } 1248
1249 mike 1.33 void XmlAppendCString(Buffer& out, const char* str)
1250 mike 1.13 {
1251 david.dillard 1.32 out.append(str, static_cast<Uint32>(strlen(str)));
1252 mike 1.13 } 1253 1254 PEGASUS_NAMESPACE_END

No CVS admin address has been configured