pegasus/src/Pegasus/Common/XmlParser.cpp - annotate

Return to XmlParser.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

1 karl 1.38 //%2006////////////////////////////////////////////////////////////////////////
2 mike 1.13 //
3 karl 1.29 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development 4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems. 5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
6 karl 1.27 // IBM Corp.; EMC Corporation, The Open Group.
7 karl 1.29 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.; 8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
9 karl 1.31 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.; 10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
11 karl 1.38 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.; 12 // EMC Corporation; Symantec Corporation; The Open Group.
13 mike 1.13 // 14 // Permission is hereby granted, free of charge, to any person obtaining a copy
15 kumpf 1.16 // of this software and associated documentation files (the "Software"), to 16 // deal in the Software without restriction, including without limitation the 17 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
18 mike 1.13 // sell copies of the Software, and to permit persons to whom the Software is 19 // furnished to do so, subject to the following conditions:
20 karl 1.38 //
21 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
22 mike 1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED 23 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
24 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 25 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 26 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
27 mike 1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 29 // 30 //============================================================================== 31 // 32 //%///////////////////////////////////////////////////////////////////////////// 33 34 //////////////////////////////////////////////////////////////////////////////// 35 // 36 // XmlParser 37 //
38 david.dillard 1.32 // This file contains a simple non-validating XML parser. Here are 39 // several rules for well-formed XML:
40 mike 1.13 //
41 david.dillard 1.32 // 1. Documents must begin with an XML declaration:
42 mike 1.13 //
43 david.dillard 1.32 // <?xml version="1.0" standalone="yes"?>
44 mike 1.13 //
45 david.dillard 1.32 // 2. Comments have the form:
46 mike 1.13 //
47 david.dillard 1.32 // <!-- blah blah blah -->
48 mike 1.13 //
49 david.dillard 1.32 // 3. The following entity references are supported:
50 mike 1.13 //
51 david.dillard 1.32 // &amp - ampersand 52 // &lt - less-than 53 // &gt - greater-than 54 // &quot - full quote 55 // &apos - apostrophe
56 mike 1.13 //
57 kumpf 1.18 // as well as character (numeric) references:
58 mike 1.35 //
59 kumpf 1.18 // &#49; - decimal reference for character '1' 60 // &#x31; - hexadecimal reference for character '1' 61 //
62 david.dillard 1.32 // 4. Element names and attribute names take the following form:
63 mike 1.13 //
64 david.dillard 1.32 // [A-Za-z_][A-Za-z_0-9-.:]
65 mike 1.13 //
66 david.dillard 1.32 // 5. Arbitrary data (CDATA) can be enclosed like this:
67 mike 1.13 //
68 david.dillard 1.32 // <![CDATA[ 69 // ... 70 // ]]>
71 mike 1.13 //
72 david.dillard 1.32 // 6. Element names and attributes names are case-sensitive.
73 mike 1.13 //
74 david.dillard 1.32 // 7. XmlAttribute values must be delimited by full or half quotes. 75 // XmlAttribute values must be delimited.
76 mike 1.13 //
77 david.dillard 1.32 // 8. <!DOCTYPE...>
78 mike 1.13 // 79 // TODO: 80 //
81 kumpf 1.40 // ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is
82 mike 1.35 // work. Handle <!DOCTYPE...> sections which are complicated (containing
83 mike 1.13 // rules rather than references to files). 84 //
85 david.dillard 1.32 // Remove newlines from string literals:
86 mike 1.13 // 87 // Example: <xyz x="hello
88 david.dillard 1.32 // world">
89 mike 1.13 // 90 //////////////////////////////////////////////////////////////////////////////// 91
92 sage 1.14 #include <Pegasus/Common/Config.h>
93 mike 1.13 #include <cctype> 94 #include <cstdio> 95 #include <cstdlib> 96 #include <cstring> 97 #include "XmlParser.h" 98 #include "Logger.h"
99 chuck 1.19 #include "ExceptionRep.h"
100 mike 1.34 #include "CharSet.h"
101 mike 1.13 102 PEGASUS_NAMESPACE_BEGIN 103 104 //////////////////////////////////////////////////////////////////////////////// 105 // 106 // Static helper functions 107 // 108 //////////////////////////////////////////////////////////////////////////////// 109 110 static void _printValue(const char* p) 111 { 112 for (; *p; p++) 113 {
114 david.dillard 1.32 if (p == '\n') 115 PEGASUS_STD(cout) << "\\n"; 116 else if (p == '\r') 117 PEGASUS_STD(cout) << "\\r"; 118 else if (p == '\t') 119 PEGASUS_STD(cout) << "\\t"; 120 else 121 PEGASUS_STD(cout) << p;
122 mike 1.13 } 123 } 124 125 struct EntityReference 126 { 127 const char* match; 128 Uint32 length; 129 char replacement; 130 }; 131
132 kumpf 1.18 // ATTN: Add support for more entity references
133 mike 1.13 static EntityReference _references[] = 134 { 135 { "&", 5, '&' }, 136 { "<", 4, '<' }, 137 { ">", 4, '>' }, 138 { """, 6, '"' }, 139 { "'", 6, '\'' } 140 }; 141
142 chuck 1.26 143 // Implements a check for a whitespace character, without calling 144 // isspace( ). The isspace( ) function is locale-sensitive, 145 // and incorrectly flags some chars above 0x7f as whitespace. This 146 // causes the XmlParser to incorrectly parse UTF-8 data. 147 // 148 // Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml) 149 // defines white space as:
150 david.dillard 1.32 // S ::= (#x20 \| #x9 \| #xD \| #xA)+
151 mike 1.34 static inline int _isspace(char c)
152 chuck 1.26 {
153 kumpf 1.36 return CharSet::isXmlWhiteSpace((Uint8)c);
154 chuck 1.26 } 155
156 mike 1.13 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0])); 157 158 //////////////////////////////////////////////////////////////////////////////// 159 // 160 // XmlException 161 // 162 //////////////////////////////////////////////////////////////////////////////// 163 164 static const char* _xmlMessages[] = 165 { 166 "Bad opening element", 167 "Bad closing element", 168 "Bad attribute name", 169 "Exepected equal sign", 170 "Bad attribute value", 171 "A \"--\" sequence found within comment", 172 "Unterminated comment", 173 "Unterminated CDATA block", 174 "Unterminated DOCTYPE", 175 "Too many attributes: parser only handles 10", 176 "Malformed reference", 177 mike 1.13 "Expected a comment or CDATA following \"<!\" sequence", 178 "Closing element does not match opening element", 179 "One or more tags are still open", 180 "More than one root element was encountered", 181 "Validation error",
182 dmitry.mikulin 1.43.2.4 "Semantic error", 183 "Malformed namespace declaration", 184 "Namespace not supported", 185 "Namespace not declared"
186 mike 1.13 }; 187
// Message keys for each XmlException code. The message helpers index this
// table with (code - 1), in parallel with the default message table.
static const char* _xmlKeys[] =
{
    "Common.XmlParser.BAD_START_TAG",
    "Common.XmlParser.BAD_END_TAG",
    "Common.XmlParser.BAD_ATTRIBUTE_NAME",
    "Common.XmlParser.EXPECTED_EQUAL_SIGN",
    "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
    "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
    "Common.XmlParser.UNTERMINATED_COMMENT",
    "Common.XmlParser.UNTERMINATED_CDATA",
    "Common.XmlParser.UNTERMINATED_DOCTYPE",
    "Common.XmlParser.TOO_MANY_ATTRIBUTES",
    "Common.XmlParser.MALFORMED_REFERENCE",
    "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
    "Common.XmlParser.START_END_MISMATCH",
    "Common.XmlParser.UNCLOSED_TAGS",
    "Common.XmlParser.MULTIPLE_ROOTS",
    "Common.XmlParser.VALIDATION_ERROR",
    "Common.XmlParser.SEMANTIC_ERROR",
    "Common.XmlParser.MALFORMED_NAMESPACE_DECL",
    "Common.XmlParser.UNSUPPORTED_NAMESPACE",
    "Common.XmlParser.UNDECLARED_NAMESPACE"
};


213 kumpf 1.40 static MessageLoaderParms _formMessage( 214 Uint32 code, 215 Uint32 line, 216 const String& message)
217 chuck 1.19 { 218 String dftMsg = _xmlMessages[Uint32(code) - 1]; 219 String key = _xmlKeys[Uint32(code) - 1];
220 dmitry.mikulin 1.43.2.5 String msg = message;
221 chuck 1.19 222 dftMsg.append(": on line $0"); 223 if (message.size()) 224 {
225 david.dillard 1.32 msg = ": " + msg; 226 dftMsg.append("$1"); 227 }
228 chuck 1.19 229 return MessageLoaderParms(key, dftMsg, line ,msg); 230 } 231 232 static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line) 233 { 234 String dftMsg = _xmlMessages[Uint32(code) - 1]; 235 String key = _xmlKeys[Uint32(code) - 1]; 236 237 dftMsg.append(": on line $0");
238 david.dillard 1.32
239 chuck 1.19 return MessageLoaderParms(key, dftMsg, line); 240 } 241
242 mike 1.13 243 XmlException::XmlException(
244 david.dillard 1.32 XmlException::Code code,
245 mike 1.13 Uint32 lineNumber,
246 david.dillard 1.32 const String& message)
247 mike 1.13 : Exception(_formMessage(code, lineNumber, message)) 248 { 249 250 } 251
252 chuck 1.19 253 XmlException::XmlException(
254 david.dillard 1.32 XmlException::Code code,
255 chuck 1.19 Uint32 lineNumber,
256 david.dillard 1.32 MessageLoaderParms& msgParms)
257 chuck 1.19 : Exception(_formPartialMessage(code, lineNumber)) 258 {
259 david.dillard 1.32 if (msgParms.default_msg.size())
260 humberto 1.21 {
261 david.dillard 1.32 msgParms.default_msg = ": " + msgParms.default_msg; 262 } 263 _rep->message.append(MessageLoader::getMessage(msgParms));
264 chuck 1.19 } 265 266
267 mike 1.13 //////////////////////////////////////////////////////////////////////////////// 268 // 269 // XmlValidationError 270 // 271 //////////////////////////////////////////////////////////////////////////////// 272 273 XmlValidationError::XmlValidationError( 274 Uint32 lineNumber, 275 const String& message) 276 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message) 277 { 278 } 279
280 chuck 1.19 281 XmlValidationError::XmlValidationError( 282 Uint32 lineNumber, 283 MessageLoaderParms& msgParms) 284 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms) 285 { 286 } 287 288
289 mike 1.13 //////////////////////////////////////////////////////////////////////////////// 290 // 291 // XmlSemanticError 292 // 293 //////////////////////////////////////////////////////////////////////////////// 294 295 XmlSemanticError::XmlSemanticError( 296 Uint32 lineNumber, 297 const String& message) 298 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message) 299 { 300 }
301 chuck 1.19 302 303 XmlSemanticError::XmlSemanticError( 304 Uint32 lineNumber, 305 MessageLoaderParms& msgParms) 306 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms) 307 { 308 } 309
310 mike 1.13 311 //////////////////////////////////////////////////////////////////////////////// 312 // 313 // XmlParser 314 // 315 //////////////////////////////////////////////////////////////////////////////// 316
317 dmitry.mikulin 1.43.2.2 XmlParser::XmlParser(char* text, XmlNamespace* ns)
318 kumpf 1.40 : _line(1), 319 _current(text), 320 _restoreChar('\0'),
321 dmitry.mikulin 1.43.2.2 _foundRoot(false), 322 _scopeLevel(0), 323 _supportedNamespaces(ns)
324 mike 1.13 { 325 } 326
327 mike 1.34 inline void _skipWhitespace(Uint32& line, char& p) 328 { 329 while (p && _isspace(p)) 330 { 331 if (p == '\n') 332 line++; 333 334 p++; 335 } 336 } 337
#if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \
    defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC)
#pragma optimize( "", off )
#endif
// Decodes a predefined entity reference. On entry p points just past the
// '&'. If the text at p is one of "gt;", "lt;", "apos;", "quot;" or "amp;",
// p is advanced past the ';' and the referenced character is returned.
// Otherwise p is left unchanged and -1 is returned.
static int _getEntityRef(char*& p)
{
    if ((p[0] == 'g') && (p[1] == 't') && (p[2] == ';'))
    {
        p += 3;
        return '>';
    }

    if ((p[0] == 'l') && (p[1] == 't') && (p[2] == ';'))
    {
        p += 3;
        return '<';
    }

    if ((p[0] == 'a') && (p[1] == 'p') && (p[2] == 'o') && (p[3] == 's') &&
        (p[4] == ';'))
    {
        p += 5;
        return '\'';
    }

    if ((p[0] == 'q') && (p[1] == 'u') && (p[2] == 'o') && (p[3] == 't') &&
        (p[4] == ';'))
    {
        p += 5;
        return '"';
    }

    if ((p[0] == 'a') && (p[1] == 'm') && (p[2] == 'p') && (p[3] == ';'))
    {
        p += 4;
        return '&';
    }

    return -1;
}
#if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \
    defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC)
#pragma optimize( "", on )
#endif
382 kumpf 1.37 383 static inline int _getCharRef(char& p, bool hex) 384 { 385 char end; 386 unsigned long ch; 387 388 if (hex) 389 { 390 ch = strtoul(p, &end, 16); 391 } 392 else 393 { 394 ch = strtoul(p, &end, 10); 395 } 396 397 if ((end == p) \|\| (end != ';') \|\| (ch > 255)) 398 { 399 return -1; 400 } 401 402 if ((hex && (end - p > 4)) \|\| (!hex && (end - p > 5))) 403 kumpf 1.37 { 404 return -1; 405 } 406 407 p = end + 1; 408 409 return ch; 410 } 411 412 static void _normalize(Uint32& line, char& p, char end_char, char& start) 413 { 414 // Skip over leading whitespace: 415 416 _skipWhitespace(line, p); 417 start = p; 418 419 // Process one character at a time: 420 421 char q = p; 422 423 while (p && (p != end_char)) 424 kumpf 1.37 { 425 if (_isspace(p)) 426 { 427 // Compress sequences of whitespace characters to a single space 428 // character. Update line number when newlines encountered. 429 430 if (p++ == '\n') 431 { 432 line++; 433 } 434 435 q++ = ' '; 436 437 _skipWhitespace(line, p); 438 } 439 else if (p == '&') 440 { 441 // Process entity characters and entity references: 442 443 p++; 444 int ch; 445 kumpf 1.37 446 if (p == '#') 447 { 448 p++; 449 450 if (p == 'x') 451 { 452 p++; 453 ch = _getCharRef(p, true); 454 } 455 else 456 { 457 ch = _getCharRef(p, false); 458 } 459 } 460 else 461 { 462 ch = _getEntityRef(p); 463 } 464 465 if (ch == -1) 466 kumpf 1.37 { 467 throw XmlException(XmlException::MALFORMED_REFERENCE, line); 468 } 469 470 q++ = ch; 471 } 472 else 473 { 474 q++ = p++; 475 } 476 } 477
478 kumpf 1.40 // We encountered a the end_char or a zero-terminator.
479 kumpf 1.37 480 q = p; 481 482 // Remove single trailing whitespace (consecutive whitespaces already 483 // compressed above). Since p >= q, we can tell if we need to strip a 484 // trailing space from q by looking at the end of p. We must not look at 485 // the last character of p, though, if p is an empty string. 486 487 if ((p != start) && _isspace(p[-1])) 488 { 489 q--; 490 } 491 492 // If q got behind p, it is safe and necessary to null-terminate q 493 494 if (q != p) 495 { 496 *q = '\0'; 497 } 498 } 499
500 kumpf 1.43.2.1 Boolean XmlParser::next( 501 XmlEntry& entry,
502 dmitry.mikulin 1.43.2.2 Boolean includeComment)
503 mike 1.13 {
504 kumpf 1.43.2.1 entry.attributes.clear(); 505
506 mike 1.13 if (!_putBackStack.isEmpty()) 507 {
508 david.dillard 1.32 entry = _putBackStack.top(); 509 _putBackStack.pop(); 510 return true;
511 mike 1.13 } 512 513 // If a character was overwritten with a null-terminator the last 514 // time this routine was called, then put back that character. Before 515 // exiting of course, restore the null-terminator. 516 517 char* nullTerminator = 0; 518 519 if (_restoreChar && !*_current) 520 {
521 david.dillard 1.32 nullTerminator = _current; 522 *_current = _restoreChar; 523 _restoreChar = '\0';
524 mike 1.13 } 525
526 venkat.puvvada 1.41 // Loop until we are done with comments if includeComment is false. 527 do 528 { 529 // Skip over any whitespace: 530 _skipWhitespace(_line, _current); 531 532 if (!_current) 533 { 534 if (nullTerminator) 535 nullTerminator = '\0';
536 mike 1.13
537 venkat.puvvada 1.41 if (!_stack.isEmpty()) 538 throw XmlException(XmlException::UNCLOSED_TAGS, _line);
539 mike 1.13
540 venkat.puvvada 1.41 return false; 541 }
542 mike 1.13
543 venkat.puvvada 1.41 // Either a "<...>" or content begins next:
544 mike 1.13
545 venkat.puvvada 1.41 if (*_current == '<') 546 { 547 _current++; 548 _getElement(_current, entry);
549 mike 1.13
550 venkat.puvvada 1.41 if (nullTerminator) 551 *nullTerminator = '\0';
552 mike 1.13
553 venkat.puvvada 1.41 if (entry.type == XmlEntry::START_TAG) 554 { 555 if (_stack.isEmpty() && _foundRoot) 556 throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
557 mike 1.13
558 venkat.puvvada 1.41 _foundRoot = true; 559 _stack.push((char*)entry.text); 560 } 561 else if (entry.type == XmlEntry::END_TAG) 562 { 563 if (_stack.isEmpty()) 564 throw XmlException(XmlException::START_END_MISMATCH, _line);
565 mike 1.13
566 venkat.puvvada 1.41 if (strcmp(_stack.top(), entry.text) != 0) 567 throw XmlException(XmlException::START_END_MISMATCH, _line);
568 david.dillard 1.32
569 venkat.puvvada 1.41 _stack.pop(); 570 }
571 david.dillard 1.32 }
572 venkat.puvvada 1.41 else
573 david.dillard 1.32 {
574 venkat.puvvada 1.41 // Normalize the content:
575 mike 1.13
576 venkat.puvvada 1.41 char* start; 577 _normalize(_line, _current, '<', start);
578 mike 1.13
579 venkat.puvvada 1.41 // Get the content:
580 mike 1.13
581 venkat.puvvada 1.41 entry.type = XmlEntry::CONTENT; 582 entry.text = start;
583 kumpf 1.37
584 venkat.puvvada 1.41 // Overwrite '<' with a null character (temporarily).
585 kumpf 1.37
586 venkat.puvvada 1.41 _restoreChar = _current; 587 _current = '\0';
588 kumpf 1.37
589 venkat.puvvada 1.41 if (nullTerminator) 590 *nullTerminator = '\0'; 591 }
592 dmitry.mikulin 1.43.2.2 } while (!includeComment && entry.type == XmlEntry::COMMENT); 593
594 kumpf 1.43.2.3 if (_supportedNamespaces && 595 (entry.type == XmlEntry::START_TAG \|\| 596 entry.type == XmlEntry::EMPTY_TAG \|\| 597 entry.type == XmlEntry::END_TAG))
598 dmitry.mikulin 1.43.2.2 { 599 // Process attributes and enter namespaces into the table 600 if (entry.type == XmlEntry::START_TAG \|\| 601 entry.type == XmlEntry::EMPTY_TAG) 602 { 603 _scopeLevel++; 604 for (unsigned int i = 0; i < entry.attributes.size(); i++) 605 {
606 kumpf 1.43.2.3 XmlAttribute& attr = entry.attributes[i];
607 dmitry.mikulin 1.43.2.4 if (strncmp(attr.name, "xmlns", 5) == 0)
608 dmitry.mikulin 1.43.2.2 { 609 XmlNamespace ns;
610 dmitry.mikulin 1.43.2.4 if (attr.name[5] == ':')
611 dmitry.mikulin 1.43.2.2 {
612 dmitry.mikulin 1.43.2.4 ns.localName = attr.name + 6; 613 614 // Check if we have malformed XML of the form: 615 // "xmlns:=URI". In this case attr.name will be set 616 // to "xmlns:" and ns.localName will point to '\0' 617 if (ns.localName[0] == '\0') 618 { 619 throw XmlException( 620 XmlException::MALFORMED_NAMESPACE_DECL, 621 _line); 622 }
623 dmitry.mikulin 1.43.2.2 } 624 else 625 {
626 dmitry.mikulin 1.43.2.4 // Default name space has no local name 627 ns.localName = 0;
628 dmitry.mikulin 1.43.2.2 }
629 dmitry.mikulin 1.43.2.4 ns.extendedName = attr.value; 630 ns.scopeLevel = _scopeLevel; 631 ns.type = getSupportedNamespaceType(ns.extendedName); 632 633 // Even unsupported namespaces get pushed onto the stack. 634 // We will throw an exception of there is an attempt to 635 // reference an unsupported namespace later. 636 _nameSpaces.push(ns); 637 } 638 else 639 { 640 // Attribute names may also be namespace qualified. 641 attr.nsType = _getNamespaceType(attr.name);
642 dmitry.mikulin 1.43.2.2 } 643 } 644 }
645 kumpf 1.43.2.3
646 dmitry.mikulin 1.43.2.4 // Get the namespace type for this tag. 647 entry.nsType = _getNamespaceType(entry.text);
648 dmitry.mikulin 1.43.2.2
649 kumpf 1.43.2.3 if (entry.type == XmlEntry::END_TAG \|\| 650 entry.type == XmlEntry::EMPTY_TAG) 651 {
652 dmitry.mikulin 1.43.2.2 // Remove any namespaces of the current scope level from 653 // the scope stack. 654 while (!_nameSpaces.isEmpty() && 655 _scopeLevel <= _nameSpaces.top().scopeLevel) 656 { 657 _nameSpaces.pop(); 658 } 659 660 PEGASUS_ASSERT(_scopeLevel > 0); 661 _scopeLevel--; 662 } 663 } 664 else 665 { 666 entry.nsType = -1; 667 }
668 kumpf 1.37
669 venkat.puvvada 1.41 return true;
670 mike 1.13 } 671
672 dmitry.mikulin 1.43.2.2 // Get the namespace type of the given tag 673 int XmlParser::_getNamespaceType(const char* tag) 674 { 675 const char* pos = strchr(tag, ':'); 676
677 dmitry.mikulin 1.43.2.4 // If ":" is not found, the tag is not namespace qualified and we 678 // need to look for the default name space.
679 dmitry.mikulin 1.43.2.2 680 // Search the namespace stack from the top 681 for (int i = _nameSpaces.size() - 1; i >=0; i--) 682 {
683 dmitry.mikulin 1.43.2.4 // If ":" is found, look for the name space with the matching 684 // local name... 685 if ((pos && _nameSpaces[i].localName && 686 !strncmp(_nameSpaces[i].localName, tag, pos - tag)) \|\| 687 // ... otherwise look for the default name space. It's the 688 // one with localName set to NULL 689 (!pos && !_nameSpaces[i].localName)) 690 { 691 // If it's a reference to an unsupported namespace, 692 // throw an exception 693 if (_nameSpaces[i].type == -1) 694 { 695 throw XmlException(XmlException::UNSUPPORTED_NAMESPACE, _line); 696 }
697 dmitry.mikulin 1.43.2.2 return _nameSpaces[i].type; 698 } 699 }
700 dmitry.mikulin 1.43.2.4 701 // If the tag is namespace qualified, but the name space has not been 702 // declared, it's malformed XML and we must throw an exception 703 if (pos) 704 { 705 throw XmlException(XmlException::UNDECLARED_NAMESPACE, _line); 706 } 707 708 // Otherwise it's OK not to have a name space.
709 dmitry.mikulin 1.43.2.2 return -1; 710 } 711 712 // Gived the extended namespace name, find it in the table of supported 713 // namespaces and return its type. 714 int XmlParser::getSupportedNamespaceType(const char* extendedName) 715 { 716 for (int i = 0; 717 _supportedNamespaces[i].localName != 0; 718 i++) 719 { 720 PEGASUS_ASSERT(_supportedNamespaces[i].type == i); 721 if (!strcmp(_supportedNamespaces[i].extendedName, extendedName)) 722 { 723 return _supportedNamespaces[i].type; 724 } 725 } 726 return -1; 727 } 728 729 XmlNamespace* XmlParser::getNamespace(int nsType) 730 dmitry.mikulin 1.43.2.2 { 731 for (int i = _nameSpaces.size() - 1; i >=0; i--) 732 { 733 if (_nameSpaces[i].type == nsType) 734 { 735 return &_nameSpaces[i]; 736 } 737 } 738 return 0; 739 } 740
741 mike 1.13 void XmlParser::putBack(XmlEntry& entry) 742 { 743 _putBackStack.push(entry); 744 } 745 746 XmlParser::~XmlParser() 747 { 748 // Nothing to do! 749 } 750
// A-Za-z0-9_-:.
// Lookup table indexed by an unsigned byte value: 1 when the byte may
// appear after the first character of an element or attribute name, 0
// otherwise. The explicit size guarantees that every Uint8 index is in
// bounds; entries 0x80-0xFF are not listed and are zero-initialized.
static unsigned char _isInnerElementChar[256] =
{
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x00-0x0F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x10-0x1F
    0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0, // 0x20-0x2F: '-' '.'
    1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0, // 0x30-0x3F: '0'-'9' ':'
    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x40-0x4F: 'A'-'O'
    1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, // 0x50-0x5F: 'P'-'Z' '_'
    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x60-0x6F: 'a'-'o'
    1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0, // 0x70-0x7F: 'p'-'z'
};

763 mike 1.13 Boolean XmlParser::_getElementName(char*& p) 764 {
765 mike 1.35 if (!CharSet::isAlNumUnder(Uint8(*p)))
766 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
767 mike 1.35
768 kumpf 1.24 p++;
769 mike 1.13
770 mike 1.35 while (p && _isInnerElementChar[Uint8(p)])
771 david.dillard 1.32 p++;
772 mike 1.13 773 // The next character must be a space: 774
775 chuck 1.26 if (_isspace(*p))
776 mike 1.13 {
777 david.dillard 1.32 *p++ = '\0';
778 mike 1.34 _skipWhitespace(_line, p);
779 mike 1.13 } 780 781 if (*p == '>') 782 {
783 david.dillard 1.32 *p++ = '\0'; 784 return true;
785 mike 1.13 } 786 787 return false; 788 } 789 790 Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement) 791 { 792 openCloseElement = false; 793
794 mike 1.35 if (!CharSet::isAlNumUnder(Uint8(*p)))
795 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
796 mike 1.35
797 kumpf 1.24 p++;
798 mike 1.13
799 mike 1.35 while (p && _isInnerElementChar[Uint8(p)])
800 david.dillard 1.32 p++;
801 mike 1.13 802 // The next character must be a space: 803
804 chuck 1.26 if (_isspace(*p))
805 mike 1.13 {
806 david.dillard 1.32 *p++ = '\0';
807 mike 1.34 _skipWhitespace(_line, p);
808 mike 1.13 } 809 810 if (*p == '>') 811 {
812 david.dillard 1.32 *p++ = '\0'; 813 return true;
814 mike 1.13 } 815 816 if (p[0] == '/' && p[1] == '>') 817 {
818 david.dillard 1.32 openCloseElement = true; 819 *p = '\0'; 820 p += 2; 821 return true;
822 mike 1.13 } 823 824 return false; 825 } 826 827 void XmlParser::_getAttributeNameAndEqual(char*& p) 828 {
829 mike 1.35 if (!CharSet::isAlNumUnder((Uint8)*p))
830 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
831 mike 1.35
832 kumpf 1.24 p++;
833 mike 1.13
834 mike 1.35 while (p && _isInnerElementChar[Uint8(p)])
835 david.dillard 1.32 p++;
836 mike 1.13 837 char* term = p; 838
839 mike 1.34 _skipWhitespace(_line, p);
840 mike 1.13 841 if (*p != '=')
842 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
843 mike 1.13 844 p++; 845
846 mike 1.34 _skipWhitespace(_line, p);
847 mike 1.13 848 term = '\0'; 849 } 850 851 void XmlParser::_getComment(char& p) 852 { 853 // Now p points to first non-whitespace character beyond "<--" sequence: 854 855 for (; *p; p++) 856 {
857 david.dillard 1.32 if (p[0] == '-' && p[1] == '-') 858 { 859 if (p[2] != '>') 860 { 861 throw XmlException( 862 XmlException::MINUS_MINUS_IN_COMMENT, _line); 863 } 864 865 // Find end of comment (excluding whitespace): 866 867 *p = '\0'; 868 p += 3; 869 return; 870 }
871 mike 1.13 } 872 873 // If it got this far, then the comment is unterminated: 874 875 throw XmlException(XmlException::UNTERMINATED_COMMENT, _line); 876 } 877 878 void XmlParser::_getCData(char& p) 879 { 880 // At this point p points one past "<![CDATA[" sequence: 881 882 for (; p; p++) 883 {
884 david.dillard 1.32 if (p[0] == ']' && p[1] == ']' && p[2] == '>') 885 { 886 p = '\0'; 887 p += 3; 888 return; 889 } 890 else if (p == '\n') 891 _line++;
892 mike 1.13 } 893 894 // If it got this far, then the comment is unterminated: 895 896 throw XmlException(XmlException::UNTERMINATED_CDATA, _line); 897 } 898 899 void XmlParser::_getDocType(char& p) 900 { 901 // Just ignore the DOCTYPE command for now: 902 903 for (; p && *p != '>'; p++) 904 {
905 david.dillard 1.32 if (*p == '\n') 906 _line++;
907 mike 1.13 } 908 909 if (*p != '>')
910 david.dillard 1.32 throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
911 mike 1.13 912 p++; 913 } 914 915 void XmlParser::_getElement(char& p, XmlEntry& entry) 916 { 917 //-------------------------------------------------------------------------- 918 // Get the element name (expect one of these: '?', '!', [A-Za-z_]) 919 //-------------------------------------------------------------------------- 920 921 if (p == '?') 922 {
923 david.dillard 1.32 entry.type = XmlEntry::XML_DECLARATION; 924 entry.text = ++p;
925 mike 1.13
926 david.dillard 1.32 Boolean openCloseElement = false;
927 mike 1.13
928 david.dillard 1.32 if (_getElementName(p)) 929 return;
930 mike 1.13 } 931 else if (*p == '!') 932 {
933 david.dillard 1.32 p++;
934 mike 1.13
935 david.dillard 1.32 // Expect a comment or CDATA:
936 mike 1.13
937 david.dillard 1.32 if (p[0] == '-' && p[1] == '-') 938 { 939 p += 2; 940 entry.type = XmlEntry::COMMENT; 941 entry.text = p; 942 _getComment(p); 943 return; 944 } 945 else if (memcmp(p, "[CDATA[", 7) == 0) 946 { 947 p += 7; 948 entry.type = XmlEntry::CDATA; 949 entry.text = p; 950 _getCData(p); 951 return; 952 } 953 else if (memcmp(p, "DOCTYPE", 7) == 0) 954 { 955 entry.type = XmlEntry::DOCTYPE;
956 kumpf 1.37 entry.text = "";
957 david.dillard 1.32 _getDocType(p); 958 return; 959 } 960 throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
961 mike 1.13 } 962 else if (*p == '/') 963 {
964 david.dillard 1.32 entry.type = XmlEntry::END_TAG; 965 entry.text = ++p;
966 mike 1.13
967 david.dillard 1.32 if (!_getElementName(p)) 968 throw(XmlException(XmlException::BAD_END_TAG, _line));
969 mike 1.13
970 david.dillard 1.32 return;
971 mike 1.13 }
972 david 1.25 else if ((((p >= 'A') && (p <= 'Z')) \|\|
973 kumpf 1.24 ((p >= 'a') && (p <= 'z')) \|\| 974 (*p == '_')))
975 mike 1.13 {
976 david.dillard 1.32 entry.type = XmlEntry::START_TAG; 977 entry.text = p;
978 mike 1.13
979 david.dillard 1.32 Boolean openCloseElement = false;
980 mike 1.13
981 david.dillard 1.32 if (_getOpenElementName(p, openCloseElement)) 982 { 983 if (openCloseElement) 984 entry.type = XmlEntry::EMPTY_TAG; 985 return; 986 }
987 mike 1.13 } 988 else
989 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
990 mike 1.13 991 //-------------------------------------------------------------------------- 992 // Grab all the attributes: 993 //-------------------------------------------------------------------------- 994 995 for (;;) 996 {
997 david.dillard 1.32 if (entry.type == XmlEntry::XML_DECLARATION) 998 { 999 if (p[0] == '?' && p[1] == '>') 1000 { 1001 p += 2; 1002 return; 1003 } 1004 } 1005 else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>') 1006 { 1007 entry.type = XmlEntry::EMPTY_TAG; 1008 p += 2; 1009 return; 1010 } 1011 else if (*p == '>') 1012 { 1013 p++; 1014 return; 1015 } 1016 1017 XmlAttribute attr;
1018 dmitry.mikulin 1.43.2.4 attr.nsType = -1;
1019 david.dillard 1.32 attr.name = p; 1020 _getAttributeNameAndEqual(p); 1021
1022 kumpf 1.37 // Get the attribute value (e.g., "some value") 1023 { 1024 if ((p != '"') && (p != '\'')) 1025 { 1026 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); 1027 } 1028 1029 char quote = p++; 1030 1031 char start; 1032 _normalize(_line, p, quote, start); 1033 attr.value = start; 1034 1035 if (*p != quote) 1036 { 1037 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); 1038 } 1039 1040 // Overwrite the closing quote with a null-terminator:
1041 david.dillard 1.32
1042 kumpf 1.37 *p++ = '\0'; 1043 }
1044 david.dillard 1.32 1045 if (entry.type == XmlEntry::XML_DECLARATION) 1046 { 1047 // The next thing must a space or a "?>":
1048 mike 1.13
1049 david.dillard 1.32 if (!(p[0] == '?' && p[1] == '>') && !_isspace(p)) 1050 { 1051 throw XmlException( 1052 XmlException::BAD_ATTRIBUTE_VALUE, _line); 1053 } 1054 } 1055 else if (!(p == '>' \|\| (p[0] == '/' && p[1] == '>') \|\| _isspace(*p))) 1056 { 1057 // The next thing must be a space or a '>':
1058 mike 1.13
1059 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); 1060 }
1061 mike 1.13
1062 mike 1.34 _skipWhitespace(_line, p);
1063 david.dillard 1.32
1064 kumpf 1.43.2.1 entry.attributes.append(attr);
1065 mike 1.13 } 1066 } 1067 1068 static const char* _typeStrings[] = 1069 {
1070 david.dillard 1.32 "XML_DECLARATION", 1071 "START_TAG", 1072 "EMPTY_TAG", 1073 "END_TAG",
1074 mike 1.13 "COMMENT", 1075 "CDATA", 1076 "DOCTYPE",
1077 david.dillard 1.32 "CONTENT"
1078 mike 1.13 }; 1079 1080 void XmlEntry::print() const 1081 { 1082 PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " "; 1083 1084 Boolean needQuotes = type == XmlEntry::CDATA \|\| type == XmlEntry::CONTENT; 1085 1086 if (needQuotes)
1087 david.dillard 1.32 PEGASUS_STD(cout) << "\""; 1088
1089 mike 1.13 _printValue(text); 1090 1091 if (needQuotes)
1092 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
1093 mike 1.13 1094 PEGASUS_STD(cout) << '\n'; 1095
1096 kumpf 1.43.2.1 for (Uint32 i = 0; i < attributes.size(); i++)
1097 mike 1.13 {
1098 david.dillard 1.32 PEGASUS_STD(cout) << " " << attributes[i].name << "=\""; 1099 _printValue(attributes[i].value); 1100 PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
1101 mike 1.13 } 1102 } 1103 1104 const XmlAttribute* XmlEntry::findAttribute( 1105 const char* name) const 1106 {
1107 kumpf 1.43.2.1 for (Uint32 i = 0; i < attributes.size(); i++)
1108 mike 1.13 {
1109 david.dillard 1.32 if (strcmp(attributes[i].name, name) == 0) 1110 return &attributes[i];
1111 mike 1.13 } 1112 1113 return 0; 1114 } 1115 1116 // Find first non-whitespace character (set first) and last non-whitespace 1117 // character (set last one past this). For example, consider this string: 1118 //
1119 david.dillard 1.32 // " 87 "
1120 mike 1.13 // 1121 // The first pointer would point to '8' and the last pointer woudl point one 1122 // beyond '7'. 1123 1124 static void _findEnds(
1125 david.dillard 1.32 const char* str, 1126 const char*& first,
1127 mike 1.13 const char*& last) 1128 { 1129 first = str; 1130
1131 chuck 1.26 while (_isspace(*first))
1132 david.dillard 1.32 first++;
1133 mike 1.13 1134 if (!*first) 1135 {
1136 david.dillard 1.32 last = first; 1137 return;
1138 mike 1.13 } 1139 1140 last = first + strlen(first); 1141
1142 chuck 1.26 while (last != first && _isspace(last[-1]))
1143 david.dillard 1.32 last--;
1144 mike 1.13 } 1145 1146 Boolean XmlEntry::getAttributeValue(
1147 david.dillard 1.32 const char* name,
1148 mike 1.13 Uint32& value) const 1149 { 1150 const XmlAttribute* attr = findAttribute(name); 1151 1152 if (!attr)
1153 david.dillard 1.32 return false;
1154 mike 1.13 1155 const char* first; 1156 const char* last; 1157 _findEnds(attr->value, first, last); 1158 1159 char* end = 0; 1160 long tmp = strtol(first, &end, 10); 1161 1162 if (!end \|\| end != last)
1163 david.dillard 1.32 return false;
1164 mike 1.13 1165 value = Uint32(tmp); 1166 return true; 1167 } 1168 1169 Boolean XmlEntry::getAttributeValue(
1170 david.dillard 1.32 const char* name,
1171 mike 1.13 Real32& value) const 1172 { 1173 const XmlAttribute* attr = findAttribute(name); 1174 1175 if (!attr)
1176 david.dillard 1.32 return false;
1177 mike 1.13 1178 const char* first; 1179 const char* last; 1180 _findEnds(attr->value, first, last); 1181 1182 char* end = 0; 1183 double tmp = strtod(first, &end); 1184 1185 if (!end \|\| end != last)
1186 david.dillard 1.32 return false;
1187 mike 1.13
1188 david.dillard 1.32 value = static_cast<Real32>(tmp);
1189 mike 1.13 return true; 1190 } 1191 1192 Boolean XmlEntry::getAttributeValue(
1193 david.dillard 1.32 const char* name,
1194 mike 1.13 const char& value) const 1195 { 1196 const XmlAttribute attr = findAttribute(name); 1197 1198 if (!attr)
1199 david.dillard 1.32 return false;
1200 mike 1.13 1201 value = attr->value; 1202 return true; 1203 } 1204 1205 Boolean XmlEntry::getAttributeValue(const char* name, String& value) const 1206 { 1207 const char* tmp; 1208 1209 if (!getAttributeValue(name, tmp))
1210 david.dillard 1.32 return false;
1211 mike 1.13
1212 chuck 1.28 value = String(tmp);
1213 mike 1.13 return true; 1214 } 1215
1216 mike 1.33 void XmlAppendCString(Buffer& out, const char* str)
1217 mike 1.13 {
1218 david.dillard 1.32 out.append(str, static_cast<Uint32>(strlen(str)));
1219 mike 1.13 } 1220 1221 PEGASUS_NAMESPACE_END

No CVS admin address has been configured