pegasus/src/Pegasus/Common/XmlParser.cpp - annotate

Return to XmlParser.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

1 karl 1.38 //%2006////////////////////////////////////////////////////////////////////////
2 mike 1.13 //
3 karl 1.29 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development 4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems. 5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
6 karl 1.27 // IBM Corp.; EMC Corporation, The Open Group.
7 karl 1.29 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.; 8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
9 karl 1.31 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.; 10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
11 karl 1.38 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.; 12 // EMC Corporation; Symantec Corporation; The Open Group.
13 mike 1.13 // 14 // Permission is hereby granted, free of charge, to any person obtaining a copy
15 kumpf 1.16 // of this software and associated documentation files (the "Software"), to 16 // deal in the Software without restriction, including without limitation the 17 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
18 mike 1.13 // sell copies of the Software, and to permit persons to whom the Software is 19 // furnished to do so, subject to the following conditions:
20 karl 1.38 //
21 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
22 mike 1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED 23 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
24 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 25 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 26 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
27 mike 1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 29 // 30 //============================================================================== 31 // 32 //%///////////////////////////////////////////////////////////////////////////// 33 34 //////////////////////////////////////////////////////////////////////////////// 35 // 36 // XmlParser 37 //
38 david.dillard 1.32 // This file contains a simple non-validating XML parser. Here are 39 // several rules for well-formed XML:
40 mike 1.13 //
41 david.dillard 1.32 // 1. Documents must begin with an XML declaration:
42 mike 1.13 //
43 david.dillard 1.32 // <?xml version="1.0" standalone="yes"?>
44 mike 1.13 //
45 david.dillard 1.32 // 2. Comments have the form:
46 mike 1.13 //
47 david.dillard 1.32 // <!-- blah blah blah -->
48 mike 1.13 //
49 david.dillard 1.32 // 3. The following entity references are supported:
50 mike 1.13 //
51 david.dillard 1.32 // &amp; - ampersand 52 // &lt; - less-than 53 // &gt; - greater-than 54 // &quot; - full quote 55 // &apos; - apostrophe
56 mike 1.13 //
57 kumpf 1.18 // as well as character (numeric) references:
58 mike 1.35 //
59 kumpf 1.18 // &#49; - decimal reference for character '1' 60 // &#x31; - hexadecimal reference for character '1' 61 //
62 david.dillard 1.32 // 4. Element names and attribute names take the following form:
63 mike 1.13 //
64 david.dillard 1.32 // [A-Za-z_][A-Za-z_0-9-.:]
65 mike 1.13 //
66 david.dillard 1.32 // 5. Arbitrary data (CDATA) can be enclosed like this:
67 mike 1.13 //
68 david.dillard 1.32 // <![CDATA[ 69 // ... 70 // ]]>
71 mike 1.13 //
72 david.dillard 1.32 // 6. Element names and attributes names are case-sensitive.
73 mike 1.13 //
74 david.dillard 1.32 // 7. XmlAttribute values must be delimited by full or half quotes. 75 // XmlAttribute values must be delimited.
76 mike 1.13 //
77 david.dillard 1.32 // 8. <!DOCTYPE...>
78 mike 1.13 // 79 // TODO: 80 //
81 kumpf 1.40 // ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is
82 mike 1.35 // work. Handle <!DOCTYPE...> sections which are complicated (containing
83 mike 1.13 // rules rather than references to files). 84 //
85 david.dillard 1.32 // Remove newlines from string literals:
86 mike 1.13 // 87 // Example: <xyz x="hello
88 david.dillard 1.32 // world">
89 mike 1.13 // 90 //////////////////////////////////////////////////////////////////////////////// 91
92 sage 1.14 #include <Pegasus/Common/Config.h>
93 mike 1.13 #include <cctype> 94 #include <cstdio> 95 #include <cstdlib> 96 #include <cstring> 97 #include "XmlParser.h" 98 #include "Logger.h"
99 chuck 1.19 #include "ExceptionRep.h"
100 mike 1.34 #include "CharSet.h"
101 mike 1.13 102 PEGASUS_NAMESPACE_BEGIN 103 104 //////////////////////////////////////////////////////////////////////////////// 105 // 106 // Static helper functions 107 // 108 //////////////////////////////////////////////////////////////////////////////// 109 110 static void _printValue(const char* p) 111 { 112 for (; *p; p++) 113 {
114 david.dillard 1.32 if (p == '\n') 115 PEGASUS_STD(cout) << "\\n"; 116 else if (p == '\r') 117 PEGASUS_STD(cout) << "\\r"; 118 else if (p == '\t') 119 PEGASUS_STD(cout) << "\\t"; 120 else 121 PEGASUS_STD(cout) << p;
122 mike 1.13 } 123 } 124 125 struct EntityReference 126 { 127 const char* match; 128 Uint32 length; 129 char replacement; 130 }; 131
132 kumpf 1.18 // ATTN: Add support for more entity references
133 mike 1.13 static EntityReference _references[] = 134 { 135 { "&", 5, '&' }, 136 { "<", 4, '<' }, 137 { ">", 4, '>' }, 138 { """, 6, '"' }, 139 { "'", 6, '\'' } 140 }; 141
142 chuck 1.26 143 // Implements a check for a whitespace character, without calling 144 // isspace( ). The isspace( ) function is locale-sensitive, 145 // and incorrectly flags some chars above 0x7f as whitespace. This 146 // causes the XmlParser to incorrectly parse UTF-8 data. 147 // 148 // Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml) 149 // defines white space as:
150 david.dillard 1.32 // S ::= (#x20 \| #x9 \| #xD \| #xA)+
151 mike 1.34 static inline int _isspace(char c)
152 chuck 1.26 {
153 kumpf 1.36 return CharSet::isXmlWhiteSpace((Uint8)c);
154 chuck 1.26 } 155
156 mike 1.13 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0])); 157 158 //////////////////////////////////////////////////////////////////////////////// 159 // 160 // XmlException 161 // 162 //////////////////////////////////////////////////////////////////////////////// 163 164 static const char* _xmlMessages[] = 165 { 166 "Bad opening element", 167 "Bad closing element", 168 "Bad attribute name", 169 "Exepected equal sign", 170 "Bad attribute value", 171 "A \"--\" sequence found within comment", 172 "Unterminated comment", 173 "Unterminated CDATA block", 174 "Unterminated DOCTYPE", 175 "Too many attributes: parser only handles 10", 176 "Malformed reference", 177 mike 1.13 "Expected a comment or CDATA following \"<!\" sequence", 178 "Closing element does not match opening element", 179 "One or more tags are still open", 180 "More than one root element was encountered", 181 "Validation error", 182 "Semantic error" 183 }; 184
// Message-bundle keys, one per exception code. _formMessage() and
// _formPartialMessage() index this table with (code - 1), in parallel with
// the default-text table _xmlMessages. The table is never modified, so both
// the strings and the pointers are const.
static const char* const _xmlKeys[] =
{
    "Common.XmlParser.BAD_START_TAG",
    "Common.XmlParser.BAD_END_TAG",
    "Common.XmlParser.BAD_ATTRIBUTE_NAME",
    "Common.XmlParser.EXPECTED_EQUAL_SIGN",
    "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
    "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
    "Common.XmlParser.UNTERMINATED_COMMENT",
    "Common.XmlParser.UNTERMINATED_CDATA",
    "Common.XmlParser.UNTERMINATED_DOCTYPE",
    "Common.XmlParser.TOO_MANY_ATTRIBUTES",
    "Common.XmlParser.MALFORMED_REFERENCE",
    "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
    "Common.XmlParser.START_END_MISMATCH",
    "Common.XmlParser.UNCLOSED_TAGS",
    "Common.XmlParser.MULTIPLE_ROOTS",
    "Common.XmlParser.VALIDATION_ERROR",
    "Common.XmlParser.SEMANTIC_ERROR"
};

206 chuck 1.23 // l10n replace _formMessage (comment out the old one)
207 chuck 1.19 /*
208 mike 1.13 static String _formMessage(Uint32 code, Uint32 line, const String& message) 209 { 210 String result = _xmlMessages[Uint32(code) - 1]; 211 212 char buffer[32]; 213 sprintf(buffer, "%d", line); 214 result.append(": on line "); 215 result.append(buffer); 216 217 if (message.size()) 218 {
219 david.dillard 1.32 result.append(": "); 220 result.append(message);
221 mike 1.13 } 222 223 return result; 224 }
225 chuck 1.19 */ 226
227 kumpf 1.40 static MessageLoaderParms _formMessage( 228 Uint32 code, 229 Uint32 line, 230 const String& message)
231 chuck 1.19 { 232 String dftMsg = _xmlMessages[Uint32(code) - 1]; 233 String key = _xmlKeys[Uint32(code) - 1];
234 david.dillard 1.32 String msg = message;
235 chuck 1.19 236 dftMsg.append(": on line $0"); 237 if (message.size()) 238 {
239 david.dillard 1.32 msg = ": " + msg; 240 dftMsg.append("$1"); 241 }
242 chuck 1.19 243 return MessageLoaderParms(key, dftMsg, line ,msg); 244 } 245 246 static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line) 247 { 248 String dftMsg = _xmlMessages[Uint32(code) - 1]; 249 String key = _xmlKeys[Uint32(code) - 1]; 250 251 dftMsg.append(": on line $0");
252 david.dillard 1.32
253 chuck 1.19 return MessageLoaderParms(key, dftMsg, line); 254 } 255
256 mike 1.13 257 XmlException::XmlException(
258 david.dillard 1.32 XmlException::Code code,
259 mike 1.13 Uint32 lineNumber,
260 david.dillard 1.32 const String& message)
261 mike 1.13 : Exception(_formMessage(code, lineNumber, message)) 262 { 263 264 } 265
266 chuck 1.19 267 XmlException::XmlException(
268 david.dillard 1.32 XmlException::Code code,
269 chuck 1.19 Uint32 lineNumber,
270 david.dillard 1.32 MessageLoaderParms& msgParms)
271 chuck 1.19 : Exception(_formPartialMessage(code, lineNumber)) 272 {
273 david.dillard 1.32 if (msgParms.default_msg.size())
274 humberto 1.21 {
275 david.dillard 1.32 msgParms.default_msg = ": " + msgParms.default_msg; 276 } 277 _rep->message.append(MessageLoader::getMessage(msgParms));
278 chuck 1.19 } 279 280
281 mike 1.13 //////////////////////////////////////////////////////////////////////////////// 282 // 283 // XmlValidationError 284 // 285 //////////////////////////////////////////////////////////////////////////////// 286 287 XmlValidationError::XmlValidationError( 288 Uint32 lineNumber, 289 const String& message) 290 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message) 291 { 292 } 293
294 chuck 1.19 295 XmlValidationError::XmlValidationError( 296 Uint32 lineNumber, 297 MessageLoaderParms& msgParms) 298 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms) 299 { 300 } 301 302
303 mike 1.13 //////////////////////////////////////////////////////////////////////////////// 304 // 305 // XmlSemanticError 306 // 307 //////////////////////////////////////////////////////////////////////////////// 308 309 XmlSemanticError::XmlSemanticError( 310 Uint32 lineNumber, 311 const String& message) 312 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message) 313 { 314 }
315 chuck 1.19 316 317 XmlSemanticError::XmlSemanticError( 318 Uint32 lineNumber, 319 MessageLoaderParms& msgParms) 320 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms) 321 { 322 } 323
324 mike 1.13 325 //////////////////////////////////////////////////////////////////////////////// 326 // 327 // XmlParser 328 // 329 //////////////////////////////////////////////////////////////////////////////// 330
331 kumpf 1.40 XmlParser::XmlParser(char* text) 332 : _line(1), 333 _current(text), 334 _restoreChar('\0'), 335 _foundRoot(false)
336 mike 1.13 { 337 } 338
339 mike 1.34 inline void _skipWhitespace(Uint32& line, char& p) 340 { 341 while (p && _isspace(p)) 342 { 343 if (p == '\n') 344 line++; 345 346 p++; 347 } 348 } 349
// Recognizes one of the five predefined entity references. On entry p points
// just past the '&'. On a match p is advanced past the terminating ';' and
// the replacement character is returned; otherwise p is left unchanged and
// -1 is returned.
static int _getEntityRef(char*& p)
{
    struct Entity
    {
        const char* name;   // reference text after the '&', including ';'
        size_t length;      // number of characters in name
        char replacement;
    };

    static const Entity entities[] =
    {
        { "gt;", 3, '>' },
        { "lt;", 3, '<' },
        { "apos;", 5, '\'' },
        { "quot;", 5, '"' },
        { "amp;", 4, '&' }
    };

    for (size_t i = 0; i < sizeof(entities) / sizeof(entities[0]); i++)
    {
        // strncmp stops at a NUL in p, so a truncated reference at the end
        // of the buffer is never read past its terminator.
        if (strncmp(p, entities[i].name, entities[i].length) == 0)
        {
            p += entities[i].length;
            return entities[i].replacement;
        }
    }

    return -1;
}

// Parses a numeric character reference. On entry p points just past "&#":
// either decimal digits or 'x' followed by hexadecimal digits, terminated by
// ';'. On success p is advanced past the ';' and the character code
// (0..255) is returned. On failure -1 is returned.
static inline int _getCharRef(char*& p)
{
    char* end;
    unsigned long ch;
    bool hex = false;

    if (*p == 'x')
    {
        hex = true;
        ++p;
    }

    // A character reference consists of digits only. strtoul() on its own
    // would also skip leading whitespace, accept a sign and, for base 16,
    // accept a "0x" prefix, so reject those forms before converting.
    const unsigned char first = static_cast<unsigned char>(*p);

    if (hex)
    {
        if (!isxdigit(first) ||
            ((p[0] == '0') && ((p[1] == 'x') || (p[1] == 'X'))))
        {
            return -1;
        }
    }
    else if (!isdigit(first))
    {
        return -1;
    }

    ch = strtoul(p, &end, hex ? 16 : 10);

    // Must have consumed digits, be terminated by ';', and fit in one byte.
    if ((end == p) || (*end != ';') || (ch > 255))
    {
        return -1;
    }

    // Limit the number of digits (leading zeros included): at most 4 for a
    // hexadecimal reference and 5 for a decimal one.
    if ((hex && (end - p > 4)) || (!hex && (end - p > 5)))
    {
        return -1;
    }

    p = end + 1;

    return static_cast<int>(ch);
}

418 kumpf 1.42.4.1 // Parse an entity reference or a character reference 419 static inline int _getRef(Uint32 line, char*& p)
420 kumpf 1.37 {
421 kumpf 1.42.4.1 int ch;
422 kumpf 1.37
423 kumpf 1.42.4.1 if (*p == '#') 424 { 425 ch = _getCharRef(++p); 426 } 427 else 428 { 429 ch = _getEntityRef(p); 430 } 431 432 if (ch == -1) 433 { 434 throw XmlException(XmlException::MALFORMED_REFERENCE, line); 435 }
436 kumpf 1.37
437 kumpf 1.42.4.1 return ch; 438 } 439 440 static inline void _normalizeElementValue( 441 Uint32& line, 442 char*& p) 443 {
444 kumpf 1.37 // Process one character at a time: 445 446 char* q = p; 447
448 kumpf 1.42.4.1 while (p && (p != '<'))
449 kumpf 1.37 { 450 if (_isspace(*p)) 451 {
452 kumpf 1.42.4.1 // Trim whitespace from the end of the value, but do not compress 453 // whitespace within the value. 454 455 const char* start = p;
456 kumpf 1.37 457 if (*p++ == '\n') 458 { 459 line++; 460 } 461 462 _skipWhitespace(line, p); 463
464 kumpf 1.42.4.1 if (p && (p != '<'))
465 kumpf 1.37 {
466 kumpf 1.42.4.1 // Transfer internal whitespace to q without compressing it. 467 const char* i = start; 468 while (i < p)
469 kumpf 1.37 {
470 kumpf 1.42.4.1 q++ = i++;
471 kumpf 1.37 } 472 } 473 else 474 {
475 kumpf 1.42.4.1 // Do not transfer trailing whitespace to q. 476 break;
477 kumpf 1.37 }
478 kumpf 1.42.4.1 } 479 else if (p == '&') 480 { 481 // Process an entity reference or a character reference. 482 483 q++ = _getRef(line, ++p); 484 } 485 else 486 { 487 q++ = p++; 488 } 489 } 490 491 // If q got behind p, it is safe and necessary to null-terminate q
492 kumpf 1.37
493 kumpf 1.42.4.1 if (q != p) 494 { 495 q = '\0'; 496 } 497 } 498 499 static inline void _normalizeAttributeValue( 500 Uint32& line, 501 char& p, 502 char end_char, 503 char& start) 504 { 505 // Skip over leading whitespace: 506 507 _skipWhitespace(line, p); 508 start = p; 509 510 // Process one character at a time: 511 512 char q = p; 513 514 kumpf 1.42.4.1 while (p && (p != end_char)) 515 { 516 if (_isspace(p)) 517 { 518 // Compress sequences of whitespace characters to a single space 519 // character. Update line number when newlines encountered. 520 521 if (p++ == '\n')
522 kumpf 1.37 {
523 kumpf 1.42.4.1 line++;
524 kumpf 1.37 } 525
526 kumpf 1.42.4.1 q++ = ' '; 527 528 _skipWhitespace(line, p); 529 } 530 else if (p == '&') 531 { 532 // Process an entity reference or a character reference. 533 534 *q++ = _getRef(line, ++p);
535 kumpf 1.37 } 536 else 537 { 538 q++ = p++; 539 } 540 } 541 542 // Remove single trailing whitespace (consecutive whitespaces already 543 // compressed above). Since p >= q, we can tell if we need to strip a 544 // trailing space from q by looking at the end of p. We must not look at 545 // the last character of p, though, if p is an empty string.
546 kumpf 1.42.4.1 Boolean adjust_q = (p != start) && _isspace(p[-1]);
547 kumpf 1.37
548 kumpf 1.42.4.1 // We encountered a the end_char or a zero-terminator. 549 550 q = p; 551 552 if (adjust_q)
553 kumpf 1.37 { 554 q--; 555 } 556 557 // If q got behind p, it is safe and necessary to null-terminate q 558 559 if (q != p) 560 { 561 *q = '\0'; 562 } 563 } 564
565 venkat.puvvada 1.41 Boolean XmlParser::next(XmlEntry& entry, Boolean includeComment)
566 mike 1.13 { 567 if (!_putBackStack.isEmpty()) 568 {
569 david.dillard 1.32 entry = _putBackStack.top(); 570 _putBackStack.pop(); 571 return true;
572 mike 1.13 } 573 574 // If a character was overwritten with a null-terminator the last 575 // time this routine was called, then put back that character. Before 576 // exiting of course, restore the null-terminator. 577 578 char* nullTerminator = 0; 579 580 if (_restoreChar && !*_current) 581 {
582 david.dillard 1.32 nullTerminator = _current; 583 *_current = _restoreChar; 584 _restoreChar = '\0';
585 mike 1.13 } 586
587 venkat.puvvada 1.41 // Loop until we are done with comments if includeComment is false. 588 do 589 { 590 // Skip over any whitespace: 591 _skipWhitespace(_line, _current); 592 593 if (!_current) 594 { 595 if (nullTerminator) 596 nullTerminator = '\0';
597 mike 1.13
598 venkat.puvvada 1.41 if (!_stack.isEmpty()) 599 throw XmlException(XmlException::UNCLOSED_TAGS, _line);
600 mike 1.13
601 venkat.puvvada 1.41 return false; 602 }
603 mike 1.13
604 venkat.puvvada 1.41 // Either a "<...>" or content begins next:
605 mike 1.13
606 venkat.puvvada 1.41 if (*_current == '<') 607 { 608 _current++; 609 _getElement(_current, entry);
610 mike 1.13
611 venkat.puvvada 1.41 if (nullTerminator) 612 *nullTerminator = '\0';
613 mike 1.13
614 venkat.puvvada 1.41 if (entry.type == XmlEntry::START_TAG) 615 { 616 if (_stack.isEmpty() && _foundRoot) 617 throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
618 mike 1.13
619 venkat.puvvada 1.41 _foundRoot = true; 620 _stack.push((char*)entry.text); 621 } 622 else if (entry.type == XmlEntry::END_TAG) 623 { 624 if (_stack.isEmpty()) 625 throw XmlException(XmlException::START_END_MISMATCH, _line);
626 mike 1.13
627 venkat.puvvada 1.41 if (strcmp(_stack.top(), entry.text) != 0) 628 throw XmlException(XmlException::START_END_MISMATCH, _line);
629 david.dillard 1.32
630 venkat.puvvada 1.41 _stack.pop(); 631 }
632 david.dillard 1.32 }
633 venkat.puvvada 1.41 else
634 david.dillard 1.32 {
635 venkat.puvvada 1.41 // Normalize the content:
636 mike 1.13
637 kumpf 1.42.4.1 char* start = _current; 638 _normalizeElementValue(_line, _current);
639 mike 1.13
640 venkat.puvvada 1.41 // Get the content:
641 mike 1.13
642 venkat.puvvada 1.41 entry.type = XmlEntry::CONTENT; 643 entry.text = start;
644 kumpf 1.37
645 venkat.puvvada 1.41 // Overwrite '<' with a null character (temporarily).
646 kumpf 1.37
647 venkat.puvvada 1.41 _restoreChar = _current; 648 _current = '\0';
649 kumpf 1.37
650 venkat.puvvada 1.41 if (nullTerminator) 651 *nullTerminator = '\0'; 652 } 653 }while (!includeComment && entry.type == XmlEntry::COMMENT);
654 kumpf 1.37
655 venkat.puvvada 1.41 return true;
656 mike 1.13 } 657 658 void XmlParser::putBack(XmlEntry& entry) 659 { 660 _putBackStack.push(entry); 661 } 662 663 XmlParser::~XmlParser() 664 { 665 // Nothing to do! 666 } 667
// Lookup table for characters allowed after the first character of an
// element or attribute name: A-Za-z0-9_-:.
//
// The table is indexed with a Uint8, so it is declared with an explicit size
// of 256; any element without an initializer is zero. One row per 16 codes.
static const unsigned char _isInnerElementChar[256] =
{
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x00 - 0x0F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x10 - 0x1F
    0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0, // 0x20 - 0x2F: '-' '.'
    1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0, // 0x30 - 0x3F: '0'-'9' ':'
    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x40 - 0x4F: 'A'-'O'
    1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, // 0x50 - 0x5F: 'P'-'Z' '_'
    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x60 - 0x6F: 'a'-'o'
    1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0, // 0x70 - 0x7F: 'p'-'z'
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x80 - 0x8F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x90 - 0x9F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xA0 - 0xAF
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xB0 - 0xBF
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xC0 - 0xCF
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xD0 - 0xDF
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xE0 - 0xEF
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0  // 0xF0 - 0xFF
};

680 mike 1.13 Boolean XmlParser::_getElementName(char*& p) 681 {
682 mike 1.35 if (!CharSet::isAlNumUnder(Uint8(*p)))
683 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
684 mike 1.35
685 kumpf 1.24 p++;
686 mike 1.13
687 mike 1.35 while (p && _isInnerElementChar[Uint8(p)])
688 david.dillard 1.32 p++;
689 mike 1.13 690 // The next character must be a space: 691
692 chuck 1.26 if (_isspace(*p))
693 mike 1.13 {
694 david.dillard 1.32 *p++ = '\0';
695 mike 1.34 _skipWhitespace(_line, p);
696 mike 1.13 } 697 698 if (*p == '>') 699 {
700 david.dillard 1.32 *p++ = '\0'; 701 return true;
702 mike 1.13 } 703 704 return false; 705 } 706 707 Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement) 708 { 709 openCloseElement = false; 710
711 mike 1.35 if (!CharSet::isAlNumUnder(Uint8(*p)))
712 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
713 mike 1.35
714 kumpf 1.24 p++;
715 mike 1.13
716 mike 1.35 while (p && _isInnerElementChar[Uint8(p)])
717 david.dillard 1.32 p++;
718 mike 1.13 719 // The next character must be a space: 720
721 chuck 1.26 if (_isspace(*p))
722 mike 1.13 {
723 david.dillard 1.32 *p++ = '\0';
724 mike 1.34 _skipWhitespace(_line, p);
725 mike 1.13 } 726 727 if (*p == '>') 728 {
729 david.dillard 1.32 *p++ = '\0'; 730 return true;
731 mike 1.13 } 732 733 if (p[0] == '/' && p[1] == '>') 734 {
735 david.dillard 1.32 openCloseElement = true; 736 *p = '\0'; 737 p += 2; 738 return true;
739 mike 1.13 } 740 741 return false; 742 } 743 744 void XmlParser::_getAttributeNameAndEqual(char*& p) 745 {
746 mike 1.35 if (!CharSet::isAlNumUnder((Uint8)*p))
747 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
748 mike 1.35
749 kumpf 1.24 p++;
750 mike 1.13
751 mike 1.35 while (p && _isInnerElementChar[Uint8(p)])
752 david.dillard 1.32 p++;
753 mike 1.13 754 char* term = p; 755
756 mike 1.34 _skipWhitespace(_line, p);
757 mike 1.13 758 if (*p != '=')
759 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
760 mike 1.13 761 p++; 762
763 mike 1.34 _skipWhitespace(_line, p);
764 mike 1.13 765 term = '\0'; 766 } 767 768 void XmlParser::_getComment(char& p) 769 { 770 // Now p points to first non-whitespace character beyond "<--" sequence: 771 772 for (; *p; p++) 773 {
774 david.dillard 1.32 if (p[0] == '-' && p[1] == '-') 775 { 776 if (p[2] != '>') 777 { 778 throw XmlException( 779 XmlException::MINUS_MINUS_IN_COMMENT, _line); 780 } 781 782 // Find end of comment (excluding whitespace): 783 784 *p = '\0'; 785 p += 3; 786 return; 787 }
788 mike 1.13 } 789 790 // If it got this far, then the comment is unterminated: 791 792 throw XmlException(XmlException::UNTERMINATED_COMMENT, _line); 793 } 794 795 void XmlParser::_getCData(char& p) 796 { 797 // At this point p points one past "<![CDATA[" sequence: 798 799 for (; p; p++) 800 {
801 david.dillard 1.32 if (p[0] == ']' && p[1] == ']' && p[2] == '>') 802 { 803 p = '\0'; 804 p += 3; 805 return; 806 } 807 else if (p == '\n') 808 _line++;
809 mike 1.13 } 810 811 // If it got this far, then the comment is unterminated: 812 813 throw XmlException(XmlException::UNTERMINATED_CDATA, _line); 814 } 815 816 void XmlParser::_getDocType(char& p) 817 { 818 // Just ignore the DOCTYPE command for now: 819 820 for (; p && *p != '>'; p++) 821 {
822 david.dillard 1.32 if (*p == '\n') 823 _line++;
824 mike 1.13 } 825 826 if (*p != '>')
827 david.dillard 1.32 throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
828 mike 1.13 829 p++; 830 } 831 832 void XmlParser::_getElement(char& p, XmlEntry& entry) 833 { 834 entry.attributeCount = 0; 835 836 //-------------------------------------------------------------------------- 837 // Get the element name (expect one of these: '?', '!', [A-Za-z_]) 838 //-------------------------------------------------------------------------- 839 840 if (p == '?') 841 {
842 david.dillard 1.32 entry.type = XmlEntry::XML_DECLARATION; 843 entry.text = ++p;
844 mike 1.13
845 david.dillard 1.32 Boolean openCloseElement = false;
846 mike 1.13
847 david.dillard 1.32 if (_getElementName(p)) 848 return;
849 mike 1.13 } 850 else if (*p == '!') 851 {
852 david.dillard 1.32 p++;
853 mike 1.13
854 david.dillard 1.32 // Expect a comment or CDATA:
855 mike 1.13
856 david.dillard 1.32 if (p[0] == '-' && p[1] == '-') 857 { 858 p += 2; 859 entry.type = XmlEntry::COMMENT; 860 entry.text = p; 861 _getComment(p); 862 return; 863 } 864 else if (memcmp(p, "[CDATA[", 7) == 0) 865 { 866 p += 7; 867 entry.type = XmlEntry::CDATA; 868 entry.text = p; 869 _getCData(p); 870 return; 871 } 872 else if (memcmp(p, "DOCTYPE", 7) == 0) 873 { 874 entry.type = XmlEntry::DOCTYPE;
875 kumpf 1.37 entry.text = "";
876 david.dillard 1.32 _getDocType(p); 877 return; 878 } 879 throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
880 mike 1.13 } 881 else if (*p == '/') 882 {
883 david.dillard 1.32 entry.type = XmlEntry::END_TAG; 884 entry.text = ++p;
885 mike 1.13
886 david.dillard 1.32 if (!_getElementName(p)) 887 throw(XmlException(XmlException::BAD_END_TAG, _line));
888 mike 1.13
889 david.dillard 1.32 return;
890 mike 1.13 }
891 david 1.25 else if ((((p >= 'A') && (p <= 'Z')) \|\|
892 kumpf 1.24 ((p >= 'a') && (p <= 'z')) \|\| 893 (*p == '_')))
894 mike 1.13 {
895 david.dillard 1.32 entry.type = XmlEntry::START_TAG; 896 entry.text = p;
897 mike 1.13
898 david.dillard 1.32 Boolean openCloseElement = false;
899 mike 1.13
900 david.dillard 1.32 if (_getOpenElementName(p, openCloseElement)) 901 { 902 if (openCloseElement) 903 entry.type = XmlEntry::EMPTY_TAG; 904 return; 905 }
906 mike 1.13 } 907 else
908 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
909 mike 1.13 910 //-------------------------------------------------------------------------- 911 // Grab all the attributes: 912 //-------------------------------------------------------------------------- 913 914 for (;;) 915 {
916 david.dillard 1.32 if (entry.type == XmlEntry::XML_DECLARATION) 917 { 918 if (p[0] == '?' && p[1] == '>') 919 { 920 p += 2; 921 return; 922 } 923 } 924 else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>') 925 { 926 entry.type = XmlEntry::EMPTY_TAG; 927 p += 2; 928 return; 929 } 930 else if (*p == '>') 931 { 932 p++; 933 return; 934 } 935 936 XmlAttribute attr; 937 david.dillard 1.32 attr.name = p; 938 _getAttributeNameAndEqual(p); 939
940 kumpf 1.37 // Get the attribute value (e.g., "some value") 941 { 942 if ((p != '"') && (p != '\'')) 943 { 944 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); 945 } 946 947 char quote = p++; 948 949 char start;
950 kumpf 1.42.4.1 _normalizeAttributeValue(_line, p, quote, start);
951 kumpf 1.37 attr.value = start; 952 953 if (*p != quote) 954 { 955 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); 956 } 957 958 // Overwrite the closing quote with a null-terminator:
959 david.dillard 1.32
960 kumpf 1.37 *p++ = '\0'; 961 }
962 david.dillard 1.32 963 if (entry.type == XmlEntry::XML_DECLARATION) 964 { 965 // The next thing must a space or a "?>":
966 mike 1.13
967 david.dillard 1.32 if (!(p[0] == '?' && p[1] == '>') && !_isspace(p)) 968 { 969 throw XmlException( 970 XmlException::BAD_ATTRIBUTE_VALUE, _line); 971 } 972 } 973 else if (!(p == '>' \|\| (p[0] == '/' && p[1] == '>') \|\| _isspace(*p))) 974 { 975 // The next thing must be a space or a '>':
976 mike 1.13
977 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); 978 }
979 mike 1.13
980 mike 1.34 _skipWhitespace(_line, p);
981 david.dillard 1.32 982 if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES) 983 throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line); 984 985 entry.attributes[entry.attributeCount++] = attr;
986 mike 1.13 } 987 } 988 989 static const char* _typeStrings[] = 990 {
991 david.dillard 1.32 "XML_DECLARATION", 992 "START_TAG", 993 "EMPTY_TAG", 994 "END_TAG",
995 mike 1.13 "COMMENT", 996 "CDATA", 997 "DOCTYPE",
998 david.dillard 1.32 "CONTENT"
999 mike 1.13 }; 1000 1001 void XmlEntry::print() const 1002 { 1003 PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " "; 1004 1005 Boolean needQuotes = type == XmlEntry::CDATA \|\| type == XmlEntry::CONTENT; 1006 1007 if (needQuotes)
1008 david.dillard 1.32 PEGASUS_STD(cout) << "\""; 1009
1010 mike 1.13 _printValue(text); 1011 1012 if (needQuotes)
1013 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
1014 mike 1.13 1015 PEGASUS_STD(cout) << '\n'; 1016 1017 for (Uint32 i = 0; i < attributeCount; i++) 1018 {
1019 david.dillard 1.32 PEGASUS_STD(cout) << " " << attributes[i].name << "=\""; 1020 _printValue(attributes[i].value); 1021 PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
1022 mike 1.13 } 1023 } 1024 1025 const XmlAttribute* XmlEntry::findAttribute( 1026 const char* name) const 1027 { 1028 for (Uint32 i = 0; i < attributeCount; i++) 1029 {
1030 david.dillard 1.32 if (strcmp(attributes[i].name, name) == 0) 1031 return &attributes[i];
1032 mike 1.13 } 1033 1034 return 0; 1035 } 1036 1037 // Find first non-whitespace character (set first) and last non-whitespace 1038 // character (set last one past this). For example, consider this string: 1039 //
1040 david.dillard 1.32 // " 87 "
1041 mike 1.13 // 1042 // The first pointer would point to '8' and the last pointer woudl point one 1043 // beyond '7'. 1044 1045 static void _findEnds(
1046 david.dillard 1.32 const char* str, 1047 const char*& first,
1048 mike 1.13 const char*& last) 1049 { 1050 first = str; 1051
1052 chuck 1.26 while (_isspace(*first))
1053 david.dillard 1.32 first++;
1054 mike 1.13 1055 if (!*first) 1056 {
1057 david.dillard 1.32 last = first; 1058 return;
1059 mike 1.13 } 1060 1061 last = first + strlen(first); 1062
1063 chuck 1.26 while (last != first && _isspace(last[-1]))
1064 david.dillard 1.32 last--;
1065 mike 1.13 } 1066 1067 Boolean XmlEntry::getAttributeValue(
1068 david.dillard 1.32 const char* name,
1069 mike 1.13 Uint32& value) const 1070 { 1071 const XmlAttribute* attr = findAttribute(name); 1072 1073 if (!attr)
1074 david.dillard 1.32 return false;
1075 mike 1.13 1076 const char* first; 1077 const char* last; 1078 _findEnds(attr->value, first, last); 1079 1080 char* end = 0; 1081 long tmp = strtol(first, &end, 10); 1082 1083 if (!end \|\| end != last)
1084 david.dillard 1.32 return false;
1085 mike 1.13 1086 value = Uint32(tmp); 1087 return true; 1088 } 1089 1090 Boolean XmlEntry::getAttributeValue(
1091 david.dillard 1.32 const char* name,
1092 mike 1.13 Real32& value) const 1093 { 1094 const XmlAttribute* attr = findAttribute(name); 1095 1096 if (!attr)
1097 david.dillard 1.32 return false;
1098 mike 1.13 1099 const char* first; 1100 const char* last; 1101 _findEnds(attr->value, first, last); 1102 1103 char* end = 0; 1104 double tmp = strtod(first, &end); 1105 1106 if (!end \|\| end != last)
1107 david.dillard 1.32 return false;
1108 mike 1.13
1109 david.dillard 1.32 value = static_cast<Real32>(tmp);
1110 mike 1.13 return true; 1111 } 1112 1113 Boolean XmlEntry::getAttributeValue(
1114 david.dillard 1.32 const char* name,
1115 mike 1.13 const char& value) const 1116 { 1117 const XmlAttribute attr = findAttribute(name); 1118 1119 if (!attr)
1120 david.dillard 1.32 return false;
1121 mike 1.13 1122 value = attr->value; 1123 return true; 1124 } 1125 1126 Boolean XmlEntry::getAttributeValue(const char* name, String& value) const 1127 { 1128 const char* tmp; 1129 1130 if (!getAttributeValue(name, tmp))
1131 david.dillard 1.32 return false;
1132 mike 1.13
1133 chuck 1.28 value = String(tmp);
1134 mike 1.13 return true; 1135 } 1136
1137 mike 1.33 void XmlAppendCString(Buffer& out, const char* str)
1138 mike 1.13 {
1139 david.dillard 1.32 out.append(str, static_cast<Uint32>(strlen(str)));
1140 mike 1.13 } 1141 1142 PEGASUS_NAMESPACE_END

No CVS admin address has been configured