(file) Return to XmlParser.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 mike  1.13 //%/////////////////////////////////////////////////////////////////////////////
   2            //
   3 kumpf 1.16 // Copyright (c) 2000, 2001, 2002 BMC Software, Hewlett-Packard Company, IBM,
   4            // The Open Group, Tivoli Systems
   5 mike  1.13 //
   6            // Permission is hereby granted, free of charge, to any person obtaining a copy
   7 kumpf 1.16 // of this software and associated documentation files (the "Software"), to
   8            // deal in the Software without restriction, including without limitation the
   9            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  10 mike  1.13 // sell copies of the Software, and to permit persons to whom the Software is
  11            // furnished to do so, subject to the following conditions:
  12            // 
  13 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
  14 mike  1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
  15            // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
  16 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
  17            // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  18            // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  19 mike  1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  20            // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  21            //
  22            //==============================================================================
  23            //
  24            // Author: Mike Brasher (mbrasher@bmc.com)
  25            //
  26            // Modified By:
  27            //
  28            //%/////////////////////////////////////////////////////////////////////////////
  29            
  30            ////////////////////////////////////////////////////////////////////////////////
  31            //
  32            // XmlParser
  33            //
  34            //	This file contains a simple non-validating XML parser. Here are 
  35            //	several rules for well-formed XML:
  36            //
  37            //	    1.	Documents must begin with an XML declaration:
  38            //
  39            //		<?xml version="1.0" standalone="yes"?>
  40 mike  1.13 //
  41            //	    2.	Comments have the form:
  42            //
  43            //		<!-- blah blah blah -->
  44            //
  45            //	    3. The following entity references are supported:
  46            //
  47            //		&amp; - ampersand
  48            //	 	&lt; - less-than
  49            //		&gt; - greater-than
  50            //		&quot; - double quote
  51            //		&apos; - apostrophe
  52            //
  53 kumpf 1.18 //             as well as character (numeric) references:
  54            
  55            //              &#49; - decimal reference for character '1'
  56            //              &#x31; - hexadecimal reference for character '1'
  57            //
  58 mike  1.13 //	    4. Element names and attribute names take the following form:
  59            //
  60            //		[A-Za-z_][A-Za-z_0-9-.:]*
  61            //
  62            //	    5.	Arbitrary data (CDATA) can be enclosed like this:
  63            //
  64            //		    <![CDATA[
  65            //		    ...
  66            //		    ]]>
  67            //
  68            //	    6.	Element names and attributes names are case-sensitive.
  69            //
  70            //	    7.	XmlAttribute values must be delimited by full or half quotes.
  71            //		XmlAttribute values must be delimited.
  72            //
  73            //	    8.  <!DOCTYPE...>
  74            //
  75            // TODO:
  76            //
  77 karl  1.15 //      ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is work.
  78 mike  1.13 //	Handle <!DOCTYPE...> sections which are complicated (containing
  79            //        rules rather than references to files).
  80            //
  81            //	Remove newlines from string literals:
  82            //
  83            //          Example: <xyz x="hello
  84            //		world">
  85            //
  86            ////////////////////////////////////////////////////////////////////////////////
  87            
  88 sage  1.14 #include <Pegasus/Common/Config.h>
  89 mike  1.13 #include <cctype>
  90            #include <cstdio>
  91            #include <cstdlib>
  92            #include <cstring>
  93            #include "XmlParser.h"
  94            #include "Logger.h"
  95            
  96            PEGASUS_NAMESPACE_BEGIN
  97            
  98            #define PEGASUS_ARRAY_T XmlEntry
  99            # include "ArrayImpl.h"
 100            #undef PEGASUS_ARRAY_T
 101            
 102            
 103            ////////////////////////////////////////////////////////////////////////////////
 104            //
 105            // Static helper functions
 106            //
 107            ////////////////////////////////////////////////////////////////////////////////
 108            
 109            static void _printValue(const char* p)
 110 mike  1.13 {
 111                for (; *p; p++)
 112                {
 113            	if (*p == '\n')
 114            	    PEGASUS_STD(cout) << "\\n";
 115            	else if (*p == '\r')
 116            	    PEGASUS_STD(cout) << "\\r";
 117            	else if (*p == '\t')
 118            	    PEGASUS_STD(cout) << "\\t";
 119            	else
 120            	    PEGASUS_STD(cout) << *p;
 121                }
 122            }
 123            
 124            struct EntityReference
 125            {
 126                const char* match;
 127                Uint32 length;
 128                char replacement;
 129            };
 130            
 131 kumpf 1.18 // ATTN: Add support for more entity references
 132 mike  1.13 static EntityReference _references[] =
 133            {
 134                { "&amp;", 5, '&' },
 135                { "&lt;", 4, '<' },
 136                { "&gt;", 4, '>' },
 137                { "&quot;", 6, '"' },
 138                { "&apos;", 6, '\'' }
 139            };
 140            
 141            static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
 142            
 // Remove all redundant spaces from the given string, in place:
 //
 //   - leading whitespace is dropped;
 //   - trailing whitespace is dropped;
 //   - an interior run of two or more whitespace characters is replaced by a
 //     single ' ';
 //   - an interior run of exactly one whitespace character is kept as is
 //     (so a lone tab or newline between two words is preserved).
 //
 // The text is compacted in a single pass with a read cursor and a write
 // cursor; the write cursor never overtakes the read cursor, so no temporary
 // buffer is needed. Characters are converted to unsigned char before being
 // handed to isspace(), whose behavior is undefined for negative values.

 static void _normalize(char* text)
 {
     const char* src = text;
     char* dst = text;

     // Remove leading spaces:

     while (isspace(static_cast<unsigned char>(*src)))
         src++;

     while (*src)
     {
         // Ordinary characters are copied straight through:

         if (!isspace(static_cast<unsigned char>(*src)))
         {
             *dst++ = *src++;
             continue;
         }

         // Measure the run of whitespace starting here:

         const char* runStart = src;

         while (isspace(static_cast<unsigned char>(*src)))
             src++;

         // Discard trailing spaces (if we are at the end):

         if (!*src)
             break;

         // Collapse a multi-character run to one blank; keep a single
         // whitespace character unchanged:

         *dst++ = (src - runStart > 1) ? ' ' : *runStart;
     }

     *dst = '\0';
 }
 200            
 201            ////////////////////////////////////////////////////////////////////////////////
 202            //
 203            // XmlException
 204            //
 205            ////////////////////////////////////////////////////////////////////////////////
 206            
 // Message text for each XML exception code. _formMessage() looks an entry up
 // with (code - 1), so entry i holds the text for code i + 1; keep the order
 // in step with the code values.
 static const char* const _xmlMessages[] =
 {
     "Bad opening element",
     "Bad closing element",
     "Bad attribute name",
     "Expected equal sign",
     "Bad attribute value",
     "A \"--\" sequence found within comment",
     "Unterminated comment",
     "Unterminated CDATA block",
     "Unterminated DOCTYPE",
     "Too many attributes: parser only handles 10",
     "Malformed reference",
     "Expected a comment or CDATA following \"<!\" sequence",
     "Closing element does not match opening element",
     "One or more tags are still open",
     "More than one root element was encountered",
     "Validation error",
     "Semantic error"
 };
 227            
 228            static String _formMessage(Uint32 code, Uint32 line, const String& message)
 229            {
 230                String result = _xmlMessages[Uint32(code) - 1];
 231            
 232                char buffer[32];
 233                sprintf(buffer, "%d", line);
 234                result.append(": on line ");
 235                result.append(buffer);
 236            
 237 mike  1.13     if (message.size())
 238                {
 239            	result.append(": ");
 240            	result.append(message);
 241                }
 242            
 243                return result;
 244            }
 245            
 246            XmlException::XmlException(
 247                XmlException::Code code, 
 248                Uint32 lineNumber,
 249                const String& message) 
 250                : Exception(_formMessage(code, lineNumber, message))
 251            {
 252            
 253            }
 254            
 255            ////////////////////////////////////////////////////////////////////////////////
 256            //
 257            // XmlValidationError
 258 mike  1.13 //
 259            ////////////////////////////////////////////////////////////////////////////////
 260            
 261            XmlValidationError::XmlValidationError(
 262                Uint32 lineNumber,
 263                const String& message)
 264                : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
 265            {
 266            
 267            }
 268            
 269            ////////////////////////////////////////////////////////////////////////////////
 270            //
 271            // XmlSemanticError
 272            //
 273            ////////////////////////////////////////////////////////////////////////////////
 274            
 275            XmlSemanticError::XmlSemanticError(
 276                Uint32 lineNumber,
 277                const String& message)
 278                : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
 279 mike  1.13 {
 280            
 281            }
 282            
 283            ////////////////////////////////////////////////////////////////////////////////
 284            //
 285            // XmlParser
 286            //
 287            ////////////////////////////////////////////////////////////////////////////////
 288            
 289            XmlParser::XmlParser(char* text) : _line(1), _text(text), _current(text), 
 290                _restoreChar('\0'), _foundRoot(false)
 291            {
 292            
 293            }
 294            
 295            Boolean XmlParser::next(XmlEntry& entry)
 296            {
 297                if (!_putBackStack.isEmpty())
 298                {
 299            	entry = _putBackStack.top();
 300 mike  1.13 	_putBackStack.pop();
 301            	return true;
 302                }
 303            
 304                // If a character was overwritten with a null-terminator the last
 305                // time this routine was called, then put back that character. Before
 306                // exiting of course, restore the null-terminator.
 307            
 308                char* nullTerminator = 0;
 309            
 310                if (_restoreChar && !*_current)
 311                {
 312            	nullTerminator = _current;
 313            	*_current = _restoreChar;
 314            	_restoreChar = '\0';
 315                }
 316            
 317                // Skip over any whitespace:
 318            
 319                _skipWhitespace(_current);
 320            
 321 mike  1.13     if (!*_current)
 322                {
 323            	if (nullTerminator)
 324            	    *nullTerminator = '\0';
 325            
 326            	if (!_stack.isEmpty())
 327            	    throw XmlException(XmlException::UNCLOSED_TAGS, _line);
 328            
 329            	return false;
 330                }
 331            
 332                // Either a "<...>" or content begins next:
 333            
 334                if (*_current == '<')
 335                {
 336            	_current++;
 337            	_getElement(_current, entry);
 338            
 339            	if (nullTerminator)
 340            	    *nullTerminator = '\0';
 341            
 342 mike  1.13 	if (entry.type == XmlEntry::START_TAG)
 343            	{
 344            	    if (_stack.isEmpty() && _foundRoot)
 345            		throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
 346            
 347            	    _foundRoot = true;
 348            	    _stack.push((char*)entry.text);
 349            	}
 350            	else if (entry.type == XmlEntry::END_TAG)
 351            	{
 352            	    if (_stack.isEmpty())
 353            		throw XmlException(XmlException::START_END_MISMATCH, _line);
 354            
 355            	    if (strcmp(_stack.top(), entry.text) != 0)
 356            		throw XmlException(XmlException::START_END_MISMATCH, _line);
 357            
 358            	    _stack.pop();
 359            	}
 360            
 361            	return true;
 362                }
 363 mike  1.13     else
 364                {
 365            	entry.type = XmlEntry::CONTENT;
 366            	entry.text = _current;
 367            	_getContent(_current);
 368            	_restoreChar = *_current;
 369            	*_current = '\0';
 370            
 371            	if (nullTerminator)
 372            	    *nullTerminator = '\0';
 373            
 374            	_substituteReferences((char*)entry.text);
 375            	_normalize((char*)entry.text);
 376            
 377            	return true;
 378                }
 379            }
 380            
 381            void XmlParser::putBack(XmlEntry& entry)
 382            {
 383                _putBackStack.push(entry);
 384 mike  1.13 }
 385            
 386            XmlParser::~XmlParser()
 387            {
 388                // Nothing to do!
 389            }
 390            
 391            void XmlParser::_skipWhitespace(char*& p)
 392            {
 393                while (*p && isspace(*p))
 394                {
 395            	if (*p == '\n')
 396            	    _line++;
 397            
 398            	p++;
 399                }
 400            }
 401            
 402            Boolean XmlParser::_getElementName(char*& p)
 403            {
 404                if (!isalpha(*p) && *p != '_')
 405 mike  1.13 	throw XmlException(XmlException::BAD_START_TAG, _line);
 406            
 407                while (*p && 
 408            	(isalnum(*p) || *p == '_' || *p == '-' || *p == ':' || *p == '.'))
 409            	p++;
 410            
 411                // The next character must be a space:
 412            
 413                if (isspace(*p))
 414                {
 415            	*p++ = '\0';
 416            	_skipWhitespace(p);
 417                }
 418            
 419                if (*p == '>')
 420                {
 421            	*p++ = '\0';
 422            	return true;
 423                }
 424            
 425                return false;
 426 mike  1.13 }
 427            
 428            Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement)
 429            {
 430                openCloseElement = false;
 431            
 432                if (!isalpha(*p) && *p != '_')
 433            	throw XmlException(XmlException::BAD_START_TAG, _line);
 434            
 435                while (*p && 
 436            	(isalnum(*p) || *p == '_' || *p == '-' || *p == ':' || *p == '.'))
 437            	p++;
 438            
 439                // The next character must be a space:
 440            
 441                if (isspace(*p))
 442                {
 443            	*p++ = '\0';
 444            	_skipWhitespace(p);
 445                }
 446            
 447 mike  1.13     if (*p == '>')
 448                {
 449            	*p++ = '\0';
 450            	return true;
 451                }
 452            
 453                if (p[0] == '/' && p[1] == '>')
 454                {
 455            	openCloseElement = true;
 456            	*p = '\0';
 457            	p += 2;
 458            	return true;
 459                }
 460            
 461                return false;
 462            }
 463            
 464            void XmlParser::_getAttributeNameAndEqual(char*& p)
 465            {
 466                if (!isalpha(*p) && *p != '_')
 467            	throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
 468 mike  1.13 
 469                while (*p && 
 470            	(isalnum(*p) || *p == '_' || *p == '-' || *p == ':' || *p == '.'))
 471            	p++;
 472            
 473                char* term = p;
 474            
 475                _skipWhitespace(p);
 476            
 477                if (*p != '=')
 478            	throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
 479            
 480                p++;
 481            
 482                _skipWhitespace(p);
 483            
 484                *term = '\0';
 485            }
 486            
 487            void XmlParser::_getAttributeValue(char*& p)
 488            {
 489 mike  1.13     // ATTN-B: handle values contained in semiquotes:
 490            
 491                if (*p != '"' && *p != '\'')
 492            	throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 493            
 494                char startChar = *p++;
 495            
 496                while (*p && *p != startChar)
 497            	p++;
 498            
 499                if (*p != startChar)
 500            	throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 501            
 502                *p++ = '\0';
 503            }
 504            
 505            void XmlParser::_getComment(char*& p)
 506            {
 507                // Now p points to first non-whitespace character beyond "<--" sequence:
 508            
 509                for (; *p; p++)
 510 mike  1.13     {
 511            	if (p[0] == '-' && p[1] == '-')
 512            	{
 513            	    if (p[2] != '>')
 514            	    {
 515            		throw XmlException(
 516            		    XmlException::MINUS_MINUS_IN_COMMENT, _line);
 517            	    }
 518            
 519            	    // Find end of comment (excluding whitespace):
 520            
 521            	    *p = '\0';
 522            	    p += 3;
 523            	    return;
 524            	}
 525                }
 526            
 527                // If it got this far, then the comment is unterminated:
 528            
 529                throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
 530            }
 531 mike  1.13 
 532            void XmlParser::_getCData(char*& p)
 533            {
 534                // At this point p points one past "<![CDATA[" sequence:
 535            
 536                for (; *p; p++)
 537                {
 538            	if (p[0] == ']' && p[1] == ']' && p[2] == '>')
 539            	{
 540            	    *p = '\0';
 541            	    p += 3;
 542            	    return;
 543            	}
 544            	else if (*p == '\n')
 545            	    _line++;
 546                }
 547            
 548                // If it got this far, then the comment is unterminated:
 549            
 550                throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
 551            }
 552 mike  1.13 
 553            void XmlParser::_getDocType(char*& p)
 554            {
 555                // Just ignore the DOCTYPE command for now:
 556            
 557                for (; *p && *p != '>'; p++)
 558                {
 559            	if (*p == '\n')
 560            	    _line++;
 561                }
 562            
 563                if (*p != '>')
 564            	throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
 565            
 566                p++;
 567            }
 568            
 569            void XmlParser::_getContent(char*& p)
 570            {
 571                while (*p && *p != '<')
 572                {
 573 mike  1.13 	if (*p == '\n')
 574            	    _line++;
 575            
 576            	p++;
 577                }
 578            }
 579            
 580            void XmlParser::_substituteReferences(char* text)
 581            {
 582                Uint32 rem = strlen(text);
 583            
 584                for (char* p = text; *p; p++, rem--)
 585                {
 586            	if (*p == '&')
 587            	{
 588 kumpf 1.18             // Process character or entity reference
 589 mike  1.13 
 590 kumpf 1.18             Uint16 referenceChar = 0;
 591                        Uint32 referenceLength = 0;
 592                        XmlException::Code code = XmlException::MALFORMED_REFERENCE;
 593            
 594                        if (*(p+1) == '#')
 595                        {
 596                            // Found a character (numeric) reference
 597                            // Determine whether it is decimal or hex
 598                            if (*(p+2) == 'x')
 599                            {
 600                                // Decode a hexadecimal character reference
 601                                char* q = p+3;
 602            
 603                                // At most four digits are allowed, plus trailing ';'
 604                                Uint32 numDigits;
 605                                for (numDigits = 0; numDigits < 5; numDigits++, q++)
 606                                {
 607                                    if (isdigit(*q))
 608                                    {
 609                                        referenceChar = (referenceChar << 4);
 610                                        referenceChar += (*q - '0');
 611 kumpf 1.18                         }
 612                                    else if ((*q >= 'A') && (*q <= 'F'))
 613                                    {
 614                                        referenceChar = (referenceChar << 4);
 615                                        referenceChar += (*q - 'A' + 10);
 616                                    }
 617                                    else if ((*q >= 'a') && (*q <= 'f'))
 618                                    {
 619                                        referenceChar = (referenceChar << 4);
 620                                        referenceChar += (*q - 'a' + 10);
 621                                    }
 622                                    else if (*q == ';')
 623                                    {
 624                                        break;
 625                                    }
 626                                    else
 627                                    {
 628                                        throw XmlException(code, _line);
 629                                    }
 630                                }
 631            
 632 kumpf 1.18                     // Hex number must be 1 - 4 digits
 633                                if ((numDigits == 0) || (numDigits > 4))
 634                                {
 635                                    throw XmlException(code, _line);
 636                                }
 637            
 638                                // ATTN: Currently do not support 16-bit characters
 639                                if (referenceChar > 0xff)
 640                                {
 641                                    // ATTN: Is there a good way to say "unsupported"?
 642                                    throw XmlException(code, _line);
 643                                }
 644            
 645                                referenceLength = numDigits + 4;
 646                            }
 647                            else
 648                            {
 649                                // Decode a decimal character reference
 650                                Uint32 newChar = 0;
 651                                char* q = p+2;
 652            
 653 kumpf 1.18                     // At most five digits are allowed, plus trailing ';'
 654                                Uint32 numDigits;
 655                                for (numDigits = 0; numDigits < 6; numDigits++, q++)
 656                                {
 657                                    if (isdigit(*q))
 658                                    {
 659                                        newChar = (newChar * 10);
 660                                        newChar += (*q - '0');
 661                                    }
 662                                    else if (*q == ';')
 663                                    {
 664                                        break;
 665                                    }
 666                                    else
 667                                    {
 668                                        throw XmlException(code, _line);
 669                                    }
 670                                }
 671            
 672                                // Decimal number must be 1 - 5 digits and fit in 16 bits
 673                                if ((numDigits == 0) || (numDigits > 5) ||
 674 kumpf 1.18                         (newChar > 0xffff))
 675                                {
 676                                    throw XmlException(code, _line);
 677                                }
 678            
 679                                // ATTN: Currently do not support 16-bit characters
 680                                if (newChar > 0xff)
 681                                {
 682                                    // ATTN: Is there a good way to say "unsupported"?
 683                                    throw XmlException(code, _line);
 684                                }
 685            
 686                                referenceChar = Uint16(newChar);
 687                                referenceLength = numDigits + 3;
 688                            }
 689                        }
 690                        else
 691                        {
 692                            // Check for entity reference
 693                            // ATTN: Inefficient if many entity references are supported
 694                            Uint32 i;
 695 kumpf 1.18                 for (i = 0; i < _REFERENCES_SIZE; i++)
 696                            {
 697                                Uint32 length = _references[i].length;
 698                                const char* match = _references[i].match;
 699            
 700                                if (strncmp(p, _references[i].match, length) == 0)
 701                                {
 702                                    referenceChar = _references[i].replacement;
 703                                    referenceLength = length;
 704                                    break;
 705                                }
 706                            }
 707            
 708                            if (i == _REFERENCES_SIZE)
 709                            {
 710                                // Didn't recognize the entity reference
 711                                // ATTN: Is there a good way to say "unsupported"?
 712                                throw XmlException(code, _line);
 713                            }
 714                        }
 715            
 716 kumpf 1.18             // Replace the reference with the correct character
 717                        *p = (char)referenceChar;
 718                        char* q = p + referenceLength;
 719                        rem = rem - referenceLength + 1;
 720                        memmove(p + 1, q, rem);
 721 mike  1.13 	}
 722                }
 723            }
 724            
 725            static const char _EMPTY_STRING[] = "";
 726            
 727            void XmlParser::_getElement(char*& p, XmlEntry& entry)
 728            {
 729                entry.attributeCount = 0;
 730            
 731                //--------------------------------------------------------------------------
 732                // Get the element name (expect one of these: '?', '!', [A-Za-z_])
 733                //--------------------------------------------------------------------------
 734            
 735                if (*p == '?')
 736                {
 737            	entry.type = XmlEntry::XML_DECLARATION;
 738            	entry.text = ++p;
 739            
 740            	Boolean openCloseElement = false;
 741            
 742 mike  1.13 	if (_getElementName(p))
 743            	    return;
 744                }
 745                else if (*p == '!')
 746                {
 747            	p++;
 748            
 749            	// Expect a comment or CDATA:
 750            
 751            	if (p[0] == '-' && p[1] == '-')
 752            	{
 753            	    p += 2;
 754            	    entry.type = XmlEntry::COMMENT;
 755            	    entry.text = p;
 756            	    _getComment(p);
 757            	    return;
 758            	}
 759            	else if (memcmp(p, "[CDATA[", 7) == 0)
 760            	{
 761            	    p += 7;
 762            	    entry.type = XmlEntry::CDATA;
 763 mike  1.13 	    entry.text = p;
 764            	    _getCData(p);
 765            	    return;
 766            	}
 767            	else if (memcmp(p, "DOCTYPE", 7) == 0)
 768            	{
 769            	    entry.type = XmlEntry::DOCTYPE;
 770            	    entry.text = _EMPTY_STRING;
 771            	    _getDocType(p);
 772            	    return;
 773            	}
 774            	throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
 775                }
 776                else if (*p == '/')
 777                {
 778            	entry.type = XmlEntry::END_TAG;
 779            	entry.text = ++p;
 780            
 781            	if (!_getElementName(p))
 782            	    throw(XmlException(XmlException::BAD_END_TAG, _line));
 783            
 784 mike  1.13 	return;
 785                }
 786                else if (isalpha(*p) || *p == '_')
 787                {
 788            	entry.type = XmlEntry::START_TAG;
 789            	entry.text = p;
 790            
 791            	Boolean openCloseElement = false;
 792            
 793            	if (_getOpenElementName(p, openCloseElement))
 794            	{
 795            	    if (openCloseElement)
 796            		entry.type = XmlEntry::EMPTY_TAG;
 797            	    return;
 798            	}
 799                }
 800                else
 801            	throw XmlException(XmlException::BAD_START_TAG, _line);
 802            
 803                //--------------------------------------------------------------------------
 804                // Grab all the attributes:
 805 mike  1.13     //--------------------------------------------------------------------------
 806            
 807                for (;;)
 808                {
 809            	if (entry.type == XmlEntry::XML_DECLARATION)
 810            	{
 811            	    if (p[0] == '?' && p[1] == '>')
 812            	    {
 813            		p += 2;
 814            		return;
 815            	    }
 816            	}
 817            	else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
 818            	{
 819            	    entry.type = XmlEntry::EMPTY_TAG;
 820            	    p += 2;
 821            	    return;
 822            	}
 823            	else if (*p == '>')
 824            	{
 825            	    p++;
 826 mike  1.13 	    return;
 827            	}
 828            
 829            	XmlAttribute attr;
 830            	attr.name = p;
 831            	_getAttributeNameAndEqual(p);
 832            
 833            	if (*p != '"' && *p != '\'')
 834            	    throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 835            
 836            	attr.value = p + 1;
 837            	_getAttributeValue(p);
 838            
 839            	if (entry.type == XmlEntry::XML_DECLARATION)
 840            	{
 841            	    // The next thing must a space or a "?>":
 842            
 843            	    if (!(p[0] == '?' && p[1] == '>') && !isspace(*p))
 844            	    {
 845            		throw XmlException(
 846            		    XmlException::BAD_ATTRIBUTE_VALUE, _line);
 847 mike  1.13 	    }
 848            	}
 849            	else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || isspace(*p)))
 850            	{
 851            	    // The next thing must be a space or a '>':
 852            
 853            	    throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 854            	}
 855            
 856            	_skipWhitespace(p);
 857            
 858            	if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES)
 859            	    throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line);
 860            
 861            	_substituteReferences((char*)attr.value);
 862            	entry.attributes[entry.attributeCount++] = attr;
 863                }
 864            }
 865            
 866            static const char* _typeStrings[] =
 867            {
 868 mike  1.13     "XML_DECLARATION", 
 869                "START_TAG", 
 870                "EMPTY_TAG", 
 871                "END_TAG", 
 872                "COMMENT",
 873                "CDATA",
 874                "DOCTYPE",
 875                "CONTENT" 
 876            };
 877            
 878            void XmlEntry::print() const
 879            {
 880                PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
 881            
 882                Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
 883            
 884                if (needQuotes)
 885            	PEGASUS_STD(cout) << "\"";
 886            	
 887                _printValue(text);
 888            
 889 mike  1.13     if (needQuotes)
 890            	PEGASUS_STD(cout) << "\"";
 891            
 892                PEGASUS_STD(cout) << '\n';
 893            
 894                for (Uint32 i = 0; i < attributeCount; i++)
 895                {
 896            	PEGASUS_STD(cout) << "    " << attributes[i].name << "=\"";
 897            	_printValue(attributes[i].value);
 898            	PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
 899                }
 900            }
 901            
 902            const XmlAttribute* XmlEntry::findAttribute(
 903                const char* name) const
 904            {
 905                for (Uint32 i = 0; i < attributeCount; i++)
 906                {
 907            	if (strcmp(attributes[i].name, name) == 0)
 908            	    return &attributes[i];
 909                }
 910 mike  1.13 
 911                return 0;
 912            }
 913            
 914            // Find first non-whitespace character (set first) and last non-whitespace
 915            // character (set last one past this). For example, consider this string:
 916            //
 917            //	"   87     "
 918            //
 919            // The first pointer would point to '8' and the last pointer woudl point one
 920            // beyond '7'.
 921            
 922            static void _findEnds(
 923                const char* str, 
 924                const char*& first, 
 925                const char*& last)
 926            {
 927                first = str;
 928            
 929                while (isspace(*first))
 930            	first++;
 931 mike  1.13 
 932                if (!*first)
 933                {
 934            	last = first;
 935            	return;
 936                }
 937            
 938                last = first + strlen(first);
 939            
 940                while (last != first && isspace(last[-1]))
 941            	last--;
 942            }
 943            
 944            Boolean XmlEntry::getAttributeValue(
 945                const char* name, 
 946                Uint32& value) const
 947            {
 948                const XmlAttribute* attr = findAttribute(name);
 949            
 950                if (!attr)
 951            	return false;
 952 mike  1.13 
 953                const char* first;
 954                const char* last;
 955                _findEnds(attr->value, first, last);
 956            
 957                char* end = 0;
 958                long tmp = strtol(first, &end, 10);
 959            
 960                if (!end || end != last)
 961            	return false;
 962            
 963                value = Uint32(tmp);
 964                return true;
 965            }
 966            
 967            Boolean XmlEntry::getAttributeValue(
 968                const char* name, 
 969                Real32& value) const
 970            {
 971                const XmlAttribute* attr = findAttribute(name);
 972            
 973 mike  1.13     if (!attr)
 974            	return false;
 975            
 976                const char* first;
 977                const char* last;
 978                _findEnds(attr->value, first, last);
 979            
 980                char* end = 0;
 981                double tmp = strtod(first, &end);
 982            
 983                if (!end || end != last)
 984            	return false;
 985            
 986                value = Uint32(tmp);
 987                return true;
 988            }
 989            
 990            Boolean XmlEntry::getAttributeValue(
 991                const char* name, 
 992                const char*& value) const
 993            {
 994 mike  1.13     const XmlAttribute* attr = findAttribute(name);
 995            
 996                if (!attr)
 997            	return false;
 998            
 999                value = attr->value;
1000                return true;
1001            }
1002            
1003            Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1004            {
1005                const char* tmp;
1006            
1007                if (!getAttributeValue(name, tmp))
1008            	return false;
1009            
1010                value = tmp;
1011                return true;
1012            }
1013            
1014            void XmlAppendCString(Array<Sint8>& out, const char* str)
1015 mike  1.13 {
1016                out.append(str, strlen(str));
1017            }
1018            
1019            PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2