(file) Return to XmlParser.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 karl  1.27 //%2003////////////////////////////////////////////////////////////////////////
   2 mike  1.13 //
   3 karl  1.27 // Copyright (c) 2000, 2001, 2002  BMC Software, Hewlett-Packard Development
   4            // Company, L. P., IBM Corp., The Open Group, Tivoli Systems.
   5            // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L. P.;
   6            // IBM Corp.; EMC Corporation, The Open Group.
   7 mike  1.13 //
   8            // Permission is hereby granted, free of charge, to any person obtaining a copy
   9 kumpf 1.16 // of this software and associated documentation files (the "Software"), to
  10            // deal in the Software without restriction, including without limitation the
  11            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  12 mike  1.13 // sell copies of the Software, and to permit persons to whom the Software is
  13            // furnished to do so, subject to the following conditions:
  14            // 
  15 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
  16 mike  1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
  17            // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
  18 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
  19            // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  20            // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  21 mike  1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  22            // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23            //
  24            //==============================================================================
  25            //
  26            // Author: Mike Brasher (mbrasher@bmc.com)
  27            //
  28            // Modified By:
  29            //
  30            //%/////////////////////////////////////////////////////////////////////////////
  31            
  32            ////////////////////////////////////////////////////////////////////////////////
  33            //
  34            // XmlParser
  35            //
  36            //	This file contains a simple non-validating XML parser. Here are 
  37            //	several rules for well-formed XML:
  38            //
  39            //	    1.	Documents must begin with an XML declaration:
  40            //
  41            //		<?xml version="1.0" standalone="yes"?>
  42 mike  1.13 //
  43            //	    2.	Comments have the form:
  44            //
  45            //		<!-- blah blah blah -->
  46            //
  47            //	    3. The following entity references are supported:
  48            //
  49            //		&amp; - ampersand
  50            //	 	&lt; - less-than
  51            //		&gt; - greater-than
  52            //		&quot; - full quote
  53            //		&apos; - apostrophe
  54            //
  55 kumpf 1.18 //             as well as character (numeric) references:
  56            
  57            //              &#49; - decimal reference for character '1'
  58            //              &#x31; - hexadecimal reference for character '1'
  59            //
  60 mike  1.13 //	    4. Element names and attribute names take the following form:
  61            //
  62            //		[A-Za-z_][A-Za-z_0-9-.:]*
  63            //
  64            //	    5.	Arbitrary data (CDATA) can be enclosed like this:
  65            //
  66            //		    <![CDATA[
  67            //		    ...
  68            //		    ]]>
  69            //
  70            //	    6.	Element names and attributes names are case-sensitive.
  71            //
  72            //	    7.	Attribute values must be delimited by full (") or half (')
  73            //		quotes; an undelimited value is rejected.
  74            //
  75            //	    8.  <!DOCTYPE...>
  76            //
  77            // TODO:
  78            //
  79 karl  1.15 //      ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is work.
  80 mike  1.13 //	Handle <!DOCTYPE...> sections which are complicated (containing
  81            //        rules rather than references to files).
  82            //
  83            //	Remove newlines from string literals:
  84            //
  85            //          Example: <xyz x="hello
  86            //		world">
  87            //
  88            ////////////////////////////////////////////////////////////////////////////////
  89            
  90 sage  1.14 #include <Pegasus/Common/Config.h>
  91 mike  1.13 #include <cctype>
  92            #include <cstdio>
  93            #include <cstdlib>
  94            #include <cstring>
  95            #include "XmlParser.h"
  96            #include "Logger.h"
  97 chuck 1.19 #include "ExceptionRep.h"
  98 mike  1.13 
  99            PEGASUS_NAMESPACE_BEGIN
 100            
 101            #define PEGASUS_ARRAY_T XmlEntry
 102            # include "ArrayImpl.h"
 103            #undef PEGASUS_ARRAY_T
 104            
 105            
 106            ////////////////////////////////////////////////////////////////////////////////
 107            //
 108            // Static helper functions
 109            //
 110            ////////////////////////////////////////////////////////////////////////////////
 111            
 112            static void _printValue(const char* p)
 113            {
 114                for (; *p; p++)
 115                {
 116            	if (*p == '\n')
 117            	    PEGASUS_STD(cout) << "\\n";
 118            	else if (*p == '\r')
 119 mike  1.13 	    PEGASUS_STD(cout) << "\\r";
 120            	else if (*p == '\t')
 121            	    PEGASUS_STD(cout) << "\\t";
 122            	else
 123            	    PEGASUS_STD(cout) << *p;
 124                }
 125            }
 126            
 127            struct EntityReference
 128            {
 129                const char* match;
 130                Uint32 length;
 131                char replacement;
 132            };
 133            
 134 kumpf 1.18 // ATTN: Add support for more entity references
 135 mike  1.13 static EntityReference _references[] =
 136            {
 137                { "&amp;", 5, '&' },
 138                { "&lt;", 4, '<' },
 139                { "&gt;", 4, '>' },
 140                { "&quot;", 6, '"' },
 141                { "&apos;", 6, '\'' }
 142            };
 143            
 144 chuck 1.26 
 // Whitespace test that deliberately avoids the C library's isspace().
 // isspace() is locale-sensitive and reports some characters above 0x7f as
 // whitespace, which makes the XmlParser mis-parse UTF-8 data.
 //
 // Section 2.3 of the XML 1.0 Standard (http://www.w3.org/TR/REC-xml)
 // defines white space as:
 //
 //     S ::= (#x20 | #x9 | #xD | #xA)+
 //
 // Returns 1 for space, tab, carriage return and line feed; 0 for any other
 // character (including the null terminator).
 static int _isspace(char c)
 {
     switch (c)
     {
         case ' ':
         case '\t':
         case '\r':
         case '\n':
             return 1;

         default:
             return 0;
     }
 }
 159            
 160            
 161 mike  1.13 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
 162            
 163            // Remove all redundant spaces from the given string:
 164            
 165            static void _normalize(char* text)
 166            {
 167                Uint32 length = strlen(text);
 168                char* p = text;
 169                char* end = p + length;
 170            
 171                // Remove leading spaces:
 172            
 173 chuck 1.26     while (_isspace(*p))
 174            		p++;
 175 mike  1.13 
 176                if (p != text)
 177            	memmove(text, p, end - p + 1);
 178            
 179                p = text;
 180            
 181                // Look for sequences of more than one space and remove all but one.
 182            
 183                for (;;)
 184                {
 185            	// Advance to the next space:
 186            
 187 chuck 1.26 	while (*p && !_isspace(*p))
 188 mike  1.13 	    p++;
 189            
 190            	if (!*p)
 191            	    break;
 192            
 193            	// Advance to the next non-space:
 194            
 195            	char* q = p++;
 196            
 197 chuck 1.26 	while (_isspace(*p))
 198 mike  1.13 	    p++;
 199            
 200            	// Discard trailing spaces (if we are at the end):
 201            
 202            	if (!*p)
 203            	{
 204            	    *q = '\0';
 205            	    break;
 206            	}
 207            
 208            	// Remove the redundant spaces:
 209            
 210            	Uint32 n = p - q;
 211            
 212            	if (n > 1)
 213            	{
 214            	    *q++ = ' ';
 215            	    memmove(q, p, end - p + 1);
 216            	    p = q;
 217            	}
 218                }
 219 mike  1.13 }
 220            
 221            ////////////////////////////////////////////////////////////////////////////////
 222            //
 223            // XmlException
 224            //
 225            ////////////////////////////////////////////////////////////////////////////////
 226            
 // Default (untranslated) message text for each XML error. The table is
 // indexed by (code - 1) and runs parallel to the _xmlKeys table of message
 // keys, so the two must keep the same number of entries in the same order.
 static const char* _xmlMessages[] =
 {
     "Bad opening element",
     "Bad closing element",
     "Bad attribute name",
     "Expected equal sign",
     "Bad attribute value",
     "A \"--\" sequence found within comment",
     "Unterminated comment",
     "Unterminated CDATA block",
     "Unterminated DOCTYPE",
     "Too many attributes: parser only handles 10",
     "Malformed reference",
     "Expected a comment or CDATA following \"<!\" sequence",
     "Closing element does not match opening element",
     "One or more tags are still open",
     "More than one root element was encountered",
     "Validation error",
     "Semantic error"
 };
 247            
 // Message-bundle keys for each XML error. The table is indexed by
 // (code - 1), in the same order as the default texts in _xmlMessages.
 static const char* _xmlKeys[] =
 {
     "Common.XmlParser.BAD_START_TAG",
     "Common.XmlParser.BAD_END_TAG",
     "Common.XmlParser.BAD_ATTRIBUTE_NAME",
     "Common.XmlParser.EXPECTED_EQUAL_SIGN",
     "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
     "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
     "Common.XmlParser.UNTERMINATED_COMMENT",
     "Common.XmlParser.UNTERMINATED_CDATA",
     "Common.XmlParser.UNTERMINATED_DOCTYPE",
     "Common.XmlParser.TOO_MANY_ATTRIBUTES",
     "Common.XmlParser.MALFORMED_REFERENCE",
     "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
     "Common.XmlParser.START_END_MISMATCH",
     "Common.XmlParser.UNCLOSED_TAGS",
     "Common.XmlParser.MULTIPLE_ROOTS",
     "Common.XmlParser.VALIDATION_ERROR",
     "Common.XmlParser.SEMANTIC_ERROR"
 };
 268               
 269 chuck    1.23 // l10n replace _formMessage (comment out the old one)
 270 chuck    1.19 /*
 271 mike     1.13 static String _formMessage(Uint32 code, Uint32 line, const String& message)
 272               {
 273                   String result = _xmlMessages[Uint32(code) - 1];
 274               
 275                   char buffer[32];
 276                   sprintf(buffer, "%d", line);
 277                   result.append(": on line ");
 278                   result.append(buffer);
 279               
 280                   if (message.size())
 281                   {
 282               	result.append(": ");
 283               	result.append(message);
 284                   }
 285               
 286                   return result;
 287               }
 288 chuck    1.19 */
 289               
 290               static MessageLoaderParms _formMessage(Uint32 code, Uint32 line, const String& message)
 291               {
 292                   String dftMsg = _xmlMessages[Uint32(code) - 1];
 293                   String key = _xmlKeys[Uint32(code) - 1];
 294               	String msg = message;
 295               
 296                   dftMsg.append(": on line $0");
 297                   if (message.size())
 298                   {
 299 humberto 1.20     	msg = ": " + msg;
 300 chuck    1.19     	dftMsg.append("$1");
 301                   }    
 302               
 303                   return MessageLoaderParms(key, dftMsg, line ,msg);
 304               }
 305               
 306               static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
 307               {
 308                   String dftMsg = _xmlMessages[Uint32(code) - 1];
 309                   String key = _xmlKeys[Uint32(code) - 1];
 310               
 311                   dftMsg.append(": on line $0");
 312                
 313                   return MessageLoaderParms(key, dftMsg, line);
 314               }
 315               
 316 mike     1.13 
 317               XmlException::XmlException(
 318                   XmlException::Code code, 
 319                   Uint32 lineNumber,
 320                   const String& message) 
 321                   : Exception(_formMessage(code, lineNumber, message))
 322               {
 323               
 324               }
 325               
 326 chuck    1.19 
 327               XmlException::XmlException(
 328                   XmlException::Code code, 
 329                   Uint32 lineNumber,
 330                   MessageLoaderParms& msgParms) 
 331                   : Exception(_formPartialMessage(code, lineNumber))
 332               {
 333 humberto 1.21 	if (msgParms.default_msg.size())
 334                   {
 335                   	msgParms.default_msg = ": " + msgParms.default_msg;
 336                   } 
 337 chuck    1.19 	_rep->message.append(MessageLoader::getMessage(msgParms));
 338               }
 339               
 340               
 341 mike     1.13 ////////////////////////////////////////////////////////////////////////////////
 342               //
 343               // XmlValidationError
 344               //
 345               ////////////////////////////////////////////////////////////////////////////////
 346               
 347               XmlValidationError::XmlValidationError(
 348                   Uint32 lineNumber,
 349                   const String& message)
 350                   : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
 351               {
 352               
 353               }
 354               
 355 chuck    1.19 
 356               XmlValidationError::XmlValidationError(
 357                   Uint32 lineNumber,
 358                   MessageLoaderParms& msgParms)
 359                   : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
 360               {
 361               
 362               }
 363               
 364               
 365 mike     1.13 ////////////////////////////////////////////////////////////////////////////////
 366               //
 367               // XmlSemanticError
 368               //
 369               ////////////////////////////////////////////////////////////////////////////////
 370               
 371               XmlSemanticError::XmlSemanticError(
 372                   Uint32 lineNumber,
 373                   const String& message)
 374                   : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
 375               {
 376               
 377               }
 378 chuck    1.19 
 379               
 380               XmlSemanticError::XmlSemanticError(
 381                   Uint32 lineNumber,
 382                   MessageLoaderParms& msgParms)
 383                   : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
 384               {
 385               
 386               }
 387               
 388 mike     1.13 
 389               ////////////////////////////////////////////////////////////////////////////////
 390               //
 391               // XmlParser
 392               //
 393               ////////////////////////////////////////////////////////////////////////////////
 394               
 395               XmlParser::XmlParser(char* text) : _line(1), _text(text), _current(text), 
 396                   _restoreChar('\0'), _foundRoot(false)
 397               {
 398               
 399               }
 400               
 401               Boolean XmlParser::next(XmlEntry& entry)
 402               {
 403                   if (!_putBackStack.isEmpty())
 404                   {
 405               	entry = _putBackStack.top();
 406               	_putBackStack.pop();
 407               	return true;
 408                   }
 409 mike     1.13 
 410                   // If a character was overwritten with a null-terminator the last
 411                   // time this routine was called, then put back that character. Before
 412                   // exiting of course, restore the null-terminator.
 413               
 414                   char* nullTerminator = 0;
 415               
 416                   if (_restoreChar && !*_current)
 417                   {
 418               	nullTerminator = _current;
 419               	*_current = _restoreChar;
 420               	_restoreChar = '\0';
 421                   }
 422               
 423                   // Skip over any whitespace:
 424               
 425                   _skipWhitespace(_current);
 426               
 427                   if (!*_current)
 428                   {
 429               	if (nullTerminator)
 430 mike     1.13 	    *nullTerminator = '\0';
 431               
 432               	if (!_stack.isEmpty())
 433               	    throw XmlException(XmlException::UNCLOSED_TAGS, _line);
 434               
 435               	return false;
 436                   }
 437               
 438                   // Either a "<...>" or content begins next:
 439               
 440                   if (*_current == '<')
 441                   {
 442               	_current++;
 443               	_getElement(_current, entry);
 444               
 445               	if (nullTerminator)
 446               	    *nullTerminator = '\0';
 447               
 448               	if (entry.type == XmlEntry::START_TAG)
 449               	{
 450               	    if (_stack.isEmpty() && _foundRoot)
 451 mike     1.13 		throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
 452               
 453               	    _foundRoot = true;
 454               	    _stack.push((char*)entry.text);
 455               	}
 456               	else if (entry.type == XmlEntry::END_TAG)
 457               	{
 458               	    if (_stack.isEmpty())
 459               		throw XmlException(XmlException::START_END_MISMATCH, _line);
 460               
 461               	    if (strcmp(_stack.top(), entry.text) != 0)
 462               		throw XmlException(XmlException::START_END_MISMATCH, _line);
 463               
 464               	    _stack.pop();
 465               	}
 466               
 467               	return true;
 468                   }
 469                   else
 470                   {
 471               	entry.type = XmlEntry::CONTENT;
 472 mike     1.13 	entry.text = _current;
 473               	_getContent(_current);
 474               	_restoreChar = *_current;
 475               	*_current = '\0';
 476               
 477               	if (nullTerminator)
 478               	    *nullTerminator = '\0';
 479               
 480               	_substituteReferences((char*)entry.text);
 481               	_normalize((char*)entry.text);
 482               
 483               	return true;
 484                   }
 485               }
 486               
 487               void XmlParser::putBack(XmlEntry& entry)
 488               {
 489                   _putBackStack.push(entry);
 490               }
 491               
 492               XmlParser::~XmlParser()
 493 mike     1.13 {
 494                   // Nothing to do!
 495               }
 496               
 497               void XmlParser::_skipWhitespace(char*& p)
 498               {
 499 chuck    1.26     while (*p && _isspace(*p))
 500 mike     1.13     {
 501               	if (*p == '\n')
 502               	    _line++;
 503               
 504               	p++;
 505                   }
 506               }
 507               
 508               Boolean XmlParser::_getElementName(char*& p)
 509               {
 510 david    1.25     if (!(((*p >= 'A') && (*p <= 'Z')) ||
 511 kumpf    1.24           ((*p >= 'a') && (*p <= 'z')) ||
 512                         (*p == '_')))
 513 mike     1.13 	throw XmlException(XmlException::BAD_START_TAG, _line);
 514 kumpf    1.24     p++;
 515 mike     1.13 
 516 david    1.22     while ((*p) &&
 517               	   (((*p >= 'A') && (*p <= 'Z')) ||
 518               	    ((*p >= 'a') && (*p <= 'z')) ||
 519 kumpf    1.24 	    ((*p >= '0') && (*p <= '9')) ||
 520 david    1.22 	    *p == '_' || *p == '-' || *p == ':' || *p == '.'))
 521 mike     1.13 	p++;
 522               
 523                   // The next character must be a space:
 524               
 525 chuck    1.26     if (_isspace(*p))
 526 mike     1.13     {
 527               	*p++ = '\0';
 528               	_skipWhitespace(p);
 529                   }
 530               
 531                   if (*p == '>')
 532                   {
 533               	*p++ = '\0';
 534               	return true;
 535                   }
 536               
 537                   return false;
 538               }
 539               
 540               Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement)
 541               {
 542                   openCloseElement = false;
 543               
 544 david    1.25     if (!(((*p >= 'A') && (*p <= 'Z')) ||
 545 kumpf    1.24           ((*p >= 'a') && (*p <= 'z')) ||
 546                         (*p == '_')))
 547 mike     1.13 	throw XmlException(XmlException::BAD_START_TAG, _line);
 548 kumpf    1.24     p++;
 549 mike     1.13 
 550 david    1.22     while ((*p) &&
 551               	   (((*p >= 'A') && (*p <= 'Z')) ||
 552               	    ((*p >= 'a') && (*p <= 'z')) ||
 553 kumpf    1.24 	    ((*p >= '0') && (*p <= '9')) ||
 554 david    1.22 	    *p == '_' || *p == '-' || *p == ':' || *p == '.'))
 555 mike     1.13 	p++;
 556               
 557                   // The next character must be a space:
 558               
 559 chuck    1.26     if (_isspace(*p))
 560 mike     1.13     {
 561               	*p++ = '\0';
 562               	_skipWhitespace(p);
 563                   }
 564               
 565                   if (*p == '>')
 566                   {
 567               	*p++ = '\0';
 568               	return true;
 569                   }
 570               
 571                   if (p[0] == '/' && p[1] == '>')
 572                   {
 573               	openCloseElement = true;
 574               	*p = '\0';
 575               	p += 2;
 576               	return true;
 577                   }
 578               
 579                   return false;
 580               }
 581 mike     1.13 
 582               void XmlParser::_getAttributeNameAndEqual(char*& p)
 583               {
 584 david    1.25     if (!(((*p >= 'A') && (*p <= 'Z')) ||
 585 kumpf    1.24           ((*p >= 'a') && (*p <= 'z')) ||
 586                         (*p == '_')))
 587 mike     1.13 	throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
 588 kumpf    1.24     p++;
 589 mike     1.13 
 590 david    1.22     while ((*p) &&
 591               	   (((*p >= 'A') && (*p <= 'Z')) ||
 592               	    ((*p >= 'a') && (*p <= 'z')) ||
 593 kumpf    1.24 	    ((*p >= '0') && (*p <= '9')) ||
 594 david    1.22 	    *p == '_' || *p == '-' || *p == ':' || *p == '.'))
 595 mike     1.13 	p++;
 596               
 597                   char* term = p;
 598               
 599                   _skipWhitespace(p);
 600               
 601                   if (*p != '=')
 602               	throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
 603               
 604                   p++;
 605               
 606                   _skipWhitespace(p);
 607               
 608                   *term = '\0';
 609               }
 610               
 611               void XmlParser::_getAttributeValue(char*& p)
 612               {
 613                   // ATTN-B: handle values contained in semiquotes:
 614               
 615                   if (*p != '"' && *p != '\'')
 616 mike     1.13 	throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 617               
 618                   char startChar = *p++;
 619               
 620                   while (*p && *p != startChar)
 621               	p++;
 622               
 623                   if (*p != startChar)
 624               	throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 625               
 626                   *p++ = '\0';
 627               }
 628               
 629               void XmlParser::_getComment(char*& p)
 630               {
 631                   // Now p points to first non-whitespace character beyond "<--" sequence:
 632               
 633                   for (; *p; p++)
 634                   {
 635               	if (p[0] == '-' && p[1] == '-')
 636               	{
 637 mike     1.13 	    if (p[2] != '>')
 638               	    {
 639               		throw XmlException(
 640               		    XmlException::MINUS_MINUS_IN_COMMENT, _line);
 641               	    }
 642               
 643               	    // Find end of comment (excluding whitespace):
 644               
 645               	    *p = '\0';
 646               	    p += 3;
 647               	    return;
 648               	}
 649                   }
 650               
 651                   // If it got this far, then the comment is unterminated:
 652               
 653                   throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
 654               }
 655               
 656               void XmlParser::_getCData(char*& p)
 657               {
 658 mike     1.13     // At this point p points one past "<![CDATA[" sequence:
 659               
 660                   for (; *p; p++)
 661                   {
 662               	if (p[0] == ']' && p[1] == ']' && p[2] == '>')
 663               	{
 664               	    *p = '\0';
 665               	    p += 3;
 666               	    return;
 667               	}
 668               	else if (*p == '\n')
 669               	    _line++;
 670                   }
 671               
 672                   // If it got this far, then the comment is unterminated:
 673               
 674                   throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
 675               }
 676               
 677               void XmlParser::_getDocType(char*& p)
 678               {
 679 mike     1.13     // Just ignore the DOCTYPE command for now:
 680               
 681                   for (; *p && *p != '>'; p++)
 682                   {
 683               	if (*p == '\n')
 684               	    _line++;
 685                   }
 686               
 687                   if (*p != '>')
 688               	throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
 689               
 690                   p++;
 691               }
 692               
 693               void XmlParser::_getContent(char*& p)
 694               {
 695                   while (*p && *p != '<')
 696                   {
 697               	if (*p == '\n')
 698               	    _line++;
 699               
 700 mike     1.13 	p++;
 701                   }
 702               }
 703               
 704               void XmlParser::_substituteReferences(char* text)
 705               {
 706                   Uint32 rem = strlen(text);
 707               
 708                   for (char* p = text; *p; p++, rem--)
 709                   {
 710               	if (*p == '&')
 711               	{
 712 kumpf    1.18             // Process character or entity reference
 713 mike     1.13 
 714 kumpf    1.18             Uint16 referenceChar = 0;
 715                           Uint32 referenceLength = 0;
 716                           XmlException::Code code = XmlException::MALFORMED_REFERENCE;
 717               
 718                           if (*(p+1) == '#')
 719                           {
 720                               // Found a character (numeric) reference
 721                               // Determine whether it is decimal or hex
 722                               if (*(p+2) == 'x')
 723                               {
 724                                   // Decode a hexadecimal character reference
 725                                   char* q = p+3;
 726               
 727                                   // At most four digits are allowed, plus trailing ';'
 728                                   Uint32 numDigits;
 729                                   for (numDigits = 0; numDigits < 5; numDigits++, q++)
 730                                   {
 731                                       if (isdigit(*q))
 732                                       {
 733                                           referenceChar = (referenceChar << 4);
 734                                           referenceChar += (*q - '0');
 735 kumpf    1.18                         }
 736                                       else if ((*q >= 'A') && (*q <= 'F'))
 737                                       {
 738                                           referenceChar = (referenceChar << 4);
 739                                           referenceChar += (*q - 'A' + 10);
 740                                       }
 741                                       else if ((*q >= 'a') && (*q <= 'f'))
 742                                       {
 743                                           referenceChar = (referenceChar << 4);
 744                                           referenceChar += (*q - 'a' + 10);
 745                                       }
 746                                       else if (*q == ';')
 747                                       {
 748                                           break;
 749                                       }
 750                                       else
 751                                       {
 752                                           throw XmlException(code, _line);
 753                                       }
 754                                   }
 755               
 756 kumpf    1.18                     // Hex number must be 1 - 4 digits
 757                                   if ((numDigits == 0) || (numDigits > 4))
 758                                   {
 759                                       throw XmlException(code, _line);
 760                                   }
 761               
 762                                   // ATTN: Currently do not support 16-bit characters
 763                                   if (referenceChar > 0xff)
 764                                   {
 765                                       // ATTN: Is there a good way to say "unsupported"?
 766                                       throw XmlException(code, _line);
 767                                   }
 768               
 769                                   referenceLength = numDigits + 4;
 770                               }
 771                               else
 772                               {
 773                                   // Decode a decimal character reference
 774                                   Uint32 newChar = 0;
 775                                   char* q = p+2;
 776               
 777 kumpf    1.18                     // At most five digits are allowed, plus trailing ';'
 778                                   Uint32 numDigits;
 779                                   for (numDigits = 0; numDigits < 6; numDigits++, q++)
 780                                   {
 781                                       if (isdigit(*q))
 782                                       {
 783                                           newChar = (newChar * 10);
 784                                           newChar += (*q - '0');
 785                                       }
 786                                       else if (*q == ';')
 787                                       {
 788                                           break;
 789                                       }
 790                                       else
 791                                       {
 792                                           throw XmlException(code, _line);
 793                                       }
 794                                   }
 795               
 796                                   // Decimal number must be 1 - 5 digits and fit in 16 bits
 797                                   if ((numDigits == 0) || (numDigits > 5) ||
 798 kumpf    1.18                         (newChar > 0xffff))
 799                                   {
 800                                       throw XmlException(code, _line);
 801                                   }
 802               
 803                                   // ATTN: Currently do not support 16-bit characters
 804                                   if (newChar > 0xff)
 805                                   {
 806                                       // ATTN: Is there a good way to say "unsupported"?
 807                                       throw XmlException(code, _line);
 808                                   }
 809               
 810                                   referenceChar = Uint16(newChar);
 811                                   referenceLength = numDigits + 3;
 812                               }
 813                           }
 814                           else
 815                           {
 816                               // Check for entity reference
 817                               // ATTN: Inefficient if many entity references are supported
 818                               Uint32 i;
 819 kumpf    1.18                 for (i = 0; i < _REFERENCES_SIZE; i++)
 820                               {
 821                                   Uint32 length = _references[i].length;
 822                                   const char* match = _references[i].match;
 823               
 824                                   if (strncmp(p, _references[i].match, length) == 0)
 825                                   {
 826                                       referenceChar = _references[i].replacement;
 827                                       referenceLength = length;
 828                                       break;
 829                                   }
 830                               }
 831               
 832                               if (i == _REFERENCES_SIZE)
 833                               {
 834                                   // Didn't recognize the entity reference
 835                                   // ATTN: Is there a good way to say "unsupported"?
 836                                   throw XmlException(code, _line);
 837                               }
 838                           }
 839               
 840 kumpf    1.18             // Replace the reference with the correct character
 841                           *p = (char)referenceChar;
 842                           char* q = p + referenceLength;
 843                           rem = rem - referenceLength + 1;
 844                           memmove(p + 1, q, rem);
 845 mike     1.13 	}
 846                   }
 847               }
 848               
 849               static const char _EMPTY_STRING[] = "";
 850               
 851               void XmlParser::_getElement(char*& p, XmlEntry& entry)
 852               {
 853                   entry.attributeCount = 0;
 854               
 855                   //--------------------------------------------------------------------------
 856                   // Get the element name (expect one of these: '?', '!', [A-Za-z_])
 857                   //--------------------------------------------------------------------------
 858               
 859                   if (*p == '?')
 860                   {
 861               	entry.type = XmlEntry::XML_DECLARATION;
 862               	entry.text = ++p;
 863               
 864               	Boolean openCloseElement = false;
 865               
 866 mike     1.13 	if (_getElementName(p))
 867               	    return;
 868                   }
 869                   else if (*p == '!')
 870                   {
 871               	p++;
 872               
 873               	// Expect a comment or CDATA:
 874               
 875               	if (p[0] == '-' && p[1] == '-')
 876               	{
 877               	    p += 2;
 878               	    entry.type = XmlEntry::COMMENT;
 879               	    entry.text = p;
 880               	    _getComment(p);
 881               	    return;
 882               	}
 883               	else if (memcmp(p, "[CDATA[", 7) == 0)
 884               	{
 885               	    p += 7;
 886               	    entry.type = XmlEntry::CDATA;
 887 mike     1.13 	    entry.text = p;
 888               	    _getCData(p);
 889               	    return;
 890               	}
 891               	else if (memcmp(p, "DOCTYPE", 7) == 0)
 892               	{
 893               	    entry.type = XmlEntry::DOCTYPE;
 894               	    entry.text = _EMPTY_STRING;
 895               	    _getDocType(p);
 896               	    return;
 897               	}
 898               	throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
 899                   }
 900                   else if (*p == '/')
 901                   {
 902               	entry.type = XmlEntry::END_TAG;
 903               	entry.text = ++p;
 904               
 905               	if (!_getElementName(p))
 906               	    throw(XmlException(XmlException::BAD_END_TAG, _line));
 907               
 908 mike     1.13 	return;
 909                   }
 910 david    1.25     else if ((((*p >= 'A') && (*p <= 'Z')) ||
 911 kumpf    1.24               ((*p >= 'a') && (*p <= 'z')) ||
 912                             (*p == '_')))
 913 mike     1.13     {
 914               	entry.type = XmlEntry::START_TAG;
 915               	entry.text = p;
 916               
 917               	Boolean openCloseElement = false;
 918               
 919               	if (_getOpenElementName(p, openCloseElement))
 920               	{
 921               	    if (openCloseElement)
 922               		entry.type = XmlEntry::EMPTY_TAG;
 923               	    return;
 924               	}
 925                   }
 926                   else
 927               	throw XmlException(XmlException::BAD_START_TAG, _line);
 928               
 929                   //--------------------------------------------------------------------------
 930                   // Grab all the attributes:
 931                   //--------------------------------------------------------------------------
 932               
 933                   for (;;)
 934 mike     1.13     {
 935               	if (entry.type == XmlEntry::XML_DECLARATION)
 936               	{
 937               	    if (p[0] == '?' && p[1] == '>')
 938               	    {
 939               		p += 2;
 940               		return;
 941               	    }
 942               	}
 943               	else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
 944               	{
 945               	    entry.type = XmlEntry::EMPTY_TAG;
 946               	    p += 2;
 947               	    return;
 948               	}
 949               	else if (*p == '>')
 950               	{
 951               	    p++;
 952               	    return;
 953               	}
 954               
 955 mike     1.13 	XmlAttribute attr;
 956               	attr.name = p;
 957               	_getAttributeNameAndEqual(p);
 958               
 959               	if (*p != '"' && *p != '\'')
 960               	    throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 961               
 962               	attr.value = p + 1;
 963               	_getAttributeValue(p);
 964               
 965               	if (entry.type == XmlEntry::XML_DECLARATION)
 966               	{
 967               	    // The next thing must a space or a "?>":
 968               
 969 chuck    1.26 	    if (!(p[0] == '?' && p[1] == '>') && !_isspace(*p))
 970 mike     1.13 	    {
 971               		throw XmlException(
 972               		    XmlException::BAD_ATTRIBUTE_VALUE, _line);
 973               	    }
 974               	}
 975 chuck    1.26 	else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || _isspace(*p)))
 976 mike     1.13 	{
 977               	    // The next thing must be a space or a '>':
 978               
 979               	    throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 980               	}
 981               
 982               	_skipWhitespace(p);
 983               
 984               	if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES)
 985               	    throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line);
 986               
 987               	_substituteReferences((char*)attr.value);
 988               	entry.attributes[entry.attributeCount++] = attr;
 989                   }
 990               }
 991               
 992               static const char* _typeStrings[] =
 993               {
 994                   "XML_DECLARATION", 
 995                   "START_TAG", 
 996                   "EMPTY_TAG", 
 997 mike     1.13     "END_TAG", 
 998                   "COMMENT",
 999                   "CDATA",
1000                   "DOCTYPE",
1001                   "CONTENT" 
1002               };
1003               
1004               void XmlEntry::print() const
1005               {
1006                   PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
1007               
1008                   Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
1009               
1010                   if (needQuotes)
1011               	PEGASUS_STD(cout) << "\"";
1012               	
1013                   _printValue(text);
1014               
1015                   if (needQuotes)
1016               	PEGASUS_STD(cout) << "\"";
1017               
1018 mike     1.13     PEGASUS_STD(cout) << '\n';
1019               
1020                   for (Uint32 i = 0; i < attributeCount; i++)
1021                   {
1022               	PEGASUS_STD(cout) << "    " << attributes[i].name << "=\"";
1023               	_printValue(attributes[i].value);
1024               	PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
1025                   }
1026               }
1027               
1028               const XmlAttribute* XmlEntry::findAttribute(
1029                   const char* name) const
1030               {
1031                   for (Uint32 i = 0; i < attributeCount; i++)
1032                   {
1033               	if (strcmp(attributes[i].name, name) == 0)
1034               	    return &attributes[i];
1035                   }
1036               
1037                   return 0;
1038               }
1039 mike     1.13 
1040               // Find first non-whitespace character (set first) and last non-whitespace
1041               // character (set last one past this). For example, consider this string:
1042               //
1043               //	"   87     "
1044               //
1045               // The first pointer would point to '8' and the last pointer woudl point one
1046               // beyond '7'.
1047               
1048               static void _findEnds(
1049                   const char* str, 
1050                   const char*& first, 
1051                   const char*& last)
1052               {
1053                   first = str;
1054               
1055 chuck    1.26     while (_isspace(*first))
1056 mike     1.13 	first++;
1057               
1058                   if (!*first)
1059                   {
1060               	last = first;
1061               	return;
1062                   }
1063               
1064                   last = first + strlen(first);
1065               
1066 chuck    1.26     while (last != first && _isspace(last[-1]))
1067 mike     1.13 	last--;
1068               }
1069               
1070               Boolean XmlEntry::getAttributeValue(
1071                   const char* name, 
1072                   Uint32& value) const
1073               {
1074                   const XmlAttribute* attr = findAttribute(name);
1075               
1076                   if (!attr)
1077               	return false;
1078               
1079                   const char* first;
1080                   const char* last;
1081                   _findEnds(attr->value, first, last);
1082               
1083                   char* end = 0;
1084                   long tmp = strtol(first, &end, 10);
1085               
1086                   if (!end || end != last)
1087               	return false;
1088 mike     1.13 
1089                   value = Uint32(tmp);
1090                   return true;
1091               }
1092               
1093               Boolean XmlEntry::getAttributeValue(
1094                   const char* name, 
1095                   Real32& value) const
1096               {
1097                   const XmlAttribute* attr = findAttribute(name);
1098               
1099                   if (!attr)
1100               	return false;
1101               
1102                   const char* first;
1103                   const char* last;
1104                   _findEnds(attr->value, first, last);
1105               
1106                   char* end = 0;
1107                   double tmp = strtod(first, &end);
1108               
1109 mike     1.13     if (!end || end != last)
1110               	return false;
1111               
1112                   value = Uint32(tmp);
1113                   return true;
1114               }
1115               
1116               Boolean XmlEntry::getAttributeValue(
1117                   const char* name, 
1118                   const char*& value) const
1119               {
1120                   const XmlAttribute* attr = findAttribute(name);
1121               
1122                   if (!attr)
1123               	return false;
1124               
1125                   value = attr->value;
1126                   return true;
1127               }
1128               
1129               Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1130 mike     1.13 {
1131                   const char* tmp;
1132               
1133                   if (!getAttributeValue(name, tmp))
1134               	return false;
1135               
1136 david    1.22     value = String(tmp,STRING_FLAG_UTF8);
1137 mike     1.13     return true;
1138               }
1139               
1140               void XmlAppendCString(Array<Sint8>& out, const char* str)
1141               {
1142                   out.append(str, strlen(str));
1143               }
1144               
1145               PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2