(file) Return to XmlParser.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 karl  1.29 //%2004////////////////////////////////////////////////////////////////////////
   2 mike  1.13 //
   3 karl  1.29 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
   4            // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
   5            // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
   6 karl  1.27 // IBM Corp.; EMC Corporation, The Open Group.
   7 karl  1.29 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
   8            // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
   9 mike  1.13 //
  10            // Permission is hereby granted, free of charge, to any person obtaining a copy
  11 kumpf 1.16 // of this software and associated documentation files (the "Software"), to
  12            // deal in the Software without restriction, including without limitation the
  13            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  14 mike  1.13 // sell copies of the Software, and to permit persons to whom the Software is
  15            // furnished to do so, subject to the following conditions:
  16            // 
  17 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
  18 mike  1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
  19            // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
  20 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
  21            // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  22            // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  23 mike  1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24            // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25            //
  26            //==============================================================================
  27            //
  28            // Author: Mike Brasher (mbrasher@bmc.com)
  29            //
  30            // Modified By:
  31            //
  32            //%/////////////////////////////////////////////////////////////////////////////
  33            
  34            ////////////////////////////////////////////////////////////////////////////////
  35            //
  36            // XmlParser
  37            //
  38            //	This file contains a simple non-validating XML parser. Here are 
   39            //	several rules for well-formed XML:
  40            //
  41            //	    1.	Documents must begin with an XML declaration:
  42            //
  43            //		<?xml version="1.0" standalone="yes"?>
  44 mike  1.13 //
  45            //	    2.	Comments have the form:
  46            //
  47            //		<!-- blah blah blah -->
  48            //
  49            //	    3. The following entity references are supported:
  50            //
   51            //		&amp; - ampersand
   52            //	 	&lt; - less-than
   53            //		&gt; - greater-than
   54            //		&quot; - full quote
   55            //		&apos; - apostrophe
  56            //
  57 kumpf 1.18 //             as well as character (numeric) references:
  58            
  59            //              &#49; - decimal reference for character '1'
  60            //              &#x31; - hexadecimal reference for character '1'
  61            //
  62 mike  1.13 //	    4. Element names and attribute names take the following form:
  63            //
   64            //		[A-Za-z_][A-Za-z_0-9-.:]*
  65            //
  66            //	    5.	Arbitrary data (CDATA) can be enclosed like this:
  67            //
  68            //		    <![CDATA[
  69            //		    ...
  70            //		    ]]>
  71            //
  72            //	    6.	Element names and attributes names are case-sensitive.
  73            //
   74            //	    7.	Attribute values must be delimited by full (") or half (')
   75            //		quotes; values without delimiters are rejected.
  76            //
  77            //	    8.  <!DOCTYPE...>
  78            //
  79            // TODO:
  80            //
  81 karl  1.15 //      ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is work.
  82 mike  1.13 //	Handle <!DOCTYPE...> sections which are complicated (containing
  83            //        rules rather than references to files).
  84            //
  85            //	Remove newlines from string literals:
  86            //
  87            //          Example: <xyz x="hello
  88            //		world">
  89            //
  90            ////////////////////////////////////////////////////////////////////////////////
  91            
  92 sage  1.14 #include <Pegasus/Common/Config.h>
  93 mike  1.13 #include <cctype>
  94            #include <cstdio>
  95            #include <cstdlib>
  96            #include <cstring>
  97            #include "XmlParser.h"
  98            #include "Logger.h"
  99 chuck 1.19 #include "ExceptionRep.h"
 100 mike  1.13 
 101            PEGASUS_NAMESPACE_BEGIN
 102            
 103            #define PEGASUS_ARRAY_T XmlEntry
 104            # include "ArrayImpl.h"
 105            #undef PEGASUS_ARRAY_T
 106            
 107            
 108            ////////////////////////////////////////////////////////////////////////////////
 109            //
 110            // Static helper functions
 111            //
 112            ////////////////////////////////////////////////////////////////////////////////
 113            
 114            static void _printValue(const char* p)
 115            {
 116                for (; *p; p++)
 117                {
 118            	if (*p == '\n')
 119            	    PEGASUS_STD(cout) << "\\n";
 120            	else if (*p == '\r')
 121 mike  1.13 	    PEGASUS_STD(cout) << "\\r";
 122            	else if (*p == '\t')
 123            	    PEGASUS_STD(cout) << "\\t";
 124            	else
 125            	    PEGASUS_STD(cout) << *p;
 126                }
 127            }
 128            
 129            struct EntityReference
 130            {
 131                const char* match;
 132                Uint32 length;
 133                char replacement;
 134            };
 135            
 136 kumpf 1.18 // ATTN: Add support for more entity references
 137 mike  1.13 static EntityReference _references[] =
 138            {
 139                { "&amp;", 5, '&' },
 140                { "&lt;", 4, '<' },
 141                { "&gt;", 4, '>' },
 142                { "&quot;", 6, '"' },
 143                { "&apos;", 6, '\'' }
 144            };
 145            
 146 chuck 1.26 
 147            // Implements a check for a whitespace character, without calling
 148            // isspace( ).  The isspace( ) function is locale-sensitive,
 149            // and incorrectly flags some chars above 0x7f as whitespace.  This
 150            // causes the XmlParser to incorrectly parse UTF-8 data.
 151            //
 152            // Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml)
 153            // defines white space as:
 154            // S    ::=    (#x20 | #x9 | #xD | #xA)+ 
 155            static int _isspace(char c)
 156            {
 157            	if (c == ' ' || c == '\r' || c == '\t' || c == '\n')
 158            		return 1;
 159            	return 0;
 160            }
 161            
 162            
 163 mike  1.13 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
 164            
 165            // Remove all redundant spaces from the given string:
 166            
 167            static void _normalize(char* text)
 168            {
 169                Uint32 length = strlen(text);
 170                char* p = text;
 171                char* end = p + length;
 172            
 173                // Remove leading spaces:
 174            
 175 chuck 1.26     while (_isspace(*p))
 176            		p++;
 177 mike  1.13 
 178                if (p != text)
 179            	memmove(text, p, end - p + 1);
 180            
 181                p = text;
 182            
 183                // Look for sequences of more than one space and remove all but one.
 184            
 185                for (;;)
 186                {
 187            	// Advance to the next space:
 188            
 189 chuck 1.26 	while (*p && !_isspace(*p))
 190 mike  1.13 	    p++;
 191            
 192            	if (!*p)
 193            	    break;
 194            
 195            	// Advance to the next non-space:
 196            
 197            	char* q = p++;
 198            
 199 chuck 1.26 	while (_isspace(*p))
 200 mike  1.13 	    p++;
 201            
 202            	// Discard trailing spaces (if we are at the end):
 203            
 204            	if (!*p)
 205            	{
 206            	    *q = '\0';
 207            	    break;
 208            	}
 209            
 210            	// Remove the redundant spaces:
 211            
 212            	Uint32 n = p - q;
 213            
 214            	if (n > 1)
 215            	{
 216            	    *q++ = ' ';
 217            	    memmove(q, p, end - p + 1);
 218            	    p = q;
 219            	}
 220                }
 221 mike  1.13 }
 222            
 223            ////////////////////////////////////////////////////////////////////////////////
 224            //
 225            // XmlException
 226            //
 227            ////////////////////////////////////////////////////////////////////////////////
 228            
 229            static const char* _xmlMessages[] =
 230            {
 231                "Bad opening element",
 232                "Bad closing element",
 233                "Bad attribute name",
 234                "Exepected equal sign",
 235                "Bad attribute value",
 236                "A \"--\" sequence found within comment",
 237                "Unterminated comment",
 238                "Unterminated CDATA block",
 239                "Unterminated DOCTYPE",
 240                "Too many attributes: parser only handles 10",
 241                "Malformed reference",
 242 mike  1.13     "Expected a comment or CDATA following \"<!\" sequence",
 243                "Closing element does not match opening element",
 244                "One or more tags are still open",
 245                "More than one root element was encountered",
 246                "Validation error",
 247                "Semantic error"
 248            };
 249            
 250 chuck 1.19 static const char* _xmlKeys[] = 
 251            {
 252 humberto 1.20     "Common.XmlParser.BAD_START_TAG",
 253 chuck    1.19     "Common.XmlParser.BAD_END_TAG",
 254                   "Common.XmlParser.BAD_ATTRIBUTE_NAME",
 255                   "Common.XmlParser.EXPECTED_EQUAL_SIGN",
 256                   "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
 257                   "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
 258                   "Common.XmlParser.UNTERMINATED_COMMENT",
 259                   "Common.XmlParser.UNTERMINATED_CDATA",
 260                   "Common.XmlParser.UNTERMINATED_DOCTYPE",
 261                   "Common.XmlParser.TOO_MANY_ATTRIBUTES",
 262                   "Common.XmlParser.MALFORMED_REFERENCE",
 263                   "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
 264                   "Common.XmlParser.START_END_MISMATCH",
 265                   "Common.XmlParser.UNCLOSED_TAGS", 
 266                   "Common.XmlParser.MULTIPLE_ROOTS",
 267                   "Common.XmlParser.VALIDATION_ERROR",
 268                   "Common.XmlParser.SEMANTIC_ERROR"
 269               };
 270               
 271 chuck    1.23 // l10n replace _formMessage (comment out the old one)
 272 chuck    1.19 /*
 273 mike     1.13 static String _formMessage(Uint32 code, Uint32 line, const String& message)
 274               {
 275                   String result = _xmlMessages[Uint32(code) - 1];
 276               
 277                   char buffer[32];
 278                   sprintf(buffer, "%d", line);
 279                   result.append(": on line ");
 280                   result.append(buffer);
 281               
 282                   if (message.size())
 283                   {
 284               	result.append(": ");
 285               	result.append(message);
 286                   }
 287               
 288                   return result;
 289               }
 290 chuck    1.19 */
 291               
 292               static MessageLoaderParms _formMessage(Uint32 code, Uint32 line, const String& message)
 293               {
 294                   String dftMsg = _xmlMessages[Uint32(code) - 1];
 295                   String key = _xmlKeys[Uint32(code) - 1];
 296               	String msg = message;
 297               
 298                   dftMsg.append(": on line $0");
 299                   if (message.size())
 300                   {
 301 humberto 1.20     	msg = ": " + msg;
 302 chuck    1.19     	dftMsg.append("$1");
 303                   }    
 304               
 305                   return MessageLoaderParms(key, dftMsg, line ,msg);
 306               }
 307               
 308               static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
 309               {
 310                   String dftMsg = _xmlMessages[Uint32(code) - 1];
 311                   String key = _xmlKeys[Uint32(code) - 1];
 312               
 313                   dftMsg.append(": on line $0");
 314                
 315                   return MessageLoaderParms(key, dftMsg, line);
 316               }
 317               
 318 mike     1.13 
 319               XmlException::XmlException(
 320                   XmlException::Code code, 
 321                   Uint32 lineNumber,
 322                   const String& message) 
 323                   : Exception(_formMessage(code, lineNumber, message))
 324               {
 325               
 326               }
 327               
 328 chuck    1.19 
 329               XmlException::XmlException(
 330                   XmlException::Code code, 
 331                   Uint32 lineNumber,
 332                   MessageLoaderParms& msgParms) 
 333                   : Exception(_formPartialMessage(code, lineNumber))
 334               {
 335 humberto 1.21 	if (msgParms.default_msg.size())
 336                   {
 337                   	msgParms.default_msg = ": " + msgParms.default_msg;
 338                   } 
 339 chuck    1.19 	_rep->message.append(MessageLoader::getMessage(msgParms));
 340               }
 341               
 342               
 343 mike     1.13 ////////////////////////////////////////////////////////////////////////////////
 344               //
 345               // XmlValidationError
 346               //
 347               ////////////////////////////////////////////////////////////////////////////////
 348               
 349               XmlValidationError::XmlValidationError(
 350                   Uint32 lineNumber,
 351                   const String& message)
 352                   : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
 353               {
 354               
 355               }
 356               
 357 chuck    1.19 
 358               XmlValidationError::XmlValidationError(
 359                   Uint32 lineNumber,
 360                   MessageLoaderParms& msgParms)
 361                   : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
 362               {
 363               
 364               }
 365               
 366               
 367 mike     1.13 ////////////////////////////////////////////////////////////////////////////////
 368               //
 369               // XmlSemanticError
 370               //
 371               ////////////////////////////////////////////////////////////////////////////////
 372               
 373               XmlSemanticError::XmlSemanticError(
 374                   Uint32 lineNumber,
 375                   const String& message)
 376                   : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
 377               {
 378               
 379               }
 380 chuck    1.19 
 381               
 382               XmlSemanticError::XmlSemanticError(
 383                   Uint32 lineNumber,
 384                   MessageLoaderParms& msgParms)
 385                   : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
 386               {
 387               
 388               }
 389               
 390 mike     1.13 
 391               ////////////////////////////////////////////////////////////////////////////////
 392               //
 393               // XmlParser
 394               //
 395               ////////////////////////////////////////////////////////////////////////////////
 396               
 397               XmlParser::XmlParser(char* text) : _line(1), _text(text), _current(text), 
 398                   _restoreChar('\0'), _foundRoot(false)
 399               {
 400               
 401               }
 402               
 403               Boolean XmlParser::next(XmlEntry& entry)
 404               {
 405                   if (!_putBackStack.isEmpty())
 406                   {
 407               	entry = _putBackStack.top();
 408               	_putBackStack.pop();
 409               	return true;
 410                   }
 411 mike     1.13 
 412                   // If a character was overwritten with a null-terminator the last
 413                   // time this routine was called, then put back that character. Before
 414                   // exiting of course, restore the null-terminator.
 415               
 416                   char* nullTerminator = 0;
 417               
 418                   if (_restoreChar && !*_current)
 419                   {
 420               	nullTerminator = _current;
 421               	*_current = _restoreChar;
 422               	_restoreChar = '\0';
 423                   }
 424               
 425                   // Skip over any whitespace:
 426               
 427                   _skipWhitespace(_current);
 428               
 429                   if (!*_current)
 430                   {
 431               	if (nullTerminator)
 432 mike     1.13 	    *nullTerminator = '\0';
 433               
 434               	if (!_stack.isEmpty())
 435               	    throw XmlException(XmlException::UNCLOSED_TAGS, _line);
 436               
 437               	return false;
 438                   }
 439               
 440                   // Either a "<...>" or content begins next:
 441               
 442                   if (*_current == '<')
 443                   {
 444               	_current++;
 445               	_getElement(_current, entry);
 446               
 447               	if (nullTerminator)
 448               	    *nullTerminator = '\0';
 449               
 450               	if (entry.type == XmlEntry::START_TAG)
 451               	{
 452               	    if (_stack.isEmpty() && _foundRoot)
 453 mike     1.13 		throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
 454               
 455               	    _foundRoot = true;
 456               	    _stack.push((char*)entry.text);
 457               	}
 458               	else if (entry.type == XmlEntry::END_TAG)
 459               	{
 460               	    if (_stack.isEmpty())
 461               		throw XmlException(XmlException::START_END_MISMATCH, _line);
 462               
 463               	    if (strcmp(_stack.top(), entry.text) != 0)
 464               		throw XmlException(XmlException::START_END_MISMATCH, _line);
 465               
 466               	    _stack.pop();
 467               	}
 468               
 469               	return true;
 470                   }
 471                   else
 472                   {
 473               	entry.type = XmlEntry::CONTENT;
 474 mike     1.13 	entry.text = _current;
 475               	_getContent(_current);
 476               	_restoreChar = *_current;
 477               	*_current = '\0';
 478               
 479               	if (nullTerminator)
 480               	    *nullTerminator = '\0';
 481               
 482               	_substituteReferences((char*)entry.text);
 483               	_normalize((char*)entry.text);
 484               
 485               	return true;
 486                   }
 487               }
 488               
 489               void XmlParser::putBack(XmlEntry& entry)
 490               {
 491                   _putBackStack.push(entry);
 492               }
 493               
 494               XmlParser::~XmlParser()
 495 mike     1.13 {
 496                   // Nothing to do!
 497               }
 498               
 499               void XmlParser::_skipWhitespace(char*& p)
 500               {
 501 chuck    1.26     while (*p && _isspace(*p))
 502 mike     1.13     {
 503               	if (*p == '\n')
 504               	    _line++;
 505               
 506               	p++;
 507                   }
 508               }
 509               
 510               Boolean XmlParser::_getElementName(char*& p)
 511               {
 512 david    1.25     if (!(((*p >= 'A') && (*p <= 'Z')) ||
 513 kumpf    1.24           ((*p >= 'a') && (*p <= 'z')) ||
 514                         (*p == '_')))
 515 mike     1.13 	throw XmlException(XmlException::BAD_START_TAG, _line);
 516 kumpf    1.24     p++;
 517 mike     1.13 
 518 david    1.22     while ((*p) &&
 519               	   (((*p >= 'A') && (*p <= 'Z')) ||
 520               	    ((*p >= 'a') && (*p <= 'z')) ||
 521 kumpf    1.24 	    ((*p >= '0') && (*p <= '9')) ||
 522 david    1.22 	    *p == '_' || *p == '-' || *p == ':' || *p == '.'))
 523 mike     1.13 	p++;
 524               
 525                   // The next character must be a space:
 526               
 527 chuck    1.26     if (_isspace(*p))
 528 mike     1.13     {
 529               	*p++ = '\0';
 530               	_skipWhitespace(p);
 531                   }
 532               
 533                   if (*p == '>')
 534                   {
 535               	*p++ = '\0';
 536               	return true;
 537                   }
 538               
 539                   return false;
 540               }
 541               
 542               Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement)
 543               {
 544                   openCloseElement = false;
 545               
 546 david    1.25     if (!(((*p >= 'A') && (*p <= 'Z')) ||
 547 kumpf    1.24           ((*p >= 'a') && (*p <= 'z')) ||
 548                         (*p == '_')))
 549 mike     1.13 	throw XmlException(XmlException::BAD_START_TAG, _line);
 550 kumpf    1.24     p++;
 551 mike     1.13 
 552 david    1.22     while ((*p) &&
 553               	   (((*p >= 'A') && (*p <= 'Z')) ||
 554               	    ((*p >= 'a') && (*p <= 'z')) ||
 555 kumpf    1.24 	    ((*p >= '0') && (*p <= '9')) ||
 556 david    1.22 	    *p == '_' || *p == '-' || *p == ':' || *p == '.'))
 557 mike     1.13 	p++;
 558               
 559                   // The next character must be a space:
 560               
 561 chuck    1.26     if (_isspace(*p))
 562 mike     1.13     {
 563               	*p++ = '\0';
 564               	_skipWhitespace(p);
 565                   }
 566               
 567                   if (*p == '>')
 568                   {
 569               	*p++ = '\0';
 570               	return true;
 571                   }
 572               
 573                   if (p[0] == '/' && p[1] == '>')
 574                   {
 575               	openCloseElement = true;
 576               	*p = '\0';
 577               	p += 2;
 578               	return true;
 579                   }
 580               
 581                   return false;
 582               }
 583 mike     1.13 
 584               void XmlParser::_getAttributeNameAndEqual(char*& p)
 585               {
 586 david    1.25     if (!(((*p >= 'A') && (*p <= 'Z')) ||
 587 kumpf    1.24           ((*p >= 'a') && (*p <= 'z')) ||
 588                         (*p == '_')))
 589 mike     1.13 	throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
 590 kumpf    1.24     p++;
 591 mike     1.13 
 592 david    1.22     while ((*p) &&
 593               	   (((*p >= 'A') && (*p <= 'Z')) ||
 594               	    ((*p >= 'a') && (*p <= 'z')) ||
 595 kumpf    1.24 	    ((*p >= '0') && (*p <= '9')) ||
 596 david    1.22 	    *p == '_' || *p == '-' || *p == ':' || *p == '.'))
 597 mike     1.13 	p++;
 598               
 599                   char* term = p;
 600               
 601                   _skipWhitespace(p);
 602               
 603                   if (*p != '=')
 604               	throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
 605               
 606                   p++;
 607               
 608                   _skipWhitespace(p);
 609               
 610                   *term = '\0';
 611               }
 612               
 613               void XmlParser::_getAttributeValue(char*& p)
 614               {
 615                   // ATTN-B: handle values contained in semiquotes:
 616               
 617                   if (*p != '"' && *p != '\'')
 618 mike     1.13 	throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 619               
 620                   char startChar = *p++;
 621               
 622                   while (*p && *p != startChar)
 623               	p++;
 624               
 625                   if (*p != startChar)
 626               	throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 627               
 628                   *p++ = '\0';
 629               }
 630               
 631               void XmlParser::_getComment(char*& p)
 632               {
 633                   // Now p points to first non-whitespace character beyond "<--" sequence:
 634               
 635                   for (; *p; p++)
 636                   {
 637               	if (p[0] == '-' && p[1] == '-')
 638               	{
 639 mike     1.13 	    if (p[2] != '>')
 640               	    {
 641               		throw XmlException(
 642               		    XmlException::MINUS_MINUS_IN_COMMENT, _line);
 643               	    }
 644               
 645               	    // Find end of comment (excluding whitespace):
 646               
 647               	    *p = '\0';
 648               	    p += 3;
 649               	    return;
 650               	}
 651                   }
 652               
 653                   // If it got this far, then the comment is unterminated:
 654               
 655                   throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
 656               }
 657               
 658               void XmlParser::_getCData(char*& p)
 659               {
 660 mike     1.13     // At this point p points one past "<![CDATA[" sequence:
 661               
 662                   for (; *p; p++)
 663                   {
 664               	if (p[0] == ']' && p[1] == ']' && p[2] == '>')
 665               	{
 666               	    *p = '\0';
 667               	    p += 3;
 668               	    return;
 669               	}
 670               	else if (*p == '\n')
 671               	    _line++;
 672                   }
 673               
 674                   // If it got this far, then the comment is unterminated:
 675               
 676                   throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
 677               }
 678               
 679               void XmlParser::_getDocType(char*& p)
 680               {
 681 mike     1.13     // Just ignore the DOCTYPE command for now:
 682               
 683                   for (; *p && *p != '>'; p++)
 684                   {
 685               	if (*p == '\n')
 686               	    _line++;
 687                   }
 688               
 689                   if (*p != '>')
 690               	throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
 691               
 692                   p++;
 693               }
 694               
 695               void XmlParser::_getContent(char*& p)
 696               {
 697                   while (*p && *p != '<')
 698                   {
 699               	if (*p == '\n')
 700               	    _line++;
 701               
 702 mike     1.13 	p++;
 703                   }
 704               }
 705               
 706               void XmlParser::_substituteReferences(char* text)
 707               {
 708                   Uint32 rem = strlen(text);
 709               
 710                   for (char* p = text; *p; p++, rem--)
 711                   {
 712               	if (*p == '&')
 713               	{
 714 kumpf    1.18             // Process character or entity reference
 715 mike     1.13 
 716 kumpf    1.18             Uint16 referenceChar = 0;
 717                           Uint32 referenceLength = 0;
 718                           XmlException::Code code = XmlException::MALFORMED_REFERENCE;
 719               
 720                           if (*(p+1) == '#')
 721                           {
 722                               // Found a character (numeric) reference
 723                               // Determine whether it is decimal or hex
 724                               if (*(p+2) == 'x')
 725                               {
 726                                   // Decode a hexadecimal character reference
 727                                   char* q = p+3;
 728               
 729                                   // At most four digits are allowed, plus trailing ';'
 730                                   Uint32 numDigits;
 731                                   for (numDigits = 0; numDigits < 5; numDigits++, q++)
 732                                   {
 733                                       if (isdigit(*q))
 734                                       {
 735                                           referenceChar = (referenceChar << 4);
 736                                           referenceChar += (*q - '0');
 737 kumpf    1.18                         }
 738                                       else if ((*q >= 'A') && (*q <= 'F'))
 739                                       {
 740                                           referenceChar = (referenceChar << 4);
 741                                           referenceChar += (*q - 'A' + 10);
 742                                       }
 743                                       else if ((*q >= 'a') && (*q <= 'f'))
 744                                       {
 745                                           referenceChar = (referenceChar << 4);
 746                                           referenceChar += (*q - 'a' + 10);
 747                                       }
 748                                       else if (*q == ';')
 749                                       {
 750                                           break;
 751                                       }
 752                                       else
 753                                       {
 754                                           throw XmlException(code, _line);
 755                                       }
 756                                   }
 757               
 758 kumpf    1.18                     // Hex number must be 1 - 4 digits
 759                                   if ((numDigits == 0) || (numDigits > 4))
 760                                   {
 761                                       throw XmlException(code, _line);
 762                                   }
 763               
 764                                   // ATTN: Currently do not support 16-bit characters
 765                                   if (referenceChar > 0xff)
 766                                   {
 767                                       // ATTN: Is there a good way to say "unsupported"?
 768                                       throw XmlException(code, _line);
 769                                   }
 770               
 771                                   referenceLength = numDigits + 4;
 772                               }
 773                               else
 774                               {
 775                                   // Decode a decimal character reference
 776                                   Uint32 newChar = 0;
 777                                   char* q = p+2;
 778               
 779 kumpf    1.18                     // At most five digits are allowed, plus trailing ';'
 780                                   Uint32 numDigits;
 781                                   for (numDigits = 0; numDigits < 6; numDigits++, q++)
 782                                   {
 783                                       if (isdigit(*q))
 784                                       {
 785                                           newChar = (newChar * 10);
 786                                           newChar += (*q - '0');
 787                                       }
 788                                       else if (*q == ';')
 789                                       {
 790                                           break;
 791                                       }
 792                                       else
 793                                       {
 794                                           throw XmlException(code, _line);
 795                                       }
 796                                   }
 797               
 798                                   // Decimal number must be 1 - 5 digits and fit in 16 bits
 799                                   if ((numDigits == 0) || (numDigits > 5) ||
 800 kumpf    1.18                         (newChar > 0xffff))
 801                                   {
 802                                       throw XmlException(code, _line);
 803                                   }
 804               
 805                                   // ATTN: Currently do not support 16-bit characters
 806                                   if (newChar > 0xff)
 807                                   {
 808                                       // ATTN: Is there a good way to say "unsupported"?
 809                                       throw XmlException(code, _line);
 810                                   }
 811               
 812                                   referenceChar = Uint16(newChar);
 813                                   referenceLength = numDigits + 3;
 814                               }
 815                           }
 816                           else
 817                           {
 818                               // Check for entity reference
 819                               // ATTN: Inefficient if many entity references are supported
 820                               Uint32 i;
 821 kumpf    1.18                 for (i = 0; i < _REFERENCES_SIZE; i++)
 822                               {
 823                                   Uint32 length = _references[i].length;
 824                                   const char* match = _references[i].match;
 825               
 826                                   if (strncmp(p, _references[i].match, length) == 0)
 827                                   {
 828                                       referenceChar = _references[i].replacement;
 829                                       referenceLength = length;
 830                                       break;
 831                                   }
 832                               }
 833               
 834                               if (i == _REFERENCES_SIZE)
 835                               {
 836                                   // Didn't recognize the entity reference
 837                                   // ATTN: Is there a good way to say "unsupported"?
 838                                   throw XmlException(code, _line);
 839                               }
 840                           }
 841               
 842 kumpf    1.18             // Replace the reference with the correct character
 843                           *p = (char)referenceChar;
 844                           char* q = p + referenceLength;
 845                           rem = rem - referenceLength + 1;
 846                           memmove(p + 1, q, rem);
 847 mike     1.13 	}
 848                   }
 849               }
 850               
 851               static const char _EMPTY_STRING[] = "";
 852               
 853               void XmlParser::_getElement(char*& p, XmlEntry& entry)
 854               {
 855                   entry.attributeCount = 0;
 856               
 857                   //--------------------------------------------------------------------------
 858                   // Get the element name (expect one of these: '?', '!', [A-Za-z_])
 859                   //--------------------------------------------------------------------------
 860               
 861                   if (*p == '?')
 862                   {
 863               	entry.type = XmlEntry::XML_DECLARATION;
 864               	entry.text = ++p;
 865               
 866               	Boolean openCloseElement = false;
 867               
 868 mike     1.13 	if (_getElementName(p))
 869               	    return;
 870                   }
 871                   else if (*p == '!')
 872                   {
 873               	p++;
 874               
 875               	// Expect a comment or CDATA:
 876               
 877               	if (p[0] == '-' && p[1] == '-')
 878               	{
 879               	    p += 2;
 880               	    entry.type = XmlEntry::COMMENT;
 881               	    entry.text = p;
 882               	    _getComment(p);
 883               	    return;
 884               	}
 885               	else if (memcmp(p, "[CDATA[", 7) == 0)
 886               	{
 887               	    p += 7;
 888               	    entry.type = XmlEntry::CDATA;
 889 mike     1.13 	    entry.text = p;
 890               	    _getCData(p);
 891               	    return;
 892               	}
 893               	else if (memcmp(p, "DOCTYPE", 7) == 0)
 894               	{
 895               	    entry.type = XmlEntry::DOCTYPE;
 896               	    entry.text = _EMPTY_STRING;
 897               	    _getDocType(p);
 898               	    return;
 899               	}
 900               	throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
 901                   }
 902                   else if (*p == '/')
 903                   {
 904               	entry.type = XmlEntry::END_TAG;
 905               	entry.text = ++p;
 906               
 907               	if (!_getElementName(p))
 908               	    throw(XmlException(XmlException::BAD_END_TAG, _line));
 909               
 910 mike     1.13 	return;
 911                   }
 912 david    1.25     else if ((((*p >= 'A') && (*p <= 'Z')) ||
 913 kumpf    1.24               ((*p >= 'a') && (*p <= 'z')) ||
 914                             (*p == '_')))
 915 mike     1.13     {
 916               	entry.type = XmlEntry::START_TAG;
 917               	entry.text = p;
 918               
 919               	Boolean openCloseElement = false;
 920               
 921               	if (_getOpenElementName(p, openCloseElement))
 922               	{
 923               	    if (openCloseElement)
 924               		entry.type = XmlEntry::EMPTY_TAG;
 925               	    return;
 926               	}
 927                   }
 928                   else
 929               	throw XmlException(XmlException::BAD_START_TAG, _line);
 930               
 931                   //--------------------------------------------------------------------------
 932                   // Grab all the attributes:
 933                   //--------------------------------------------------------------------------
 934               
 935                   for (;;)
 936 mike     1.13     {
 937               	if (entry.type == XmlEntry::XML_DECLARATION)
 938               	{
 939               	    if (p[0] == '?' && p[1] == '>')
 940               	    {
 941               		p += 2;
 942               		return;
 943               	    }
 944               	}
 945               	else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
 946               	{
 947               	    entry.type = XmlEntry::EMPTY_TAG;
 948               	    p += 2;
 949               	    return;
 950               	}
 951               	else if (*p == '>')
 952               	{
 953               	    p++;
 954               	    return;
 955               	}
 956               
 957 mike     1.13 	XmlAttribute attr;
 958               	attr.name = p;
 959               	_getAttributeNameAndEqual(p);
 960               
 961               	if (*p != '"' && *p != '\'')
 962               	    throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 963               
 964               	attr.value = p + 1;
 965               	_getAttributeValue(p);
 966               
 967               	if (entry.type == XmlEntry::XML_DECLARATION)
 968               	{
 969               	    // The next thing must a space or a "?>":
 970               
 971 chuck    1.26 	    if (!(p[0] == '?' && p[1] == '>') && !_isspace(*p))
 972 mike     1.13 	    {
 973               		throw XmlException(
 974               		    XmlException::BAD_ATTRIBUTE_VALUE, _line);
 975               	    }
 976               	}
 977 chuck    1.26 	else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || _isspace(*p)))
 978 mike     1.13 	{
 979               	    // The next thing must be a space or a '>':
 980               
 981               	    throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 982               	}
 983               
 984               	_skipWhitespace(p);
 985               
 986               	if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES)
 987               	    throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line);
 988               
 989               	_substituteReferences((char*)attr.value);
 990               	entry.attributes[entry.attributeCount++] = attr;
 991                   }
 992               }
 993               
 994               static const char* _typeStrings[] =
 995               {
 996                   "XML_DECLARATION", 
 997                   "START_TAG", 
 998                   "EMPTY_TAG", 
 999 mike     1.13     "END_TAG", 
1000                   "COMMENT",
1001                   "CDATA",
1002                   "DOCTYPE",
1003                   "CONTENT" 
1004               };
1005               
1006               void XmlEntry::print() const
1007               {
1008                   PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
1009               
1010                   Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
1011               
1012                   if (needQuotes)
1013               	PEGASUS_STD(cout) << "\"";
1014               	
1015                   _printValue(text);
1016               
1017                   if (needQuotes)
1018               	PEGASUS_STD(cout) << "\"";
1019               
1020 mike     1.13     PEGASUS_STD(cout) << '\n';
1021               
1022                   for (Uint32 i = 0; i < attributeCount; i++)
1023                   {
1024               	PEGASUS_STD(cout) << "    " << attributes[i].name << "=\"";
1025               	_printValue(attributes[i].value);
1026               	PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
1027                   }
1028               }
1029               
1030               const XmlAttribute* XmlEntry::findAttribute(
1031                   const char* name) const
1032               {
1033                   for (Uint32 i = 0; i < attributeCount; i++)
1034                   {
1035               	if (strcmp(attributes[i].name, name) == 0)
1036               	    return &attributes[i];
1037                   }
1038               
1039                   return 0;
1040               }
1041 mike     1.13 
1042               // Find first non-whitespace character (set first) and last non-whitespace
1043               // character (set last one past this). For example, consider this string:
1044               //
1045               //	"   87     "
1046               //
1047               // The first pointer would point to '8' and the last pointer woudl point one
1048               // beyond '7'.
1049               
1050               static void _findEnds(
1051                   const char* str, 
1052                   const char*& first, 
1053                   const char*& last)
1054               {
1055                   first = str;
1056               
1057 chuck    1.26     while (_isspace(*first))
1058 mike     1.13 	first++;
1059               
1060                   if (!*first)
1061                   {
1062               	last = first;
1063               	return;
1064                   }
1065               
1066                   last = first + strlen(first);
1067               
1068 chuck    1.26     while (last != first && _isspace(last[-1]))
1069 mike     1.13 	last--;
1070               }
1071               
1072               Boolean XmlEntry::getAttributeValue(
1073                   const char* name, 
1074                   Uint32& value) const
1075               {
1076                   const XmlAttribute* attr = findAttribute(name);
1077               
1078                   if (!attr)
1079               	return false;
1080               
1081                   const char* first;
1082                   const char* last;
1083                   _findEnds(attr->value, first, last);
1084               
1085                   char* end = 0;
1086                   long tmp = strtol(first, &end, 10);
1087               
1088                   if (!end || end != last)
1089               	return false;
1090 mike     1.13 
1091                   value = Uint32(tmp);
1092                   return true;
1093               }
1094               
1095               Boolean XmlEntry::getAttributeValue(
1096                   const char* name, 
1097                   Real32& value) const
1098               {
1099                   const XmlAttribute* attr = findAttribute(name);
1100               
1101                   if (!attr)
1102               	return false;
1103               
1104                   const char* first;
1105                   const char* last;
1106                   _findEnds(attr->value, first, last);
1107               
1108                   char* end = 0;
1109                   double tmp = strtod(first, &end);
1110               
1111 mike     1.13     if (!end || end != last)
1112               	return false;
1113               
1114                   value = Uint32(tmp);
1115                   return true;
1116               }
1117               
1118               Boolean XmlEntry::getAttributeValue(
1119                   const char* name, 
1120                   const char*& value) const
1121               {
1122                   const XmlAttribute* attr = findAttribute(name);
1123               
1124                   if (!attr)
1125               	return false;
1126               
1127                   value = attr->value;
1128                   return true;
1129               }
1130               
1131               Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1132 mike     1.13 {
1133                   const char* tmp;
1134               
1135                   if (!getAttributeValue(name, tmp))
1136               	return false;
1137               
1138 chuck    1.28     value = String(tmp);
1139 mike     1.13     return true;
1140               }
1141               
1142               void XmlAppendCString(Array<Sint8>& out, const char* str)
1143               {
1144                   out.append(str, strlen(str));
1145               }
1146               
1147               PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2