(file) Return to XmlParser.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 mike  1.13 //%/////////////////////////////////////////////////////////////////////////////
   2            //
   3 kumpf 1.16 // Copyright (c) 2000, 2001, 2002 BMC Software, Hewlett-Packard Company, IBM,
   4            // The Open Group, Tivoli Systems
   5 mike  1.13 //
   6            // Permission is hereby granted, free of charge, to any person obtaining a copy
   7 kumpf 1.16 // of this software and associated documentation files (the "Software"), to
   8            // deal in the Software without restriction, including without limitation the
   9            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  10 mike  1.13 // sell copies of the Software, and to permit persons to whom the Software is
  11            // furnished to do so, subject to the following conditions:
  12            // 
  13 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
  14 mike  1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
  15            // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
  16 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
  17            // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  18            // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  19 mike  1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  20            // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  21            //
  22            //==============================================================================
  23            //
  24            // Author: Mike Brasher (mbrasher@bmc.com)
  25            //
  26            // Modified By:
  27            //
  28            //%/////////////////////////////////////////////////////////////////////////////
  29            
  30            ////////////////////////////////////////////////////////////////////////////////
  31            //
  32            // XmlParser
  33            //
   34            //	This file contains a simple non-validating XML parser. Here are
   35            //	several rules for well-formed XML:
  36            //
  37            //	    1.	Documents must begin with an XML declaration:
  38            //
  39            //		<?xml version="1.0" standalone="yes"?>
  40 mike  1.13 //
  41            //	    2.	Comments have the form:
  42            //
  43            //		<!-- blah blah blah -->
  44            //
  45            //	    3. The following entity references are supported:
  46            //
   47            //		&amp; - ampersand
   48            //	 	&lt; - less-than
   49            //		&gt; - greater-than
   50            //		&quot; - full quote
   51            //		&apos; - apostrophe
  52            //
  53 kumpf 1.18 //             as well as character (numeric) references:
  54            
  55            //              &#49; - decimal reference for character '1'
  56            //              &#x31; - hexadecimal reference for character '1'
  57            //
  58 mike  1.13 //	    4. Element names and attribute names take the following form:
  59            //
   60            //		[A-Za-z_][A-Za-z_0-9-.:]*
  61            //
  62            //	    5.	Arbitrary data (CDATA) can be enclosed like this:
  63            //
  64            //		    <![CDATA[
  65            //		    ...
  66            //		    ]]>
  67            //
  68            //	    6.	Element names and attributes names are case-sensitive.
  69            //
   70            //	    7.	Attribute values must be delimited by full (") or half (')
   71            //		quotes; undelimited attribute values are rejected.
  72            //
  73            //	    8.  <!DOCTYPE...>
  74            //
  75            // TODO:
  76            //
  77 karl  1.15 //      ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is work.
  78 mike  1.13 //	Handle <!DOCTYPE...> sections which are complicated (containing
  79            //        rules rather than references to files).
  80            //
  81            //	Remove newlines from string literals:
  82            //
  83            //          Example: <xyz x="hello
  84            //		world">
  85            //
  86            ////////////////////////////////////////////////////////////////////////////////
  87            
  88 sage  1.14 #include <Pegasus/Common/Config.h>
  89 mike  1.13 #include <cctype>
  90            #include <cstdio>
  91            #include <cstdlib>
  92            #include <cstring>
  93            #include "XmlParser.h"
  94            #include "Logger.h"
  95 chuck 1.19 #include "ExceptionRep.h"
  96 mike  1.13 
  97            PEGASUS_NAMESPACE_BEGIN
  98            
  99            #define PEGASUS_ARRAY_T XmlEntry
 100            # include "ArrayImpl.h"
 101            #undef PEGASUS_ARRAY_T
 102            
 103            
 104            ////////////////////////////////////////////////////////////////////////////////
 105            //
 106            // Static helper functions
 107            //
 108            ////////////////////////////////////////////////////////////////////////////////
 109            
 110            static void _printValue(const char* p)
 111            {
 112                for (; *p; p++)
 113                {
 114            	if (*p == '\n')
 115            	    PEGASUS_STD(cout) << "\\n";
 116            	else if (*p == '\r')
 117 mike  1.13 	    PEGASUS_STD(cout) << "\\r";
 118            	else if (*p == '\t')
 119            	    PEGASUS_STD(cout) << "\\t";
 120            	else
 121            	    PEGASUS_STD(cout) << *p;
 122                }
 123            }
 124            
 125            struct EntityReference
 126            {
 127                const char* match;
 128                Uint32 length;
 129                char replacement;
 130            };
 131            
 132 kumpf 1.18 // ATTN: Add support for more entity references
 133 mike  1.13 static EntityReference _references[] =
 134            {
 135                { "&amp;", 5, '&' },
 136                { "&lt;", 4, '<' },
 137                { "&gt;", 4, '>' },
 138                { "&quot;", 6, '"' },
 139                { "&apos;", 6, '\'' }
 140            };
 141            
 142            static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
 143            
 144            // Remove all redundant spaces from the given string:
 145            
 146            static void _normalize(char* text)
 147            {
 148                Uint32 length = strlen(text);
 149                char* p = text;
 150                char* end = p + length;
 151            
 152                // Remove leading spaces:
 153            
 154 mike  1.13     while (isspace(*p))
 155            	p++;
 156            
 157                if (p != text)
 158            	memmove(text, p, end - p + 1);
 159            
 160                p = text;
 161            
 162                // Look for sequences of more than one space and remove all but one.
 163            
 164                for (;;)
 165                {
 166            	// Advance to the next space:
 167            
 168            	while (*p && !isspace(*p))
 169            	    p++;
 170            
 171            	if (!*p)
 172            	    break;
 173            
 174            	// Advance to the next non-space:
 175 mike  1.13 
 176            	char* q = p++;
 177            
 178            	while (isspace(*p))
 179            	    p++;
 180            
 181            	// Discard trailing spaces (if we are at the end):
 182            
 183            	if (!*p)
 184            	{
 185            	    *q = '\0';
 186            	    break;
 187            	}
 188            
 189            	// Remove the redundant spaces:
 190            
 191            	Uint32 n = p - q;
 192            
 193            	if (n > 1)
 194            	{
 195            	    *q++ = ' ';
 196 mike  1.13 	    memmove(q, p, end - p + 1);
 197            	    p = q;
 198            	}
 199                }
 200            }
 201            
 202            ////////////////////////////////////////////////////////////////////////////////
 203            //
 204            // XmlException
 205            //
 206            ////////////////////////////////////////////////////////////////////////////////
 207            
 208            static const char* _xmlMessages[] =
 209            {
 210                "Bad opening element",
 211                "Bad closing element",
 212                "Bad attribute name",
 213                "Exepected equal sign",
 214                "Bad attribute value",
 215                "A \"--\" sequence found within comment",
 216                "Unterminated comment",
 217 mike  1.13     "Unterminated CDATA block",
 218                "Unterminated DOCTYPE",
 219                "Too many attributes: parser only handles 10",
 220                "Malformed reference",
 221                "Expected a comment or CDATA following \"<!\" sequence",
 222                "Closing element does not match opening element",
 223                "One or more tags are still open",
 224                "More than one root element was encountered",
 225                "Validation error",
 226                "Semantic error"
 227            };
 228            
 229 chuck 1.19 static const char* _xmlKeys[] = 
 230            {
 231 humberto 1.20     "Common.XmlParser.BAD_START_TAG",
 232 chuck    1.19     "Common.XmlParser.BAD_END_TAG",
 233                   "Common.XmlParser.BAD_ATTRIBUTE_NAME",
 234                   "Common.XmlParser.EXPECTED_EQUAL_SIGN",
 235                   "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
 236                   "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
 237                   "Common.XmlParser.UNTERMINATED_COMMENT",
 238                   "Common.XmlParser.UNTERMINATED_CDATA",
 239                   "Common.XmlParser.UNTERMINATED_DOCTYPE",
 240                   "Common.XmlParser.TOO_MANY_ATTRIBUTES",
 241                   "Common.XmlParser.MALFORMED_REFERENCE",
 242                   "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
 243                   "Common.XmlParser.START_END_MISMATCH",
 244                   "Common.XmlParser.UNCLOSED_TAGS", 
 245                   "Common.XmlParser.MULTIPLE_ROOTS",
 246                   "Common.XmlParser.VALIDATION_ERROR",
 247                   "Common.XmlParser.SEMANTIC_ERROR"
 248               };
 249               
 250               // l10n TODO replace _formMessage with the commented one and uncomment
 251               // the new constructors
 252               /*
 253 mike     1.13 static String _formMessage(Uint32 code, Uint32 line, const String& message)
 254               {
 255                   String result = _xmlMessages[Uint32(code) - 1];
 256               
 257                   char buffer[32];
 258                   sprintf(buffer, "%d", line);
 259                   result.append(": on line ");
 260                   result.append(buffer);
 261               
 262                   if (message.size())
 263                   {
 264               	result.append(": ");
 265               	result.append(message);
 266                   }
 267               
 268                   return result;
 269               }
 270 chuck    1.19 */
 271               
 272               static MessageLoaderParms _formMessage(Uint32 code, Uint32 line, const String& message)
 273               {
 274                   String dftMsg = _xmlMessages[Uint32(code) - 1];
 275                   String key = _xmlKeys[Uint32(code) - 1];
 276               	String msg = message;
 277               
 278                   dftMsg.append(": on line $0");
 279                   if (message.size())
 280                   {
 281 humberto 1.20     	msg = ": " + msg;
 282 chuck    1.19     	dftMsg.append("$1");
 283                   }    
 284               
 285                   return MessageLoaderParms(key, dftMsg, line ,msg);
 286               }
 287               
 288               static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
 289               {
 290                   String dftMsg = _xmlMessages[Uint32(code) - 1];
 291                   String key = _xmlKeys[Uint32(code) - 1];
 292               
 293                   dftMsg.append(": on line $0");
 294                
 295                   return MessageLoaderParms(key, dftMsg, line);
 296               }
 297               
 298 mike     1.13 
 299               XmlException::XmlException(
 300                   XmlException::Code code, 
 301                   Uint32 lineNumber,
 302                   const String& message) 
 303                   : Exception(_formMessage(code, lineNumber, message))
 304               {
 305               
 306               }
 307               
 308 chuck    1.19 
 309               XmlException::XmlException(
 310                   XmlException::Code code, 
 311                   Uint32 lineNumber,
 312                   MessageLoaderParms& msgParms) 
 313                   : Exception(_formPartialMessage(code, lineNumber))
 314               {
 315 humberto 1.21 	if (msgParms.default_msg.size())
 316                   {
 317                   	msgParms.default_msg = ": " + msgParms.default_msg;
 318                   } 
 319 chuck    1.19 	_rep->message.append(MessageLoader::getMessage(msgParms));
 320               }
 321               
 322               
 323 mike     1.13 ////////////////////////////////////////////////////////////////////////////////
 324               //
 325               // XmlValidationError
 326               //
 327               ////////////////////////////////////////////////////////////////////////////////
 328               
 329               XmlValidationError::XmlValidationError(
 330                   Uint32 lineNumber,
 331                   const String& message)
 332                   : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
 333               {
 334               
 335               }
 336               
 337 chuck    1.19 
 338               XmlValidationError::XmlValidationError(
 339                   Uint32 lineNumber,
 340                   MessageLoaderParms& msgParms)
 341                   : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
 342               {
 343               
 344               }
 345               
 346               
 347 mike     1.13 ////////////////////////////////////////////////////////////////////////////////
 348               //
 349               // XmlSemanticError
 350               //
 351               ////////////////////////////////////////////////////////////////////////////////
 352               
 353               XmlSemanticError::XmlSemanticError(
 354                   Uint32 lineNumber,
 355                   const String& message)
 356                   : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
 357               {
 358               
 359               }
 360 chuck    1.19 
 361               
 362               XmlSemanticError::XmlSemanticError(
 363                   Uint32 lineNumber,
 364                   MessageLoaderParms& msgParms)
 365                   : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
 366               {
 367               
 368               }
 369               
 370 mike     1.13 
 371               ////////////////////////////////////////////////////////////////////////////////
 372               //
 373               // XmlParser
 374               //
 375               ////////////////////////////////////////////////////////////////////////////////
 376               
 377               XmlParser::XmlParser(char* text) : _line(1), _text(text), _current(text), 
 378                   _restoreChar('\0'), _foundRoot(false)
 379               {
 380               
 381               }
 382               
 383               Boolean XmlParser::next(XmlEntry& entry)
 384               {
 385                   if (!_putBackStack.isEmpty())
 386                   {
 387               	entry = _putBackStack.top();
 388               	_putBackStack.pop();
 389               	return true;
 390                   }
 391 mike     1.13 
 392                   // If a character was overwritten with a null-terminator the last
 393                   // time this routine was called, then put back that character. Before
 394                   // exiting of course, restore the null-terminator.
 395               
 396                   char* nullTerminator = 0;
 397               
 398                   if (_restoreChar && !*_current)
 399                   {
 400               	nullTerminator = _current;
 401               	*_current = _restoreChar;
 402               	_restoreChar = '\0';
 403                   }
 404               
 405                   // Skip over any whitespace:
 406               
 407                   _skipWhitespace(_current);
 408               
 409                   if (!*_current)
 410                   {
 411               	if (nullTerminator)
 412 mike     1.13 	    *nullTerminator = '\0';
 413               
 414               	if (!_stack.isEmpty())
 415               	    throw XmlException(XmlException::UNCLOSED_TAGS, _line);
 416               
 417               	return false;
 418                   }
 419               
 420                   // Either a "<...>" or content begins next:
 421               
 422                   if (*_current == '<')
 423                   {
 424               	_current++;
 425               	_getElement(_current, entry);
 426               
 427               	if (nullTerminator)
 428               	    *nullTerminator = '\0';
 429               
 430               	if (entry.type == XmlEntry::START_TAG)
 431               	{
 432               	    if (_stack.isEmpty() && _foundRoot)
 433 mike     1.13 		throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
 434               
 435               	    _foundRoot = true;
 436               	    _stack.push((char*)entry.text);
 437               	}
 438               	else if (entry.type == XmlEntry::END_TAG)
 439               	{
 440               	    if (_stack.isEmpty())
 441               		throw XmlException(XmlException::START_END_MISMATCH, _line);
 442               
 443               	    if (strcmp(_stack.top(), entry.text) != 0)
 444               		throw XmlException(XmlException::START_END_MISMATCH, _line);
 445               
 446               	    _stack.pop();
 447               	}
 448               
 449               	return true;
 450                   }
 451                   else
 452                   {
 453               	entry.type = XmlEntry::CONTENT;
 454 mike     1.13 	entry.text = _current;
 455               	_getContent(_current);
 456               	_restoreChar = *_current;
 457               	*_current = '\0';
 458               
 459               	if (nullTerminator)
 460               	    *nullTerminator = '\0';
 461               
 462               	_substituteReferences((char*)entry.text);
 463               	_normalize((char*)entry.text);
 464               
 465               	return true;
 466                   }
 467               }
 468               
 469               void XmlParser::putBack(XmlEntry& entry)
 470               {
 471                   _putBackStack.push(entry);
 472               }
 473               
 474               XmlParser::~XmlParser()
 475 mike     1.13 {
 476                   // Nothing to do!
 477               }
 478               
 479               void XmlParser::_skipWhitespace(char*& p)
 480               {
 481                   while (*p && isspace(*p))
 482                   {
 483               	if (*p == '\n')
 484               	    _line++;
 485               
 486               	p++;
 487                   }
 488               }
 489               
 490               Boolean XmlParser::_getElementName(char*& p)
 491               {
 492                   if (!isalpha(*p) && *p != '_')
 493               	throw XmlException(XmlException::BAD_START_TAG, _line);
 494               
 495                   while (*p && 
 496 mike     1.13 	(isalnum(*p) || *p == '_' || *p == '-' || *p == ':' || *p == '.'))
 497               	p++;
 498               
 499                   // The next character must be a space:
 500               
 501                   if (isspace(*p))
 502                   {
 503               	*p++ = '\0';
 504               	_skipWhitespace(p);
 505                   }
 506               
 507                   if (*p == '>')
 508                   {
 509               	*p++ = '\0';
 510               	return true;
 511                   }
 512               
 513                   return false;
 514               }
 515               
 516               Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement)
 517 mike     1.13 {
 518                   openCloseElement = false;
 519               
 520                   if (!isalpha(*p) && *p != '_')
 521               	throw XmlException(XmlException::BAD_START_TAG, _line);
 522               
 523                   while (*p && 
 524               	(isalnum(*p) || *p == '_' || *p == '-' || *p == ':' || *p == '.'))
 525               	p++;
 526               
 527                   // The next character must be a space:
 528               
 529                   if (isspace(*p))
 530                   {
 531               	*p++ = '\0';
 532               	_skipWhitespace(p);
 533                   }
 534               
 535                   if (*p == '>')
 536                   {
 537               	*p++ = '\0';
 538 mike     1.13 	return true;
 539                   }
 540               
 541                   if (p[0] == '/' && p[1] == '>')
 542                   {
 543               	openCloseElement = true;
 544               	*p = '\0';
 545               	p += 2;
 546               	return true;
 547                   }
 548               
 549                   return false;
 550               }
 551               
 552               void XmlParser::_getAttributeNameAndEqual(char*& p)
 553               {
 554                   if (!isalpha(*p) && *p != '_')
 555               	throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
 556               
 557                   while (*p && 
 558               	(isalnum(*p) || *p == '_' || *p == '-' || *p == ':' || *p == '.'))
 559 mike     1.13 	p++;
 560               
 561                   char* term = p;
 562               
 563                   _skipWhitespace(p);
 564               
 565                   if (*p != '=')
 566               	throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
 567               
 568                   p++;
 569               
 570                   _skipWhitespace(p);
 571               
 572                   *term = '\0';
 573               }
 574               
 575               void XmlParser::_getAttributeValue(char*& p)
 576               {
 577                   // ATTN-B: handle values contained in semiquotes:
 578               
 579                   if (*p != '"' && *p != '\'')
 580 mike     1.13 	throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 581               
 582                   char startChar = *p++;
 583               
 584                   while (*p && *p != startChar)
 585               	p++;
 586               
 587                   if (*p != startChar)
 588               	throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 589               
 590                   *p++ = '\0';
 591               }
 592               
 593               void XmlParser::_getComment(char*& p)
 594               {
 595                   // Now p points to first non-whitespace character beyond "<--" sequence:
 596               
 597                   for (; *p; p++)
 598                   {
 599               	if (p[0] == '-' && p[1] == '-')
 600               	{
 601 mike     1.13 	    if (p[2] != '>')
 602               	    {
 603               		throw XmlException(
 604               		    XmlException::MINUS_MINUS_IN_COMMENT, _line);
 605               	    }
 606               
 607               	    // Find end of comment (excluding whitespace):
 608               
 609               	    *p = '\0';
 610               	    p += 3;
 611               	    return;
 612               	}
 613                   }
 614               
 615                   // If it got this far, then the comment is unterminated:
 616               
 617                   throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
 618               }
 619               
 620               void XmlParser::_getCData(char*& p)
 621               {
 622 mike     1.13     // At this point p points one past "<![CDATA[" sequence:
 623               
 624                   for (; *p; p++)
 625                   {
 626               	if (p[0] == ']' && p[1] == ']' && p[2] == '>')
 627               	{
 628               	    *p = '\0';
 629               	    p += 3;
 630               	    return;
 631               	}
 632               	else if (*p == '\n')
 633               	    _line++;
 634                   }
 635               
 636                   // If it got this far, then the comment is unterminated:
 637               
 638                   throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
 639               }
 640               
 641               void XmlParser::_getDocType(char*& p)
 642               {
 643 mike     1.13     // Just ignore the DOCTYPE command for now:
 644               
 645                   for (; *p && *p != '>'; p++)
 646                   {
 647               	if (*p == '\n')
 648               	    _line++;
 649                   }
 650               
 651                   if (*p != '>')
 652               	throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
 653               
 654                   p++;
 655               }
 656               
 657               void XmlParser::_getContent(char*& p)
 658               {
 659                   while (*p && *p != '<')
 660                   {
 661               	if (*p == '\n')
 662               	    _line++;
 663               
 664 mike     1.13 	p++;
 665                   }
 666               }
 667               
 668               void XmlParser::_substituteReferences(char* text)
 669               {
 670                   Uint32 rem = strlen(text);
 671               
 672                   for (char* p = text; *p; p++, rem--)
 673                   {
 674               	if (*p == '&')
 675               	{
 676 kumpf    1.18             // Process character or entity reference
 677 mike     1.13 
 678 kumpf    1.18             Uint16 referenceChar = 0;
 679                           Uint32 referenceLength = 0;
 680                           XmlException::Code code = XmlException::MALFORMED_REFERENCE;
 681               
 682                           if (*(p+1) == '#')
 683                           {
 684                               // Found a character (numeric) reference
 685                               // Determine whether it is decimal or hex
 686                               if (*(p+2) == 'x')
 687                               {
 688                                   // Decode a hexadecimal character reference
 689                                   char* q = p+3;
 690               
 691                                   // At most four digits are allowed, plus trailing ';'
 692                                   Uint32 numDigits;
 693                                   for (numDigits = 0; numDigits < 5; numDigits++, q++)
 694                                   {
 695                                       if (isdigit(*q))
 696                                       {
 697                                           referenceChar = (referenceChar << 4);
 698                                           referenceChar += (*q - '0');
 699 kumpf    1.18                         }
 700                                       else if ((*q >= 'A') && (*q <= 'F'))
 701                                       {
 702                                           referenceChar = (referenceChar << 4);
 703                                           referenceChar += (*q - 'A' + 10);
 704                                       }
 705                                       else if ((*q >= 'a') && (*q <= 'f'))
 706                                       {
 707                                           referenceChar = (referenceChar << 4);
 708                                           referenceChar += (*q - 'a' + 10);
 709                                       }
 710                                       else if (*q == ';')
 711                                       {
 712                                           break;
 713                                       }
 714                                       else
 715                                       {
 716                                           throw XmlException(code, _line);
 717                                       }
 718                                   }
 719               
 720 kumpf    1.18                     // Hex number must be 1 - 4 digits
 721                                   if ((numDigits == 0) || (numDigits > 4))
 722                                   {
 723                                       throw XmlException(code, _line);
 724                                   }
 725               
 726                                   // ATTN: Currently do not support 16-bit characters
 727                                   if (referenceChar > 0xff)
 728                                   {
 729                                       // ATTN: Is there a good way to say "unsupported"?
 730                                       throw XmlException(code, _line);
 731                                   }
 732               
 733                                   referenceLength = numDigits + 4;
 734                               }
 735                               else
 736                               {
 737                                   // Decode a decimal character reference
 738                                   Uint32 newChar = 0;
 739                                   char* q = p+2;
 740               
 741 kumpf    1.18                     // At most five digits are allowed, plus trailing ';'
 742                                   Uint32 numDigits;
 743                                   for (numDigits = 0; numDigits < 6; numDigits++, q++)
 744                                   {
 745                                       if (isdigit(*q))
 746                                       {
 747                                           newChar = (newChar * 10);
 748                                           newChar += (*q - '0');
 749                                       }
 750                                       else if (*q == ';')
 751                                       {
 752                                           break;
 753                                       }
 754                                       else
 755                                       {
 756                                           throw XmlException(code, _line);
 757                                       }
 758                                   }
 759               
 760                                   // Decimal number must be 1 - 5 digits and fit in 16 bits
 761                                   if ((numDigits == 0) || (numDigits > 5) ||
 762 kumpf    1.18                         (newChar > 0xffff))
 763                                   {
 764                                       throw XmlException(code, _line);
 765                                   }
 766               
 767                                   // ATTN: Currently do not support 16-bit characters
 768                                   if (newChar > 0xff)
 769                                   {
 770                                       // ATTN: Is there a good way to say "unsupported"?
 771                                       throw XmlException(code, _line);
 772                                   }
 773               
 774                                   referenceChar = Uint16(newChar);
 775                                   referenceLength = numDigits + 3;
 776                               }
 777                           }
 778                           else
 779                           {
 780                               // Check for entity reference
 781                               // ATTN: Inefficient if many entity references are supported
 782                               Uint32 i;
 783 kumpf    1.18                 for (i = 0; i < _REFERENCES_SIZE; i++)
 784                               {
 785                                   Uint32 length = _references[i].length;
 786                                   const char* match = _references[i].match;
 787               
 788                                   if (strncmp(p, _references[i].match, length) == 0)
 789                                   {
 790                                       referenceChar = _references[i].replacement;
 791                                       referenceLength = length;
 792                                       break;
 793                                   }
 794                               }
 795               
 796                               if (i == _REFERENCES_SIZE)
 797                               {
 798                                   // Didn't recognize the entity reference
 799                                   // ATTN: Is there a good way to say "unsupported"?
 800                                   throw XmlException(code, _line);
 801                               }
 802                           }
 803               
 804 kumpf    1.18             // Replace the reference with the correct character
 805                           *p = (char)referenceChar;
 806                           char* q = p + referenceLength;
 807                           rem = rem - referenceLength + 1;
 808                           memmove(p + 1, q, rem);
 809 mike     1.13 	}
 810                   }
 811               }
 812               
 813               static const char _EMPTY_STRING[] = "";
 814               
 815               void XmlParser::_getElement(char*& p, XmlEntry& entry)
 816               {
 817                   entry.attributeCount = 0;
 818               
 819                   //--------------------------------------------------------------------------
 820                   // Get the element name (expect one of these: '?', '!', [A-Za-z_])
 821                   //--------------------------------------------------------------------------
 822               
 823                   if (*p == '?')
 824                   {
 825               	entry.type = XmlEntry::XML_DECLARATION;
 826               	entry.text = ++p;
 827               
 828               	Boolean openCloseElement = false;
 829               
 830 mike     1.13 	if (_getElementName(p))
 831               	    return;
 832                   }
 833                   else if (*p == '!')
 834                   {
 835               	p++;
 836               
 837               	// Expect a comment or CDATA:
 838               
 839               	if (p[0] == '-' && p[1] == '-')
 840               	{
 841               	    p += 2;
 842               	    entry.type = XmlEntry::COMMENT;
 843               	    entry.text = p;
 844               	    _getComment(p);
 845               	    return;
 846               	}
 847               	else if (memcmp(p, "[CDATA[", 7) == 0)
 848               	{
 849               	    p += 7;
 850               	    entry.type = XmlEntry::CDATA;
 851 mike     1.13 	    entry.text = p;
 852               	    _getCData(p);
 853               	    return;
 854               	}
 855               	else if (memcmp(p, "DOCTYPE", 7) == 0)
 856               	{
 857               	    entry.type = XmlEntry::DOCTYPE;
 858               	    entry.text = _EMPTY_STRING;
 859               	    _getDocType(p);
 860               	    return;
 861               	}
 862               	throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
 863                   }
 864                   else if (*p == '/')
 865                   {
 866               	entry.type = XmlEntry::END_TAG;
 867               	entry.text = ++p;
 868               
 869               	if (!_getElementName(p))
 870               	    throw(XmlException(XmlException::BAD_END_TAG, _line));
 871               
 872 mike     1.13 	return;
 873                   }
 874                   else if (isalpha(*p) || *p == '_')
 875                   {
 876               	entry.type = XmlEntry::START_TAG;
 877               	entry.text = p;
 878               
 879               	Boolean openCloseElement = false;
 880               
 881               	if (_getOpenElementName(p, openCloseElement))
 882               	{
 883               	    if (openCloseElement)
 884               		entry.type = XmlEntry::EMPTY_TAG;
 885               	    return;
 886               	}
 887                   }
 888                   else
 889               	throw XmlException(XmlException::BAD_START_TAG, _line);
 890               
 891                   //--------------------------------------------------------------------------
 892                   // Grab all the attributes:
 893 mike     1.13     //--------------------------------------------------------------------------
 894               
 895                   for (;;)
 896                   {
 897               	if (entry.type == XmlEntry::XML_DECLARATION)
 898               	{
 899               	    if (p[0] == '?' && p[1] == '>')
 900               	    {
 901               		p += 2;
 902               		return;
 903               	    }
 904               	}
 905               	else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
 906               	{
 907               	    entry.type = XmlEntry::EMPTY_TAG;
 908               	    p += 2;
 909               	    return;
 910               	}
 911               	else if (*p == '>')
 912               	{
 913               	    p++;
 914 mike     1.13 	    return;
 915               	}
 916               
 917               	XmlAttribute attr;
 918               	attr.name = p;
 919               	_getAttributeNameAndEqual(p);
 920               
 921               	if (*p != '"' && *p != '\'')
 922               	    throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 923               
 924               	attr.value = p + 1;
 925               	_getAttributeValue(p);
 926               
 927               	if (entry.type == XmlEntry::XML_DECLARATION)
 928               	{
 929               	    // The next thing must a space or a "?>":
 930               
 931               	    if (!(p[0] == '?' && p[1] == '>') && !isspace(*p))
 932               	    {
 933               		throw XmlException(
 934               		    XmlException::BAD_ATTRIBUTE_VALUE, _line);
 935 mike     1.13 	    }
 936               	}
 937               	else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || isspace(*p)))
 938               	{
 939               	    // The next thing must be a space or a '>':
 940               
 941               	    throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 942               	}
 943               
 944               	_skipWhitespace(p);
 945               
 946               	if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES)
 947               	    throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line);
 948               
 949               	_substituteReferences((char*)attr.value);
 950               	entry.attributes[entry.attributeCount++] = attr;
 951                   }
 952               }
 953               
 // Printable names for XML entry types; XmlEntry::print() indexes this table
 // with the entry's type. NOTE(review): the order presumably mirrors the
 // entry type enumeration declared in the header -- keep the two in sync.
 // Both the strings and the table itself are const so no entry can be
 // reassigned by accident.
 static const char* const _typeStrings[] =
 {
     "XML_DECLARATION",
     "START_TAG",
     "EMPTY_TAG",
     "END_TAG",
     "COMMENT",
     "CDATA",
     "DOCTYPE",
     "CONTENT"
 };
 965               
 966               void XmlEntry::print() const
 967               {
 968                   PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
 969               
 970                   Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
 971               
 972                   if (needQuotes)
 973               	PEGASUS_STD(cout) << "\"";
 974               	
 975                   _printValue(text);
 976               
 977 mike     1.13     if (needQuotes)
 978               	PEGASUS_STD(cout) << "\"";
 979               
 980                   PEGASUS_STD(cout) << '\n';
 981               
 982                   for (Uint32 i = 0; i < attributeCount; i++)
 983                   {
 984               	PEGASUS_STD(cout) << "    " << attributes[i].name << "=\"";
 985               	_printValue(attributes[i].value);
 986               	PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
 987                   }
 988               }
 989               
 990               const XmlAttribute* XmlEntry::findAttribute(
 991                   const char* name) const
 992               {
 993                   for (Uint32 i = 0; i < attributeCount; i++)
 994                   {
 995               	if (strcmp(attributes[i].name, name) == 0)
 996               	    return &attributes[i];
 997                   }
 998 mike     1.13 
 999                   return 0;
1000               }
1001               
// Find the first non-whitespace character (set first) and the last
// non-whitespace character (set last one past it). For example, consider
// this string:
//
//	"   87     "
//
// The first pointer would point to '8' and the last pointer would point one
// beyond '7'. For an empty or all-whitespace string, both pointers end up at
// the terminating NUL.

static void _findEnds(
    const char* str,
    const char*& first,
    const char*& last)
{
    first = str;

    // Cast to unsigned char: passing a negative char value to isspace() is
    // undefined behavior.
    while (isspace(static_cast<unsigned char>(*first)))
        first++;

    // When nothing but the terminator remains, strlen() is zero and last
    // ends up equal to first.
    last = first + strlen(first);

    while (last != first && isspace(static_cast<unsigned char>(last[-1])))
        last--;
}
1031               
1032               Boolean XmlEntry::getAttributeValue(
1033                   const char* name, 
1034                   Uint32& value) const
1035               {
1036                   const XmlAttribute* attr = findAttribute(name);
1037               
1038                   if (!attr)
1039               	return false;
1040 mike     1.13 
1041                   const char* first;
1042                   const char* last;
1043                   _findEnds(attr->value, first, last);
1044               
1045                   char* end = 0;
1046                   long tmp = strtol(first, &end, 10);
1047               
1048                   if (!end || end != last)
1049               	return false;
1050               
1051                   value = Uint32(tmp);
1052                   return true;
1053               }
1054               
1055               Boolean XmlEntry::getAttributeValue(
1056                   const char* name, 
1057                   Real32& value) const
1058               {
1059                   const XmlAttribute* attr = findAttribute(name);
1060               
1061 mike     1.13     if (!attr)
1062               	return false;
1063               
1064                   const char* first;
1065                   const char* last;
1066                   _findEnds(attr->value, first, last);
1067               
1068                   char* end = 0;
1069                   double tmp = strtod(first, &end);
1070               
1071                   if (!end || end != last)
1072               	return false;
1073               
1074                   value = Uint32(tmp);
1075                   return true;
1076               }
1077               
1078               Boolean XmlEntry::getAttributeValue(
1079                   const char* name, 
1080                   const char*& value) const
1081               {
1082 mike     1.13     const XmlAttribute* attr = findAttribute(name);
1083               
1084                   if (!attr)
1085               	return false;
1086               
1087                   value = attr->value;
1088                   return true;
1089               }
1090               
1091               Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1092               {
1093                   const char* tmp;
1094               
1095                   if (!getAttributeValue(name, tmp))
1096               	return false;
1097               
1098                   value = tmp;
1099                   return true;
1100               }
1101               
1102               void XmlAppendCString(Array<Sint8>& out, const char* str)
1103 mike     1.13 {
1104                   out.append(str, strlen(str));
1105               }
1106               
1107               PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2