(file) Return to XmlParser.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 mike  1.13 //%/////////////////////////////////////////////////////////////////////////////
   2            //
   3 kumpf 1.16 // Copyright (c) 2000, 2001, 2002 BMC Software, Hewlett-Packard Company, IBM,
   4            // The Open Group, Tivoli Systems
   5 mike  1.13 //
   6            // Permission is hereby granted, free of charge, to any person obtaining a copy
   7 kumpf 1.16 // of this software and associated documentation files (the "Software"), to
   8            // deal in the Software without restriction, including without limitation the
   9            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  10 mike  1.13 // sell copies of the Software, and to permit persons to whom the Software is
  11            // furnished to do so, subject to the following conditions:
  12            // 
  13 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
  14 mike  1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
  15            // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
  16 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
  17            // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  18            // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  19 mike  1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  20            // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  21            //
  22            //==============================================================================
  23            //
  24            // Author: Mike Brasher (mbrasher@bmc.com)
  25            //
  26            // Modified By:
  27            //
  28            //%/////////////////////////////////////////////////////////////////////////////
  29            
  30            ////////////////////////////////////////////////////////////////////////////////
  31            //
  32            // XmlParser
  33            //
  34            //	This file contains a simple non-validating XML parser. Here are 
   35            //	several rules for well-formed XML:
  36            //
  37            //	    1.	Documents must begin with an XML declaration:
  38            //
  39            //		<?xml version="1.0" standalone="yes"?>
  40 mike  1.13 //
  41            //	    2.	Comments have the form:
  42            //
  43            //		<!-- blah blah blah -->
  44            //
  45            //	    3. The following entity references are supported:
  46            //
   47            //		&amp; - ampersand
   48            //	 	&lt; - less-than
   49            //		&gt; - greater-than
   50            //		&quot; - full quote
   51            //		&apos; - apostrophe
  52            //
  53 kumpf 1.18 //             as well as character (numeric) references:
  54            
  55            //              &#49; - decimal reference for character '1'
  56            //              &#x31; - hexadecimal reference for character '1'
  57            //
  58 mike  1.13 //	    4. Element names and attribute names take the following form:
  59            //
   60            //		[A-Za-z_][A-Za-z_0-9-.:]*
  61            //
  62            //	    5.	Arbitrary data (CDATA) can be enclosed like this:
  63            //
  64            //		    <![CDATA[
  65            //		    ...
  66            //		    ]]>
  67            //
  68            //	    6.	Element names and attributes names are case-sensitive.
  69            //
  70            //	    7.	XmlAttribute values must be delimited by full or half quotes.
  71            //		XmlAttribute values must be delimited.
  72            //
  73            //	    8.  <!DOCTYPE...>
  74            //
  75            // TODO:
  76            //
  77 karl  1.15 //      ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is work.
  78 mike  1.13 //	Handle <!DOCTYPE...> sections which are complicated (containing
  79            //        rules rather than references to files).
  80            //
  81            //	Remove newlines from string literals:
  82            //
  83            //          Example: <xyz x="hello
  84            //		world">
  85            //
  86            ////////////////////////////////////////////////////////////////////////////////
  87            
  88 sage  1.14 #include <Pegasus/Common/Config.h>
  89 mike  1.13 #include <cctype>
  90            #include <cstdio>
  91            #include <cstdlib>
  92            #include <cstring>
  93            #include "XmlParser.h"
  94            #include "Logger.h"
  95 chuck 1.19 #include "ExceptionRep.h"
  96 mike  1.13 
  97            PEGASUS_NAMESPACE_BEGIN
  98            
  99            #define PEGASUS_ARRAY_T XmlEntry
 100            # include "ArrayImpl.h"
 101            #undef PEGASUS_ARRAY_T
 102            
 103            
 104            ////////////////////////////////////////////////////////////////////////////////
 105            //
 106            // Static helper functions
 107            //
 108            ////////////////////////////////////////////////////////////////////////////////
 109            
 110            static void _printValue(const char* p)
 111            {
 112                for (; *p; p++)
 113                {
 114            	if (*p == '\n')
 115            	    PEGASUS_STD(cout) << "\\n";
 116            	else if (*p == '\r')
 117 mike  1.13 	    PEGASUS_STD(cout) << "\\r";
 118            	else if (*p == '\t')
 119            	    PEGASUS_STD(cout) << "\\t";
 120            	else
 121            	    PEGASUS_STD(cout) << *p;
 122                }
 123            }
 124            
 125            struct EntityReference
 126            {
 127                const char* match;
 128                Uint32 length;
 129                char replacement;
 130            };
 131            
 132 kumpf 1.18 // ATTN: Add support for more entity references
 133 mike  1.13 static EntityReference _references[] =
 134            {
 135                { "&amp;", 5, '&' },
 136                { "&lt;", 4, '<' },
 137                { "&gt;", 4, '>' },
 138                { "&quot;", 6, '"' },
 139                { "&apos;", 6, '\'' }
 140            };
 141            
 142            static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
 143            
 144            // Remove all redundant spaces from the given string:
 145            
 146            static void _normalize(char* text)
 147            {
 148                Uint32 length = strlen(text);
 149                char* p = text;
 150                char* end = p + length;
 151            
 152                // Remove leading spaces:
 153            
 154 mike  1.13     while (isspace(*p))
 155            	p++;
 156            
 157                if (p != text)
 158            	memmove(text, p, end - p + 1);
 159            
 160                p = text;
 161            
 162                // Look for sequences of more than one space and remove all but one.
 163            
 164                for (;;)
 165                {
 166            	// Advance to the next space:
 167            
 168            	while (*p && !isspace(*p))
 169            	    p++;
 170            
 171            	if (!*p)
 172            	    break;
 173            
 174            	// Advance to the next non-space:
 175 mike  1.13 
 176            	char* q = p++;
 177            
 178            	while (isspace(*p))
 179            	    p++;
 180            
 181            	// Discard trailing spaces (if we are at the end):
 182            
 183            	if (!*p)
 184            	{
 185            	    *q = '\0';
 186            	    break;
 187            	}
 188            
 189            	// Remove the redundant spaces:
 190            
 191            	Uint32 n = p - q;
 192            
 193            	if (n > 1)
 194            	{
 195            	    *q++ = ' ';
 196 mike  1.13 	    memmove(q, p, end - p + 1);
 197            	    p = q;
 198            	}
 199                }
 200            }
 201            
 202            ////////////////////////////////////////////////////////////////////////////////
 203            //
 204            // XmlException
 205            //
 206            ////////////////////////////////////////////////////////////////////////////////
 207            
 208            static const char* _xmlMessages[] =
 209            {
 210                "Bad opening element",
 211                "Bad closing element",
 212                "Bad attribute name",
 213                "Exepected equal sign",
 214                "Bad attribute value",
 215                "A \"--\" sequence found within comment",
 216                "Unterminated comment",
 217 mike  1.13     "Unterminated CDATA block",
 218                "Unterminated DOCTYPE",
 219                "Too many attributes: parser only handles 10",
 220                "Malformed reference",
 221                "Expected a comment or CDATA following \"<!\" sequence",
 222                "Closing element does not match opening element",
 223                "One or more tags are still open",
 224                "More than one root element was encountered",
 225                "Validation error",
 226                "Semantic error"
 227            };
 228            
 229 chuck 1.19 static const char* _xmlKeys[] = 
 230            {
 231 humberto 1.20     "Common.XmlParser.BAD_START_TAG",
 232 chuck    1.19     "Common.XmlParser.BAD_END_TAG",
 233                   "Common.XmlParser.BAD_ATTRIBUTE_NAME",
 234                   "Common.XmlParser.EXPECTED_EQUAL_SIGN",
 235                   "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
 236                   "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
 237                   "Common.XmlParser.UNTERMINATED_COMMENT",
 238                   "Common.XmlParser.UNTERMINATED_CDATA",
 239                   "Common.XmlParser.UNTERMINATED_DOCTYPE",
 240                   "Common.XmlParser.TOO_MANY_ATTRIBUTES",
 241                   "Common.XmlParser.MALFORMED_REFERENCE",
 242                   "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
 243                   "Common.XmlParser.START_END_MISMATCH",
 244                   "Common.XmlParser.UNCLOSED_TAGS", 
 245                   "Common.XmlParser.MULTIPLE_ROOTS",
 246                   "Common.XmlParser.VALIDATION_ERROR",
 247                   "Common.XmlParser.SEMANTIC_ERROR"
 248               };
 249               
 250               // l10n TODO replace _formMessage with the commented one and uncomment
 251               // the new constructors
 252               /*
 253 mike     1.13 static String _formMessage(Uint32 code, Uint32 line, const String& message)
 254               {
 255                   String result = _xmlMessages[Uint32(code) - 1];
 256               
 257                   char buffer[32];
 258                   sprintf(buffer, "%d", line);
 259                   result.append(": on line ");
 260                   result.append(buffer);
 261               
 262                   if (message.size())
 263                   {
 264               	result.append(": ");
 265               	result.append(message);
 266                   }
 267               
 268                   return result;
 269               }
 270 chuck    1.19 */
 271               
 272               static MessageLoaderParms _formMessage(Uint32 code, Uint32 line, const String& message)
 273               {
 274                   String dftMsg = _xmlMessages[Uint32(code) - 1];
 275                   String key = _xmlKeys[Uint32(code) - 1];
 276               	String msg = message;
 277               
 278                   dftMsg.append(": on line $0");
 279                   if (message.size())
 280                   {
 281 humberto 1.20     	msg = ": " + msg;
 282 chuck    1.19     	dftMsg.append("$1");
 283                   }    
 284               
 285                   return MessageLoaderParms(key, dftMsg, line ,msg);
 286               }
 287               
 288               static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
 289               {
 290                   String dftMsg = _xmlMessages[Uint32(code) - 1];
 291                   String key = _xmlKeys[Uint32(code) - 1];
 292               
 293                   dftMsg.append(": on line $0");
 294                
 295                   return MessageLoaderParms(key, dftMsg, line);
 296               }
 297               
 298 mike     1.13 
 299               XmlException::XmlException(
 300                   XmlException::Code code, 
 301                   Uint32 lineNumber,
 302                   const String& message) 
 303                   : Exception(_formMessage(code, lineNumber, message))
 304               {
 305               
 306               }
 307               
 308 chuck    1.19 
 309               XmlException::XmlException(
 310                   XmlException::Code code, 
 311                   Uint32 lineNumber,
 312                   MessageLoaderParms& msgParms) 
 313                   : Exception(_formPartialMessage(code, lineNumber))
 314               {
 315 humberto 1.21 	if (msgParms.default_msg.size())
 316                   {
 317                   	msgParms.default_msg = ": " + msgParms.default_msg;
 318                   } 
 319 chuck    1.19 	_rep->message.append(MessageLoader::getMessage(msgParms));
 320               }
 321               
 322               
 323 mike     1.13 ////////////////////////////////////////////////////////////////////////////////
 324               //
 325               // XmlValidationError
 326               //
 327               ////////////////////////////////////////////////////////////////////////////////
 328               
 329               XmlValidationError::XmlValidationError(
 330                   Uint32 lineNumber,
 331                   const String& message)
 332                   : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
 333               {
 334               
 335               }
 336               
 337 chuck    1.19 
 338               XmlValidationError::XmlValidationError(
 339                   Uint32 lineNumber,
 340                   MessageLoaderParms& msgParms)
 341                   : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
 342               {
 343               
 344               }
 345               
 346               
 347 mike     1.13 ////////////////////////////////////////////////////////////////////////////////
 348               //
 349               // XmlSemanticError
 350               //
 351               ////////////////////////////////////////////////////////////////////////////////
 352               
 353               XmlSemanticError::XmlSemanticError(
 354                   Uint32 lineNumber,
 355                   const String& message)
 356                   : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
 357               {
 358               
 359               }
 360 chuck    1.19 
 361               
 362               XmlSemanticError::XmlSemanticError(
 363                   Uint32 lineNumber,
 364                   MessageLoaderParms& msgParms)
 365                   : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
 366               {
 367               
 368               }
 369               
 370 mike     1.13 
 371               ////////////////////////////////////////////////////////////////////////////////
 372               //
 373               // XmlParser
 374               //
 375               ////////////////////////////////////////////////////////////////////////////////
 376               
 377               XmlParser::XmlParser(char* text) : _line(1), _text(text), _current(text), 
 378                   _restoreChar('\0'), _foundRoot(false)
 379               {
 380               
 381               }
 382               
 383               Boolean XmlParser::next(XmlEntry& entry)
 384               {
 385                   if (!_putBackStack.isEmpty())
 386                   {
 387               	entry = _putBackStack.top();
 388               	_putBackStack.pop();
 389               	return true;
 390                   }
 391 mike     1.13 
 392                   // If a character was overwritten with a null-terminator the last
 393                   // time this routine was called, then put back that character. Before
 394                   // exiting of course, restore the null-terminator.
 395               
 396                   char* nullTerminator = 0;
 397               
 398                   if (_restoreChar && !*_current)
 399                   {
 400               	nullTerminator = _current;
 401               	*_current = _restoreChar;
 402               	_restoreChar = '\0';
 403                   }
 404               
 405                   // Skip over any whitespace:
 406               
 407                   _skipWhitespace(_current);
 408               
 409                   if (!*_current)
 410                   {
 411               	if (nullTerminator)
 412 mike     1.13 	    *nullTerminator = '\0';
 413               
 414               	if (!_stack.isEmpty())
 415               	    throw XmlException(XmlException::UNCLOSED_TAGS, _line);
 416               
 417               	return false;
 418                   }
 419               
 420                   // Either a "<...>" or content begins next:
 421               
 422                   if (*_current == '<')
 423                   {
 424               	_current++;
 425               	_getElement(_current, entry);
 426               
 427               	if (nullTerminator)
 428               	    *nullTerminator = '\0';
 429               
 430               	if (entry.type == XmlEntry::START_TAG)
 431               	{
 432               	    if (_stack.isEmpty() && _foundRoot)
 433 mike     1.13 		throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
 434               
 435               	    _foundRoot = true;
 436               	    _stack.push((char*)entry.text);
 437               	}
 438               	else if (entry.type == XmlEntry::END_TAG)
 439               	{
 440               	    if (_stack.isEmpty())
 441               		throw XmlException(XmlException::START_END_MISMATCH, _line);
 442               
 443               	    if (strcmp(_stack.top(), entry.text) != 0)
 444               		throw XmlException(XmlException::START_END_MISMATCH, _line);
 445               
 446               	    _stack.pop();
 447               	}
 448               
 449               	return true;
 450                   }
 451                   else
 452                   {
 453               	entry.type = XmlEntry::CONTENT;
 454 mike     1.13 	entry.text = _current;
 455               	_getContent(_current);
 456               	_restoreChar = *_current;
 457               	*_current = '\0';
 458               
 459               	if (nullTerminator)
 460               	    *nullTerminator = '\0';
 461               
 462               	_substituteReferences((char*)entry.text);
 463               	_normalize((char*)entry.text);
 464               
 465               	return true;
 466                   }
 467               }
 468               
 469               void XmlParser::putBack(XmlEntry& entry)
 470               {
 471                   _putBackStack.push(entry);
 472               }
 473               
 474               XmlParser::~XmlParser()
 475 mike     1.13 {
 476                   // Nothing to do!
 477               }
 478               
 479               void XmlParser::_skipWhitespace(char*& p)
 480               {
 481                   while (*p && isspace(*p))
 482                   {
 483               	if (*p == '\n')
 484               	    _line++;
 485               
 486               	p++;
 487                   }
 488               }
 489               
 490               Boolean XmlParser::_getElementName(char*& p)
 491               {
 492 david    1.22     if (!String::isUTF8(p))
 493 mike     1.13 	throw XmlException(XmlException::BAD_START_TAG, _line);
 494               
 495 david    1.22     while ((*p) &&
 496               	   (((*p >= 'A') && (*p <= 'Z')) ||
 497               	    ((*p >= 'a') && (*p <= 'z')) ||
 498               	    *p == '_' || *p == '-' || *p == ':' || *p == '.'))
 499 mike     1.13 	p++;
 500               
 501                   // The next character must be a space:
 502               
 503                   if (isspace(*p))
 504                   {
 505               	*p++ = '\0';
 506               	_skipWhitespace(p);
 507                   }
 508               
 509                   if (*p == '>')
 510                   {
 511               	*p++ = '\0';
 512               	return true;
 513                   }
 514               
 515                   return false;
 516               }
 517               
 518               Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement)
 519               {
 520 mike     1.13     openCloseElement = false;
 521               
 522 david    1.22     if (!String::isUTF8(p))
 523 mike     1.13 	throw XmlException(XmlException::BAD_START_TAG, _line);
 524               
 525 david    1.22     while ((*p) &&
 526               	   (((*p >= 'A') && (*p <= 'Z')) ||
 527               	    ((*p >= 'a') && (*p <= 'z')) ||
 528               	    *p == '_' || *p == '-' || *p == ':' || *p == '.'))
 529 mike     1.13 	p++;
 530               
 531                   // The next character must be a space:
 532               
 533                   if (isspace(*p))
 534                   {
 535               	*p++ = '\0';
 536               	_skipWhitespace(p);
 537                   }
 538               
 539                   if (*p == '>')
 540                   {
 541               	*p++ = '\0';
 542               	return true;
 543                   }
 544               
 545                   if (p[0] == '/' && p[1] == '>')
 546                   {
 547               	openCloseElement = true;
 548               	*p = '\0';
 549               	p += 2;
 550 mike     1.13 	return true;
 551                   }
 552               
 553                   return false;
 554               }
 555               
 556               void XmlParser::_getAttributeNameAndEqual(char*& p)
 557               {
 558 david    1.22     if (!String::isUTF8(p))
 559 mike     1.13 	throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
 560               
 561 david    1.22     while ((*p) &&
 562               	   (((*p >= 'A') && (*p <= 'Z')) ||
 563               	    ((*p >= 'a') && (*p <= 'z')) ||
 564               	    *p == '_' || *p == '-' || *p == ':' || *p == '.'))
 565 mike     1.13 	p++;
 566               
 567                   char* term = p;
 568               
 569                   _skipWhitespace(p);
 570               
 571                   if (*p != '=')
 572               	throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
 573               
 574                   p++;
 575               
 576                   _skipWhitespace(p);
 577               
 578                   *term = '\0';
 579               }
 580               
 581               void XmlParser::_getAttributeValue(char*& p)
 582               {
 583                   // ATTN-B: handle values contained in semiquotes:
 584               
 585                   if (*p != '"' && *p != '\'')
 586 mike     1.13 	throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 587               
 588                   char startChar = *p++;
 589               
 590                   while (*p && *p != startChar)
 591               	p++;
 592               
 593                   if (*p != startChar)
 594               	throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 595               
 596                   *p++ = '\0';
 597               }
 598               
 599               void XmlParser::_getComment(char*& p)
 600               {
 601                   // Now p points to first non-whitespace character beyond "<--" sequence:
 602               
 603                   for (; *p; p++)
 604                   {
 605               	if (p[0] == '-' && p[1] == '-')
 606               	{
 607 mike     1.13 	    if (p[2] != '>')
 608               	    {
 609               		throw XmlException(
 610               		    XmlException::MINUS_MINUS_IN_COMMENT, _line);
 611               	    }
 612               
 613               	    // Find end of comment (excluding whitespace):
 614               
 615               	    *p = '\0';
 616               	    p += 3;
 617               	    return;
 618               	}
 619                   }
 620               
 621                   // If it got this far, then the comment is unterminated:
 622               
 623                   throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
 624               }
 625               
 626               void XmlParser::_getCData(char*& p)
 627               {
 628 mike     1.13     // At this point p points one past "<![CDATA[" sequence:
 629               
 630                   for (; *p; p++)
 631                   {
 632               	if (p[0] == ']' && p[1] == ']' && p[2] == '>')
 633               	{
 634               	    *p = '\0';
 635               	    p += 3;
 636               	    return;
 637               	}
 638               	else if (*p == '\n')
 639               	    _line++;
 640                   }
 641               
 642                   // If it got this far, then the comment is unterminated:
 643               
 644                   throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
 645               }
 646               
 647               void XmlParser::_getDocType(char*& p)
 648               {
 649 mike     1.13     // Just ignore the DOCTYPE command for now:
 650               
 651                   for (; *p && *p != '>'; p++)
 652                   {
 653               	if (*p == '\n')
 654               	    _line++;
 655                   }
 656               
 657                   if (*p != '>')
 658               	throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
 659               
 660                   p++;
 661               }
 662               
 663               void XmlParser::_getContent(char*& p)
 664               {
 665                   while (*p && *p != '<')
 666                   {
 667               	if (*p == '\n')
 668               	    _line++;
 669               
 670 mike     1.13 	p++;
 671                   }
 672               }
 673               
 674               void XmlParser::_substituteReferences(char* text)
 675               {
 676                   Uint32 rem = strlen(text);
 677               
 678                   for (char* p = text; *p; p++, rem--)
 679                   {
 680               	if (*p == '&')
 681               	{
 682 kumpf    1.18             // Process character or entity reference
 683 mike     1.13 
 684 kumpf    1.18             Uint16 referenceChar = 0;
 685                           Uint32 referenceLength = 0;
 686                           XmlException::Code code = XmlException::MALFORMED_REFERENCE;
 687               
 688                           if (*(p+1) == '#')
 689                           {
 690                               // Found a character (numeric) reference
 691                               // Determine whether it is decimal or hex
 692                               if (*(p+2) == 'x')
 693                               {
 694                                   // Decode a hexadecimal character reference
 695                                   char* q = p+3;
 696               
 697                                   // At most four digits are allowed, plus trailing ';'
 698                                   Uint32 numDigits;
 699                                   for (numDigits = 0; numDigits < 5; numDigits++, q++)
 700                                   {
 701                                       if (isdigit(*q))
 702                                       {
 703                                           referenceChar = (referenceChar << 4);
 704                                           referenceChar += (*q - '0');
 705 kumpf    1.18                         }
 706                                       else if ((*q >= 'A') && (*q <= 'F'))
 707                                       {
 708                                           referenceChar = (referenceChar << 4);
 709                                           referenceChar += (*q - 'A' + 10);
 710                                       }
 711                                       else if ((*q >= 'a') && (*q <= 'f'))
 712                                       {
 713                                           referenceChar = (referenceChar << 4);
 714                                           referenceChar += (*q - 'a' + 10);
 715                                       }
 716                                       else if (*q == ';')
 717                                       {
 718                                           break;
 719                                       }
 720                                       else
 721                                       {
 722                                           throw XmlException(code, _line);
 723                                       }
 724                                   }
 725               
 726 kumpf    1.18                     // Hex number must be 1 - 4 digits
 727                                   if ((numDigits == 0) || (numDigits > 4))
 728                                   {
 729                                       throw XmlException(code, _line);
 730                                   }
 731               
 732                                   // ATTN: Currently do not support 16-bit characters
 733                                   if (referenceChar > 0xff)
 734                                   {
 735                                       // ATTN: Is there a good way to say "unsupported"?
 736                                       throw XmlException(code, _line);
 737                                   }
 738               
 739                                   referenceLength = numDigits + 4;
 740                               }
 741                               else
 742                               {
 743                                   // Decode a decimal character reference
 744                                   Uint32 newChar = 0;
 745                                   char* q = p+2;
 746               
 747 kumpf    1.18                     // At most five digits are allowed, plus trailing ';'
 748                                   Uint32 numDigits;
 749                                   for (numDigits = 0; numDigits < 6; numDigits++, q++)
 750                                   {
 751                                       if (isdigit(*q))
 752                                       {
 753                                           newChar = (newChar * 10);
 754                                           newChar += (*q - '0');
 755                                       }
 756                                       else if (*q == ';')
 757                                       {
 758                                           break;
 759                                       }
 760                                       else
 761                                       {
 762                                           throw XmlException(code, _line);
 763                                       }
 764                                   }
 765               
 766                                   // Decimal number must be 1 - 5 digits and fit in 16 bits
 767                                   if ((numDigits == 0) || (numDigits > 5) ||
 768 kumpf    1.18                         (newChar > 0xffff))
 769                                   {
 770                                       throw XmlException(code, _line);
 771                                   }
 772               
 773                                   // ATTN: Currently do not support 16-bit characters
 774                                   if (newChar > 0xff)
 775                                   {
 776                                       // ATTN: Is there a good way to say "unsupported"?
 777                                       throw XmlException(code, _line);
 778                                   }
 779               
 780                                   referenceChar = Uint16(newChar);
 781                                   referenceLength = numDigits + 3;
 782                               }
 783                           }
 784                           else
 785                           {
 786                               // Check for entity reference
 787                               // ATTN: Inefficient if many entity references are supported
 788                               Uint32 i;
 789 kumpf    1.18                 for (i = 0; i < _REFERENCES_SIZE; i++)
 790                               {
 791                                   Uint32 length = _references[i].length;
 792                                   const char* match = _references[i].match;
 793               
 794                                   if (strncmp(p, _references[i].match, length) == 0)
 795                                   {
 796                                       referenceChar = _references[i].replacement;
 797                                       referenceLength = length;
 798                                       break;
 799                                   }
 800                               }
 801               
 802                               if (i == _REFERENCES_SIZE)
 803                               {
 804                                   // Didn't recognize the entity reference
 805                                   // ATTN: Is there a good way to say "unsupported"?
 806                                   throw XmlException(code, _line);
 807                               }
 808                           }
 809               
 810 kumpf    1.18             // Replace the reference with the correct character
 811                           *p = (char)referenceChar;
 812                           char* q = p + referenceLength;
 813                           rem = rem - referenceLength + 1;
 814                           memmove(p + 1, q, rem);
 815 mike     1.13 	}
 816                   }
 817               }
 818               
 819               static const char _EMPTY_STRING[] = "";
 820               
 821               void XmlParser::_getElement(char*& p, XmlEntry& entry)
 822               {
 823                   entry.attributeCount = 0;
 824               
 825                   //--------------------------------------------------------------------------
 826                   // Get the element name (expect one of these: '?', '!', [A-Za-z_])
 827                   //--------------------------------------------------------------------------
 828               
 829                   if (*p == '?')
 830                   {
 831               	entry.type = XmlEntry::XML_DECLARATION;
 832               	entry.text = ++p;
 833               
 834               	Boolean openCloseElement = false;
 835               
 836 mike     1.13 	if (_getElementName(p))
 837               	    return;
 838                   }
 839                   else if (*p == '!')
 840                   {
 841               	p++;
 842               
 843               	// Expect a comment or CDATA:
 844               
 845               	if (p[0] == '-' && p[1] == '-')
 846               	{
 847               	    p += 2;
 848               	    entry.type = XmlEntry::COMMENT;
 849               	    entry.text = p;
 850               	    _getComment(p);
 851               	    return;
 852               	}
 853               	else if (memcmp(p, "[CDATA[", 7) == 0)
 854               	{
 855               	    p += 7;
 856               	    entry.type = XmlEntry::CDATA;
 857 mike     1.13 	    entry.text = p;
 858               	    _getCData(p);
 859               	    return;
 860               	}
 861               	else if (memcmp(p, "DOCTYPE", 7) == 0)
 862               	{
 863               	    entry.type = XmlEntry::DOCTYPE;
 864               	    entry.text = _EMPTY_STRING;
 865               	    _getDocType(p);
 866               	    return;
 867               	}
 868               	throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
 869                   }
 870                   else if (*p == '/')
 871                   {
 872               	entry.type = XmlEntry::END_TAG;
 873               	entry.text = ++p;
 874               
 875               	if (!_getElementName(p))
 876               	    throw(XmlException(XmlException::BAD_END_TAG, _line));
 877               
 878 mike     1.13 	return;
 879                   }
 880 david    1.22     else if (String::isUTF8(p))
 881 mike     1.13     {
 882               	entry.type = XmlEntry::START_TAG;
 883               	entry.text = p;
 884               
 885               	Boolean openCloseElement = false;
 886               
 887               	if (_getOpenElementName(p, openCloseElement))
 888               	{
 889               	    if (openCloseElement)
 890               		entry.type = XmlEntry::EMPTY_TAG;
 891               	    return;
 892               	}
 893                   }
 894                   else
 895               	throw XmlException(XmlException::BAD_START_TAG, _line);
 896               
 897                   //--------------------------------------------------------------------------
 898                   // Grab all the attributes:
 899                   //--------------------------------------------------------------------------
 900               
 901                   for (;;)
 902 mike     1.13     {
 903               	if (entry.type == XmlEntry::XML_DECLARATION)
 904               	{
 905               	    if (p[0] == '?' && p[1] == '>')
 906               	    {
 907               		p += 2;
 908               		return;
 909               	    }
 910               	}
 911               	else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
 912               	{
 913               	    entry.type = XmlEntry::EMPTY_TAG;
 914               	    p += 2;
 915               	    return;
 916               	}
 917               	else if (*p == '>')
 918               	{
 919               	    p++;
 920               	    return;
 921               	}
 922               
 923 mike     1.13 	XmlAttribute attr;
 924               	attr.name = p;
 925               	_getAttributeNameAndEqual(p);
 926               
 927               	if (*p != '"' && *p != '\'')
 928               	    throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 929               
 930               	attr.value = p + 1;
 931               	_getAttributeValue(p);
 932               
 933               	if (entry.type == XmlEntry::XML_DECLARATION)
 934               	{
 935               	    // The next thing must a space or a "?>":
 936               
 937               	    if (!(p[0] == '?' && p[1] == '>') && !isspace(*p))
 938               	    {
 939               		throw XmlException(
 940               		    XmlException::BAD_ATTRIBUTE_VALUE, _line);
 941               	    }
 942               	}
 943               	else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || isspace(*p)))
 944 mike     1.13 	{
 945               	    // The next thing must be a space or a '>':
 946               
 947               	    throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 948               	}
 949               
 950               	_skipWhitespace(p);
 951               
 952               	if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES)
 953               	    throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line);
 954               
 955               	_substituteReferences((char*)attr.value);
 956               	entry.attributes[entry.attributeCount++] = attr;
 957                   }
 958               }
 959               
 960               static const char* _typeStrings[] =
 961               {
 962                   "XML_DECLARATION", 
 963                   "START_TAG", 
 964                   "EMPTY_TAG", 
 965 mike     1.13     "END_TAG", 
 966                   "COMMENT",
 967                   "CDATA",
 968                   "DOCTYPE",
 969                   "CONTENT" 
 970               };
 971               
 972               void XmlEntry::print() const
 973               {
 974                   PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
 975               
 976                   Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
 977               
 978                   if (needQuotes)
 979               	PEGASUS_STD(cout) << "\"";
 980               	
 981                   _printValue(text);
 982               
 983                   if (needQuotes)
 984               	PEGASUS_STD(cout) << "\"";
 985               
 986 mike     1.13     PEGASUS_STD(cout) << '\n';
 987               
 988                   for (Uint32 i = 0; i < attributeCount; i++)
 989                   {
 990               	PEGASUS_STD(cout) << "    " << attributes[i].name << "=\"";
 991               	_printValue(attributes[i].value);
 992               	PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
 993                   }
 994               }
 995               
 996               const XmlAttribute* XmlEntry::findAttribute(
 997                   const char* name) const
 998               {
 999                   for (Uint32 i = 0; i < attributeCount; i++)
1000                   {
1001               	if (strcmp(attributes[i].name, name) == 0)
1002               	    return &attributes[i];
1003                   }
1004               
1005                   return 0;
1006               }
1007 mike     1.13 
// Find the first non-whitespace character (set first) and the last
// non-whitespace character (set last one past it). For example, consider
// this string:
//
//     "   87     "
//
// The first pointer would point to '8' and the last pointer would point one
// beyond '7'. For an empty or all-whitespace string, first and last both
// point at the terminating null.
//
// Characters are converted to unsigned char before being passed to
// isspace(): the <ctype.h> functions are undefined for negative arguments
// other than EOF, and plain char may be signed.

static void _findEnds(
    const char* str,
    const char*& first,
    const char*& last)
{
    first = str;

    while (isspace(static_cast<unsigned char>(*first)))
        first++;

    // If only the terminator is left, strlen() yields 0 and last == first.
    last = first + strlen(first);

    while (last != first && isspace(static_cast<unsigned char>(last[-1])))
        last--;
}
1037               
1038               Boolean XmlEntry::getAttributeValue(
1039                   const char* name, 
1040                   Uint32& value) const
1041               {
1042                   const XmlAttribute* attr = findAttribute(name);
1043               
1044                   if (!attr)
1045               	return false;
1046               
1047                   const char* first;
1048                   const char* last;
1049 mike     1.13     _findEnds(attr->value, first, last);
1050               
1051                   char* end = 0;
1052                   long tmp = strtol(first, &end, 10);
1053               
1054                   if (!end || end != last)
1055               	return false;
1056               
1057                   value = Uint32(tmp);
1058                   return true;
1059               }
1060               
1061               Boolean XmlEntry::getAttributeValue(
1062                   const char* name, 
1063                   Real32& value) const
1064               {
1065                   const XmlAttribute* attr = findAttribute(name);
1066               
1067                   if (!attr)
1068               	return false;
1069               
1070 mike     1.13     const char* first;
1071                   const char* last;
1072                   _findEnds(attr->value, first, last);
1073               
1074                   char* end = 0;
1075                   double tmp = strtod(first, &end);
1076               
1077                   if (!end || end != last)
1078               	return false;
1079               
1080                   value = Uint32(tmp);
1081                   return true;
1082               }
1083               
1084               Boolean XmlEntry::getAttributeValue(
1085                   const char* name, 
1086                   const char*& value) const
1087               {
1088                   const XmlAttribute* attr = findAttribute(name);
1089               
1090                   if (!attr)
1091 mike     1.13 	return false;
1092               
1093                   value = attr->value;
1094                   return true;
1095               }
1096               
1097               Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1098               {
1099                   const char* tmp;
1100               
1101                   if (!getAttributeValue(name, tmp))
1102               	return false;
1103               
1104 david    1.22     value = String(tmp,STRING_FLAG_UTF8);
1105 mike     1.13     return true;
1106               }
1107               
1108               void XmlAppendCString(Array<Sint8>& out, const char* str)
1109               {
1110                   out.append(str, strlen(str));
1111               }
1112               
1113               PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2