(file) Return to XmlParser.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 mike  1.13 //%/////////////////////////////////////////////////////////////////////////////
   2            //
   3 kumpf 1.16 // Copyright (c) 2000, 2001, 2002 BMC Software, Hewlett-Packard Company, IBM,
   4            // The Open Group, Tivoli Systems
   5 mike  1.13 //
   6            // Permission is hereby granted, free of charge, to any person obtaining a copy
   7 kumpf 1.16 // of this software and associated documentation files (the "Software"), to
   8            // deal in the Software without restriction, including without limitation the
   9            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  10 mike  1.13 // sell copies of the Software, and to permit persons to whom the Software is
  11            // furnished to do so, subject to the following conditions:
  12            // 
  13 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
  14 mike  1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
  15            // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
  16 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
  17            // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  18            // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  19 mike  1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  20            // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  21            //
  22            //==============================================================================
  23            //
  24            // Author: Mike Brasher (mbrasher@bmc.com)
  25            //
  26            // Modified By:
  27            //
  28            //%/////////////////////////////////////////////////////////////////////////////
  29            
  30            ////////////////////////////////////////////////////////////////////////////////
  31            //
  32            // XmlParser
  33            //
  34            //	This file contains a simple non-validating XML parser. Here are 
   35            //	several rules for well-formed XML:
  36            //
  37            //	    1.	Documents must begin with an XML declaration:
  38            //
  39            //		<?xml version="1.0" standalone="yes"?>
  40 mike  1.13 //
  41            //	    2.	Comments have the form:
  42            //
  43            //		<!-- blah blah blah -->
  44            //
  45            //	    3. The following entity references are supported:
  46            //
  47            //		&amp - ampersand
  48            //	 	&lt - less-than
  49            //		&gt - greater-than
  50            //		&quot - full quote
  51            //		&apos - apostrophe
  52            //
  53 kumpf 1.18 //             as well as character (numeric) references:
  54            
  55            //              &#49; - decimal reference for character '1'
  56            //              &#x31; - hexadecimal reference for character '1'
  57            //
  58 mike  1.13 //	    4. Element names and attribute names take the following form:
  59            //
   60            //		[A-Za-z_][A-Za-z_0-9-.:]*
  61            //
  62            //	    5.	Arbitrary data (CDATA) can be enclosed like this:
  63            //
  64            //		    <![CDATA[
  65            //		    ...
  66            //		    ]]>
  67            //
  68            //	    6.	Element names and attributes names are case-sensitive.
  69            //
  70            //	    7.	XmlAttribute values must be delimited by full or half quotes.
  71            //		XmlAttribute values must be delimited.
  72            //
  73            //	    8.  <!DOCTYPE...>
  74            //
  75            // TODO:
  76            //
  77 karl  1.15 //      ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is work.
  78 mike  1.13 //	Handle <!DOCTYPE...> sections which are complicated (containing
  79            //        rules rather than references to files).
  80            //
  81            //	Remove newlines from string literals:
  82            //
  83            //          Example: <xyz x="hello
  84            //		world">
  85            //
  86            ////////////////////////////////////////////////////////////////////////////////
  87            
  88 sage  1.14 #include <Pegasus/Common/Config.h>
  89 mike  1.13 #include <cctype>
  90            #include <cstdio>
  91            #include <cstdlib>
  92            #include <cstring>
  93            #include "XmlParser.h"
  94            #include "Logger.h"
  95 chuck 1.19 #include "ExceptionRep.h"
  96 mike  1.13 
  97            PEGASUS_NAMESPACE_BEGIN
  98            
  99            #define PEGASUS_ARRAY_T XmlEntry
 100            # include "ArrayImpl.h"
 101            #undef PEGASUS_ARRAY_T
 102            
 103            
 104            ////////////////////////////////////////////////////////////////////////////////
 105            //
 106            // Static helper functions
 107            //
 108            ////////////////////////////////////////////////////////////////////////////////
 109            
 110            static void _printValue(const char* p)
 111            {
 112                for (; *p; p++)
 113                {
 114            	if (*p == '\n')
 115            	    PEGASUS_STD(cout) << "\\n";
 116            	else if (*p == '\r')
 117 mike  1.13 	    PEGASUS_STD(cout) << "\\r";
 118            	else if (*p == '\t')
 119            	    PEGASUS_STD(cout) << "\\t";
 120            	else
 121            	    PEGASUS_STD(cout) << *p;
 122                }
 123            }
 124            
 125            struct EntityReference
 126            {
 127                const char* match;
 128                Uint32 length;
 129                char replacement;
 130            };
 131            
 132 kumpf 1.18 // ATTN: Add support for more entity references
 133 mike  1.13 static EntityReference _references[] =
 134            {
 135                { "&amp;", 5, '&' },
 136                { "&lt;", 4, '<' },
 137                { "&gt;", 4, '>' },
 138                { "&quot;", 6, '"' },
 139                { "&apos;", 6, '\'' }
 140            };
 141            
 142            static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
 143            
 144            // Remove all redundant spaces from the given string:
 145            
 146            static void _normalize(char* text)
 147            {
 148                Uint32 length = strlen(text);
 149                char* p = text;
 150                char* end = p + length;
 151            
 152                // Remove leading spaces:
 153            
 154 mike  1.13     while (isspace(*p))
 155            	p++;
 156            
 157                if (p != text)
 158            	memmove(text, p, end - p + 1);
 159            
 160                p = text;
 161            
 162                // Look for sequences of more than one space and remove all but one.
 163            
 164                for (;;)
 165                {
 166            	// Advance to the next space:
 167            
 168            	while (*p && !isspace(*p))
 169            	    p++;
 170            
 171            	if (!*p)
 172            	    break;
 173            
 174            	// Advance to the next non-space:
 175 mike  1.13 
 176            	char* q = p++;
 177            
 178            	while (isspace(*p))
 179            	    p++;
 180            
 181            	// Discard trailing spaces (if we are at the end):
 182            
 183            	if (!*p)
 184            	{
 185            	    *q = '\0';
 186            	    break;
 187            	}
 188            
 189            	// Remove the redundant spaces:
 190            
 191            	Uint32 n = p - q;
 192            
 193            	if (n > 1)
 194            	{
 195            	    *q++ = ' ';
 196 mike  1.13 	    memmove(q, p, end - p + 1);
 197            	    p = q;
 198            	}
 199                }
 200            }
 201            
 202            ////////////////////////////////////////////////////////////////////////////////
 203            //
 204            // XmlException
 205            //
 206            ////////////////////////////////////////////////////////////////////////////////
 207            
 208            static const char* _xmlMessages[] =
 209            {
 210                "Bad opening element",
 211                "Bad closing element",
 212                "Bad attribute name",
 213                "Exepected equal sign",
 214                "Bad attribute value",
 215                "A \"--\" sequence found within comment",
 216                "Unterminated comment",
 217 mike  1.13     "Unterminated CDATA block",
 218                "Unterminated DOCTYPE",
 219                "Too many attributes: parser only handles 10",
 220                "Malformed reference",
 221                "Expected a comment or CDATA following \"<!\" sequence",
 222                "Closing element does not match opening element",
 223                "One or more tags are still open",
 224                "More than one root element was encountered",
 225                "Validation error",
 226                "Semantic error"
 227            };
 228            
 229 chuck 1.19 static const char* _xmlKeys[] = 
 230            {
 231 humberto 1.20     "Common.XmlParser.BAD_START_TAG",
 232 chuck    1.19     "Common.XmlParser.BAD_END_TAG",
 233                   "Common.XmlParser.BAD_ATTRIBUTE_NAME",
 234                   "Common.XmlParser.EXPECTED_EQUAL_SIGN",
 235                   "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
 236                   "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
 237                   "Common.XmlParser.UNTERMINATED_COMMENT",
 238                   "Common.XmlParser.UNTERMINATED_CDATA",
 239                   "Common.XmlParser.UNTERMINATED_DOCTYPE",
 240                   "Common.XmlParser.TOO_MANY_ATTRIBUTES",
 241                   "Common.XmlParser.MALFORMED_REFERENCE",
 242                   "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
 243                   "Common.XmlParser.START_END_MISMATCH",
 244                   "Common.XmlParser.UNCLOSED_TAGS", 
 245                   "Common.XmlParser.MULTIPLE_ROOTS",
 246                   "Common.XmlParser.VALIDATION_ERROR",
 247                   "Common.XmlParser.SEMANTIC_ERROR"
 248               };
 249               
 250 chuck    1.23 // l10n replace _formMessage (comment out the old one)
 251 chuck    1.19 /*
 252 mike     1.13 static String _formMessage(Uint32 code, Uint32 line, const String& message)
 253               {
 254                   String result = _xmlMessages[Uint32(code) - 1];
 255               
 256                   char buffer[32];
 257                   sprintf(buffer, "%d", line);
 258                   result.append(": on line ");
 259                   result.append(buffer);
 260               
 261                   if (message.size())
 262                   {
 263               	result.append(": ");
 264               	result.append(message);
 265                   }
 266               
 267                   return result;
 268               }
 269 chuck    1.19 */
 270               
 271               static MessageLoaderParms _formMessage(Uint32 code, Uint32 line, const String& message)
 272               {
 273                   String dftMsg = _xmlMessages[Uint32(code) - 1];
 274                   String key = _xmlKeys[Uint32(code) - 1];
 275               	String msg = message;
 276               
 277                   dftMsg.append(": on line $0");
 278                   if (message.size())
 279                   {
 280 humberto 1.20     	msg = ": " + msg;
 281 chuck    1.19     	dftMsg.append("$1");
 282                   }    
 283               
 284                   return MessageLoaderParms(key, dftMsg, line ,msg);
 285               }
 286               
 287               static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
 288               {
 289                   String dftMsg = _xmlMessages[Uint32(code) - 1];
 290                   String key = _xmlKeys[Uint32(code) - 1];
 291               
 292                   dftMsg.append(": on line $0");
 293                
 294                   return MessageLoaderParms(key, dftMsg, line);
 295               }
 296               
 297 mike     1.13 
 298               XmlException::XmlException(
 299                   XmlException::Code code, 
 300                   Uint32 lineNumber,
 301                   const String& message) 
 302                   : Exception(_formMessage(code, lineNumber, message))
 303               {
 304               
 305               }
 306               
 307 chuck    1.19 
 308               XmlException::XmlException(
 309                   XmlException::Code code, 
 310                   Uint32 lineNumber,
 311                   MessageLoaderParms& msgParms) 
 312                   : Exception(_formPartialMessage(code, lineNumber))
 313               {
 314 humberto 1.21 	if (msgParms.default_msg.size())
 315                   {
 316                   	msgParms.default_msg = ": " + msgParms.default_msg;
 317                   } 
 318 chuck    1.19 	_rep->message.append(MessageLoader::getMessage(msgParms));
 319               }
 320               
 321               
 322 mike     1.13 ////////////////////////////////////////////////////////////////////////////////
 323               //
 324               // XmlValidationError
 325               //
 326               ////////////////////////////////////////////////////////////////////////////////
 327               
 328               XmlValidationError::XmlValidationError(
 329                   Uint32 lineNumber,
 330                   const String& message)
 331                   : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
 332               {
 333               
 334               }
 335               
 336 chuck    1.19 
 337               XmlValidationError::XmlValidationError(
 338                   Uint32 lineNumber,
 339                   MessageLoaderParms& msgParms)
 340                   : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
 341               {
 342               
 343               }
 344               
 345               
 346 mike     1.13 ////////////////////////////////////////////////////////////////////////////////
 347               //
 348               // XmlSemanticError
 349               //
 350               ////////////////////////////////////////////////////////////////////////////////
 351               
 352               XmlSemanticError::XmlSemanticError(
 353                   Uint32 lineNumber,
 354                   const String& message)
 355                   : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
 356               {
 357               
 358               }
 359 chuck    1.19 
 360               
 361               XmlSemanticError::XmlSemanticError(
 362                   Uint32 lineNumber,
 363                   MessageLoaderParms& msgParms)
 364                   : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
 365               {
 366               
 367               }
 368               
 369 mike     1.13 
 370               ////////////////////////////////////////////////////////////////////////////////
 371               //
 372               // XmlParser
 373               //
 374               ////////////////////////////////////////////////////////////////////////////////
 375               
 376               XmlParser::XmlParser(char* text) : _line(1), _text(text), _current(text), 
 377                   _restoreChar('\0'), _foundRoot(false)
 378               {
 379               
 380               }
 381               
 382               Boolean XmlParser::next(XmlEntry& entry)
 383               {
 384                   if (!_putBackStack.isEmpty())
 385                   {
 386               	entry = _putBackStack.top();
 387               	_putBackStack.pop();
 388               	return true;
 389                   }
 390 mike     1.13 
 391                   // If a character was overwritten with a null-terminator the last
 392                   // time this routine was called, then put back that character. Before
 393                   // exiting of course, restore the null-terminator.
 394               
 395                   char* nullTerminator = 0;
 396               
 397                   if (_restoreChar && !*_current)
 398                   {
 399               	nullTerminator = _current;
 400               	*_current = _restoreChar;
 401               	_restoreChar = '\0';
 402                   }
 403               
 404                   // Skip over any whitespace:
 405               
 406                   _skipWhitespace(_current);
 407               
 408                   if (!*_current)
 409                   {
 410               	if (nullTerminator)
 411 mike     1.13 	    *nullTerminator = '\0';
 412               
 413               	if (!_stack.isEmpty())
 414               	    throw XmlException(XmlException::UNCLOSED_TAGS, _line);
 415               
 416               	return false;
 417                   }
 418               
 419                   // Either a "<...>" or content begins next:
 420               
 421                   if (*_current == '<')
 422                   {
 423               	_current++;
 424               	_getElement(_current, entry);
 425               
 426               	if (nullTerminator)
 427               	    *nullTerminator = '\0';
 428               
 429               	if (entry.type == XmlEntry::START_TAG)
 430               	{
 431               	    if (_stack.isEmpty() && _foundRoot)
 432 mike     1.13 		throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
 433               
 434               	    _foundRoot = true;
 435               	    _stack.push((char*)entry.text);
 436               	}
 437               	else if (entry.type == XmlEntry::END_TAG)
 438               	{
 439               	    if (_stack.isEmpty())
 440               		throw XmlException(XmlException::START_END_MISMATCH, _line);
 441               
 442               	    if (strcmp(_stack.top(), entry.text) != 0)
 443               		throw XmlException(XmlException::START_END_MISMATCH, _line);
 444               
 445               	    _stack.pop();
 446               	}
 447               
 448               	return true;
 449                   }
 450                   else
 451                   {
 452               	entry.type = XmlEntry::CONTENT;
 453 mike     1.13 	entry.text = _current;
 454               	_getContent(_current);
 455               	_restoreChar = *_current;
 456               	*_current = '\0';
 457               
 458               	if (nullTerminator)
 459               	    *nullTerminator = '\0';
 460               
 461               	_substituteReferences((char*)entry.text);
 462               	_normalize((char*)entry.text);
 463               
 464               	return true;
 465                   }
 466               }
 467               
 468               void XmlParser::putBack(XmlEntry& entry)
 469               {
 470                   _putBackStack.push(entry);
 471               }
 472               
 473               XmlParser::~XmlParser()
 474 mike     1.13 {
 475                   // Nothing to do!
 476               }
 477               
 478               void XmlParser::_skipWhitespace(char*& p)
 479               {
 480                   while (*p && isspace(*p))
 481                   {
 482               	if (*p == '\n')
 483               	    _line++;
 484               
 485               	p++;
 486                   }
 487               }
 488               
 489               Boolean XmlParser::_getElementName(char*& p)
 490               {
 491 david    1.25     if (!(((*p >= 'A') && (*p <= 'Z')) ||
 492 kumpf    1.24           ((*p >= 'a') && (*p <= 'z')) ||
 493                         (*p == '_')))
 494 mike     1.13 	throw XmlException(XmlException::BAD_START_TAG, _line);
 495 kumpf    1.24     p++;
 496 mike     1.13 
 497 david    1.22     while ((*p) &&
 498               	   (((*p >= 'A') && (*p <= 'Z')) ||
 499               	    ((*p >= 'a') && (*p <= 'z')) ||
 500 kumpf    1.24 	    ((*p >= '0') && (*p <= '9')) ||
 501 david    1.22 	    *p == '_' || *p == '-' || *p == ':' || *p == '.'))
 502 mike     1.13 	p++;
 503               
 504                   // The next character must be a space:
 505               
 506                   if (isspace(*p))
 507                   {
 508               	*p++ = '\0';
 509               	_skipWhitespace(p);
 510                   }
 511               
 512                   if (*p == '>')
 513                   {
 514               	*p++ = '\0';
 515               	return true;
 516                   }
 517               
 518                   return false;
 519               }
 520               
 521               Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement)
 522               {
 523 mike     1.13     openCloseElement = false;
 524               
 525 david    1.25     if (!(((*p >= 'A') && (*p <= 'Z')) ||
 526 kumpf    1.24           ((*p >= 'a') && (*p <= 'z')) ||
 527                         (*p == '_')))
 528 mike     1.13 	throw XmlException(XmlException::BAD_START_TAG, _line);
 529 kumpf    1.24     p++;
 530 mike     1.13 
 531 david    1.22     while ((*p) &&
 532               	   (((*p >= 'A') && (*p <= 'Z')) ||
 533               	    ((*p >= 'a') && (*p <= 'z')) ||
 534 kumpf    1.24 	    ((*p >= '0') && (*p <= '9')) ||
 535 david    1.22 	    *p == '_' || *p == '-' || *p == ':' || *p == '.'))
 536 mike     1.13 	p++;
 537               
 538                   // The next character must be a space:
 539               
 540                   if (isspace(*p))
 541                   {
 542               	*p++ = '\0';
 543               	_skipWhitespace(p);
 544                   }
 545               
 546                   if (*p == '>')
 547                   {
 548               	*p++ = '\0';
 549               	return true;
 550                   }
 551               
 552                   if (p[0] == '/' && p[1] == '>')
 553                   {
 554               	openCloseElement = true;
 555               	*p = '\0';
 556               	p += 2;
 557 mike     1.13 	return true;
 558                   }
 559               
 560                   return false;
 561               }
 562               
 563               void XmlParser::_getAttributeNameAndEqual(char*& p)
 564               {
 565 david    1.25     if (!(((*p >= 'A') && (*p <= 'Z')) ||
 566 kumpf    1.24           ((*p >= 'a') && (*p <= 'z')) ||
 567                         (*p == '_')))
 568 mike     1.13 	throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
 569 kumpf    1.24     p++;
 570 mike     1.13 
 571 david    1.22     while ((*p) &&
 572               	   (((*p >= 'A') && (*p <= 'Z')) ||
 573               	    ((*p >= 'a') && (*p <= 'z')) ||
 574 kumpf    1.24 	    ((*p >= '0') && (*p <= '9')) ||
 575 david    1.22 	    *p == '_' || *p == '-' || *p == ':' || *p == '.'))
 576 mike     1.13 	p++;
 577               
 578                   char* term = p;
 579               
 580                   _skipWhitespace(p);
 581               
 582                   if (*p != '=')
 583               	throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
 584               
 585                   p++;
 586               
 587                   _skipWhitespace(p);
 588               
 589                   *term = '\0';
 590               }
 591               
 592               void XmlParser::_getAttributeValue(char*& p)
 593               {
 594                   // ATTN-B: handle values contained in semiquotes:
 595               
 596                   if (*p != '"' && *p != '\'')
 597 mike     1.13 	throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 598               
 599                   char startChar = *p++;
 600               
 601                   while (*p && *p != startChar)
 602               	p++;
 603               
 604                   if (*p != startChar)
 605               	throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 606               
 607                   *p++ = '\0';
 608               }
 609               
 610               void XmlParser::_getComment(char*& p)
 611               {
 612                   // Now p points to first non-whitespace character beyond "<--" sequence:
 613               
 614                   for (; *p; p++)
 615                   {
 616               	if (p[0] == '-' && p[1] == '-')
 617               	{
 618 mike     1.13 	    if (p[2] != '>')
 619               	    {
 620               		throw XmlException(
 621               		    XmlException::MINUS_MINUS_IN_COMMENT, _line);
 622               	    }
 623               
 624               	    // Find end of comment (excluding whitespace):
 625               
 626               	    *p = '\0';
 627               	    p += 3;
 628               	    return;
 629               	}
 630                   }
 631               
 632                   // If it got this far, then the comment is unterminated:
 633               
 634                   throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
 635               }
 636               
 637               void XmlParser::_getCData(char*& p)
 638               {
 639 mike     1.13     // At this point p points one past "<![CDATA[" sequence:
 640               
 641                   for (; *p; p++)
 642                   {
 643               	if (p[0] == ']' && p[1] == ']' && p[2] == '>')
 644               	{
 645               	    *p = '\0';
 646               	    p += 3;
 647               	    return;
 648               	}
 649               	else if (*p == '\n')
 650               	    _line++;
 651                   }
 652               
 653                   // If it got this far, then the comment is unterminated:
 654               
 655                   throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
 656               }
 657               
 658               void XmlParser::_getDocType(char*& p)
 659               {
 660 mike     1.13     // Just ignore the DOCTYPE command for now:
 661               
 662                   for (; *p && *p != '>'; p++)
 663                   {
 664               	if (*p == '\n')
 665               	    _line++;
 666                   }
 667               
 668                   if (*p != '>')
 669               	throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
 670               
 671                   p++;
 672               }
 673               
 674               void XmlParser::_getContent(char*& p)
 675               {
 676                   while (*p && *p != '<')
 677                   {
 678               	if (*p == '\n')
 679               	    _line++;
 680               
 681 mike     1.13 	p++;
 682                   }
 683               }
 684               
 685               void XmlParser::_substituteReferences(char* text)
 686               {
 687                   Uint32 rem = strlen(text);
 688               
 689                   for (char* p = text; *p; p++, rem--)
 690                   {
 691               	if (*p == '&')
 692               	{
 693 kumpf    1.18             // Process character or entity reference
 694 mike     1.13 
 695 kumpf    1.18             Uint16 referenceChar = 0;
 696                           Uint32 referenceLength = 0;
 697                           XmlException::Code code = XmlException::MALFORMED_REFERENCE;
 698               
 699                           if (*(p+1) == '#')
 700                           {
 701                               // Found a character (numeric) reference
 702                               // Determine whether it is decimal or hex
 703                               if (*(p+2) == 'x')
 704                               {
 705                                   // Decode a hexadecimal character reference
 706                                   char* q = p+3;
 707               
 708                                   // At most four digits are allowed, plus trailing ';'
 709                                   Uint32 numDigits;
 710                                   for (numDigits = 0; numDigits < 5; numDigits++, q++)
 711                                   {
 712                                       if (isdigit(*q))
 713                                       {
 714                                           referenceChar = (referenceChar << 4);
 715                                           referenceChar += (*q - '0');
 716 kumpf    1.18                         }
 717                                       else if ((*q >= 'A') && (*q <= 'F'))
 718                                       {
 719                                           referenceChar = (referenceChar << 4);
 720                                           referenceChar += (*q - 'A' + 10);
 721                                       }
 722                                       else if ((*q >= 'a') && (*q <= 'f'))
 723                                       {
 724                                           referenceChar = (referenceChar << 4);
 725                                           referenceChar += (*q - 'a' + 10);
 726                                       }
 727                                       else if (*q == ';')
 728                                       {
 729                                           break;
 730                                       }
 731                                       else
 732                                       {
 733                                           throw XmlException(code, _line);
 734                                       }
 735                                   }
 736               
 737 kumpf    1.18                     // Hex number must be 1 - 4 digits
 738                                   if ((numDigits == 0) || (numDigits > 4))
 739                                   {
 740                                       throw XmlException(code, _line);
 741                                   }
 742               
 743                                   // ATTN: Currently do not support 16-bit characters
 744                                   if (referenceChar > 0xff)
 745                                   {
 746                                       // ATTN: Is there a good way to say "unsupported"?
 747                                       throw XmlException(code, _line);
 748                                   }
 749               
 750                                   referenceLength = numDigits + 4;
 751                               }
 752                               else
 753                               {
 754                                   // Decode a decimal character reference
 755                                   Uint32 newChar = 0;
 756                                   char* q = p+2;
 757               
 758 kumpf    1.18                     // At most five digits are allowed, plus trailing ';'
 759                                   Uint32 numDigits;
 760                                   for (numDigits = 0; numDigits < 6; numDigits++, q++)
 761                                   {
 762                                       if (isdigit(*q))
 763                                       {
 764                                           newChar = (newChar * 10);
 765                                           newChar += (*q - '0');
 766                                       }
 767                                       else if (*q == ';')
 768                                       {
 769                                           break;
 770                                       }
 771                                       else
 772                                       {
 773                                           throw XmlException(code, _line);
 774                                       }
 775                                   }
 776               
 777                                   // Decimal number must be 1 - 5 digits and fit in 16 bits
 778                                   if ((numDigits == 0) || (numDigits > 5) ||
 779 kumpf    1.18                         (newChar > 0xffff))
 780                                   {
 781                                       throw XmlException(code, _line);
 782                                   }
 783               
 784                                   // ATTN: Currently do not support 16-bit characters
 785                                   if (newChar > 0xff)
 786                                   {
 787                                       // ATTN: Is there a good way to say "unsupported"?
 788                                       throw XmlException(code, _line);
 789                                   }
 790               
 791                                   referenceChar = Uint16(newChar);
 792                                   referenceLength = numDigits + 3;
 793                               }
 794                           }
 795                           else
 796                           {
 797                               // Check for entity reference
 798                               // ATTN: Inefficient if many entity references are supported
 799                               Uint32 i;
 800 kumpf    1.18                 for (i = 0; i < _REFERENCES_SIZE; i++)
 801                               {
 802                                   Uint32 length = _references[i].length;
 803                                   const char* match = _references[i].match;
 804               
 805                                   if (strncmp(p, _references[i].match, length) == 0)
 806                                   {
 807                                       referenceChar = _references[i].replacement;
 808                                       referenceLength = length;
 809                                       break;
 810                                   }
 811                               }
 812               
 813                               if (i == _REFERENCES_SIZE)
 814                               {
 815                                   // Didn't recognize the entity reference
 816                                   // ATTN: Is there a good way to say "unsupported"?
 817                                   throw XmlException(code, _line);
 818                               }
 819                           }
 820               
 821 kumpf    1.18             // Replace the reference with the correct character
 822                           *p = (char)referenceChar;
 823                           char* q = p + referenceLength;
 824                           rem = rem - referenceLength + 1;
 825                           memmove(p + 1, q, rem);
 826 mike     1.13 	}
 827                   }
 828               }
 829               
 830               static const char _EMPTY_STRING[] = "";
 831               
 832               void XmlParser::_getElement(char*& p, XmlEntry& entry)
 833               {
 834                   entry.attributeCount = 0;
 835               
 836                   //--------------------------------------------------------------------------
 837                   // Get the element name (expect one of these: '?', '!', [A-Za-z_])
 838                   //--------------------------------------------------------------------------
 839               
 840                   if (*p == '?')
 841                   {
 842               	entry.type = XmlEntry::XML_DECLARATION;
 843               	entry.text = ++p;
 844               
 845               	Boolean openCloseElement = false;
 846               
 847 mike     1.13 	if (_getElementName(p))
 848               	    return;
 849                   }
 850                   else if (*p == '!')
 851                   {
 852               	p++;
 853               
 854               	// Expect a comment or CDATA:
 855               
 856               	if (p[0] == '-' && p[1] == '-')
 857               	{
 858               	    p += 2;
 859               	    entry.type = XmlEntry::COMMENT;
 860               	    entry.text = p;
 861               	    _getComment(p);
 862               	    return;
 863               	}
 864               	else if (memcmp(p, "[CDATA[", 7) == 0)
 865               	{
 866               	    p += 7;
 867               	    entry.type = XmlEntry::CDATA;
 868 mike     1.13 	    entry.text = p;
 869               	    _getCData(p);
 870               	    return;
 871               	}
 872               	else if (memcmp(p, "DOCTYPE", 7) == 0)
 873               	{
 874               	    entry.type = XmlEntry::DOCTYPE;
 875               	    entry.text = _EMPTY_STRING;
 876               	    _getDocType(p);
 877               	    return;
 878               	}
 879               	throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
 880                   }
 881                   else if (*p == '/')
 882                   {
 883               	entry.type = XmlEntry::END_TAG;
 884               	entry.text = ++p;
 885               
 886               	if (!_getElementName(p))
 887               	    throw(XmlException(XmlException::BAD_END_TAG, _line));
 888               
 889 mike     1.13 	return;
 890                   }
 891 david    1.25     else if ((((*p >= 'A') && (*p <= 'Z')) ||
 892 kumpf    1.24               ((*p >= 'a') && (*p <= 'z')) ||
 893                             (*p == '_')))
 894 mike     1.13     {
 895               	entry.type = XmlEntry::START_TAG;
 896               	entry.text = p;
 897               
 898               	Boolean openCloseElement = false;
 899               
 900               	if (_getOpenElementName(p, openCloseElement))
 901               	{
 902               	    if (openCloseElement)
 903               		entry.type = XmlEntry::EMPTY_TAG;
 904               	    return;
 905               	}
 906                   }
 907                   else
 908               	throw XmlException(XmlException::BAD_START_TAG, _line);
 909               
 910                   //--------------------------------------------------------------------------
 911                   // Grab all the attributes:
 912                   //--------------------------------------------------------------------------
 913               
 914                   for (;;)
 915 mike     1.13     {
 916               	if (entry.type == XmlEntry::XML_DECLARATION)
 917               	{
 918               	    if (p[0] == '?' && p[1] == '>')
 919               	    {
 920               		p += 2;
 921               		return;
 922               	    }
 923               	}
 924               	else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
 925               	{
 926               	    entry.type = XmlEntry::EMPTY_TAG;
 927               	    p += 2;
 928               	    return;
 929               	}
 930               	else if (*p == '>')
 931               	{
 932               	    p++;
 933               	    return;
 934               	}
 935               
 936 mike     1.13 	XmlAttribute attr;
 937               	attr.name = p;
 938               	_getAttributeNameAndEqual(p);
 939               
 940               	if (*p != '"' && *p != '\'')
 941               	    throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 942               
 943               	attr.value = p + 1;
 944               	_getAttributeValue(p);
 945               
 946               	if (entry.type == XmlEntry::XML_DECLARATION)
 947               	{
 948               	    // The next thing must a space or a "?>":
 949               
 950               	    if (!(p[0] == '?' && p[1] == '>') && !isspace(*p))
 951               	    {
 952               		throw XmlException(
 953               		    XmlException::BAD_ATTRIBUTE_VALUE, _line);
 954               	    }
 955               	}
 956               	else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || isspace(*p)))
 957 mike     1.13 	{
 958               	    // The next thing must be a space or a '>':
 959               
 960               	    throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 961               	}
 962               
 963               	_skipWhitespace(p);
 964               
 965               	if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES)
 966               	    throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line);
 967               
 968               	_substituteReferences((char*)attr.value);
 969               	entry.attributes[entry.attributeCount++] = attr;
 970                   }
 971               }
 972               
 973               static const char* _typeStrings[] =
 974               {
 975                   "XML_DECLARATION", 
 976                   "START_TAG", 
 977                   "EMPTY_TAG", 
 978 mike     1.13     "END_TAG", 
 979                   "COMMENT",
 980                   "CDATA",
 981                   "DOCTYPE",
 982                   "CONTENT" 
 983               };
 984               
 985               void XmlEntry::print() const
 986               {
 987                   PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
 988               
 989                   Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
 990               
 991                   if (needQuotes)
 992               	PEGASUS_STD(cout) << "\"";
 993               	
 994                   _printValue(text);
 995               
 996                   if (needQuotes)
 997               	PEGASUS_STD(cout) << "\"";
 998               
 999 mike     1.13     PEGASUS_STD(cout) << '\n';
1000               
1001                   for (Uint32 i = 0; i < attributeCount; i++)
1002                   {
1003               	PEGASUS_STD(cout) << "    " << attributes[i].name << "=\"";
1004               	_printValue(attributes[i].value);
1005               	PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
1006                   }
1007               }
1008               
1009               const XmlAttribute* XmlEntry::findAttribute(
1010                   const char* name) const
1011               {
1012                   for (Uint32 i = 0; i < attributeCount; i++)
1013                   {
1014               	if (strcmp(attributes[i].name, name) == 0)
1015               	    return &attributes[i];
1016                   }
1017               
1018                   return 0;
1019               }
1020 mike     1.13 
// Find the first non-whitespace character (stored in first) and the position
// one past the last non-whitespace character (stored in last). For example,
// consider this string:
//
//      "   87     "
//
// The first pointer would point to '8' and the last pointer would point one
// beyond '7'. For an empty or all-whitespace string, first and last both
// point to the terminating null.
//
// str must be a null-terminated string.

static void _findEnds(
    const char* str,
    const char*& first,
    const char*& last)
{
    first = str;

    // isspace() is only defined for values representable as unsigned char
    // (or EOF), so bytes >= 0x80 must not be passed as a plain (possibly
    // negative) char.
    while (isspace(static_cast<unsigned char>(*first)))
        first++;

    if (!*first)
    {
        // Nothing but whitespace: report an empty range.
        last = first;
        return;
    }

    last = first + strlen(first);

    // Back up over trailing whitespace.
    while (last != first && isspace(static_cast<unsigned char>(last[-1])))
        last--;
}
1050               
1051               Boolean XmlEntry::getAttributeValue(
1052                   const char* name, 
1053                   Uint32& value) const
1054               {
1055                   const XmlAttribute* attr = findAttribute(name);
1056               
1057                   if (!attr)
1058               	return false;
1059               
1060                   const char* first;
1061                   const char* last;
1062 mike     1.13     _findEnds(attr->value, first, last);
1063               
1064                   char* end = 0;
1065                   long tmp = strtol(first, &end, 10);
1066               
1067                   if (!end || end != last)
1068               	return false;
1069               
1070                   value = Uint32(tmp);
1071                   return true;
1072               }
1073               
1074               Boolean XmlEntry::getAttributeValue(
1075                   const char* name, 
1076                   Real32& value) const
1077               {
1078                   const XmlAttribute* attr = findAttribute(name);
1079               
1080                   if (!attr)
1081               	return false;
1082               
1083 mike     1.13     const char* first;
1084                   const char* last;
1085                   _findEnds(attr->value, first, last);
1086               
1087                   char* end = 0;
1088                   double tmp = strtod(first, &end);
1089               
1090                   if (!end || end != last)
1091               	return false;
1092               
1093                   value = Uint32(tmp);
1094                   return true;
1095               }
1096               
1097               Boolean XmlEntry::getAttributeValue(
1098                   const char* name, 
1099                   const char*& value) const
1100               {
1101                   const XmlAttribute* attr = findAttribute(name);
1102               
1103                   if (!attr)
1104 mike     1.13 	return false;
1105               
1106                   value = attr->value;
1107                   return true;
1108               }
1109               
1110               Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1111               {
1112                   const char* tmp;
1113               
1114                   if (!getAttributeValue(name, tmp))
1115               	return false;
1116               
1117 david    1.22     value = String(tmp,STRING_FLAG_UTF8);
1118 mike     1.13     return true;
1119               }
1120               
1121               void XmlAppendCString(Array<Sint8>& out, const char* str)
1122               {
1123                   out.append(str, strlen(str));
1124               }
1125               
1126               PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2