(file) Return to XmlParser.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 mike  1.13 //%/////////////////////////////////////////////////////////////////////////////
   2            //
   3 kumpf 1.16 // Copyright (c) 2000, 2001, 2002 BMC Software, Hewlett-Packard Company, IBM,
   4            // The Open Group, Tivoli Systems
   5 mike  1.13 //
   6            // Permission is hereby granted, free of charge, to any person obtaining a copy
   7 kumpf 1.16 // of this software and associated documentation files (the "Software"), to
   8            // deal in the Software without restriction, including without limitation the
   9            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  10 mike  1.13 // sell copies of the Software, and to permit persons to whom the Software is
  11            // furnished to do so, subject to the following conditions:
  12            // 
  13 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
  14 mike  1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
  15            // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
  16 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
  17            // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  18            // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  19 mike  1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  20            // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  21            //
  22            //==============================================================================
  23            //
  24            // Author: Mike Brasher (mbrasher@bmc.com)
  25            //
  26            // Modified By:
  27            //
  28            //%/////////////////////////////////////////////////////////////////////////////
  29            
  30            ////////////////////////////////////////////////////////////////////////////////
  31            //
  32            // XmlParser
  33            //
  34            //	This file contains a simple non-validating XML parser. Here are 
  35            //	several rules for well-formed XML:
  36            //
  37            //	    1.	Documents must begin with an XML declaration:
  38            //
  39            //		<?xml version="1.0" standalone="yes"?>
  40 mike  1.13 //
  41            //	    2.	Comments have the form:
  42            //
  43            //		<!-- blah blah blah -->
  44            //
  45            //	    3. The following entity references are supported:
  46            //
  47            //		&amp; - ampersand
  48            //		&lt; - less-than
  49            //		&gt; - greater-than
  50            //		&quot; - full quote
  51            //		&apos; - apostrophe
  52            //
  53 kumpf 1.18 //             as well as character (numeric) references:
  54            //
  55            //              &#49; - decimal reference for character '1'
  56            //              &#x31; - hexadecimal reference for character '1'
  57            //
  58 mike  1.13 //	    4. Element names and attribute names take the following form:
  59            //
  60            //		[A-Za-z_][A-Za-z_0-9-.:]*
  61            //
  62            //	    5.	Arbitrary data (CDATA) can be enclosed like this:
  63            //
  64            //		    <![CDATA[
  65            //		    ...
  66            //		    ]]>
  67            //
  68            //	    6.	Element names and attributes names are case-sensitive.
  69            //
  70            //	    7.	Attribute values must be delimited by double (") or single (')
  71            //		quotes; an undelimited value is rejected as a bad attribute value.
  72            //
  73            //	    8.  <!DOCTYPE...>
  74            //
  75            // TODO:
  76            //
  77 karl  1.15 //      ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is work.
  78 mike  1.13 //	Handle <!DOCTYPE...> sections which are complicated (containing
  79            //        rules rather than references to files).
  80            //
  81            //	Remove newlines from string literals:
  82            //
  83            //          Example: <xyz x="hello
  84            //		world">
  85            //
  86            ////////////////////////////////////////////////////////////////////////////////
  87            
  88 sage  1.14 #include <Pegasus/Common/Config.h>
  89 mike  1.13 #include <cctype>
  90            #include <cstdio>
  91            #include <cstdlib>
  92            #include <cstring>
  93            #include "XmlParser.h"
  94            #include "Logger.h"
  95 chuck 1.19 #include "ExceptionRep.h"
  96 mike  1.13 
  97            PEGASUS_NAMESPACE_BEGIN
  98            
  99            #define PEGASUS_ARRAY_T XmlEntry
 100            # include "ArrayImpl.h"
 101            #undef PEGASUS_ARRAY_T
 102            
 103            
 104            ////////////////////////////////////////////////////////////////////////////////
 105            //
 106            // Static helper functions
 107            //
 108            ////////////////////////////////////////////////////////////////////////////////
 109            
 110            static void _printValue(const char* p)
 111            {
 112                for (; *p; p++)
 113                {
 114            	if (*p == '\n')
 115            	    PEGASUS_STD(cout) << "\\n";
 116            	else if (*p == '\r')
 117 mike  1.13 	    PEGASUS_STD(cout) << "\\r";
 118            	else if (*p == '\t')
 119            	    PEGASUS_STD(cout) << "\\t";
 120            	else
 121            	    PEGASUS_STD(cout) << *p;
 122                }
 123            }
 124            
 125            struct EntityReference
 126            {
 127                const char* match;
 128                Uint32 length;
 129                char replacement;
 130            };
 131            
 132 kumpf 1.18 // ATTN: Add support for more entity references
 133 mike  1.13 static EntityReference _references[] =
 134            {
 135                { "&amp;", 5, '&' },
 136                { "&lt;", 4, '<' },
 137                { "&gt;", 4, '>' },
 138                { "&quot;", 6, '"' },
 139                { "&apos;", 6, '\'' }
 140            };
 141            
 142            static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
 143            
 144            // Remove all redundant spaces from the given string:
 145            
 146            static void _normalize(char* text)
 147            {
 148                Uint32 length = strlen(text);
 149                char* p = text;
 150                char* end = p + length;
 151            
 152                // Remove leading spaces:
 153            
 154 mike  1.13     while (isspace(*p))
 155            	p++;
 156            
 157                if (p != text)
 158            	memmove(text, p, end - p + 1);
 159            
 160                p = text;
 161            
 162                // Look for sequences of more than one space and remove all but one.
 163            
 164                for (;;)
 165                {
 166            	// Advance to the next space:
 167            
 168            	while (*p && !isspace(*p))
 169            	    p++;
 170            
 171            	if (!*p)
 172            	    break;
 173            
 174            	// Advance to the next non-space:
 175 mike  1.13 
 176            	char* q = p++;
 177            
 178            	while (isspace(*p))
 179            	    p++;
 180            
 181            	// Discard trailing spaces (if we are at the end):
 182            
 183            	if (!*p)
 184            	{
 185            	    *q = '\0';
 186            	    break;
 187            	}
 188            
 189            	// Remove the redundant spaces:
 190            
 191            	Uint32 n = p - q;
 192            
 193            	if (n > 1)
 194            	{
 195            	    *q++ = ' ';
 196 mike  1.13 	    memmove(q, p, end - p + 1);
 197            	    p = q;
 198            	}
 199                }
 200            }
 201            
 202            ////////////////////////////////////////////////////////////////////////////////
 203            //
 204            // XmlException
 205            //
 206            ////////////////////////////////////////////////////////////////////////////////
 207            
 208            static const char* _xmlMessages[] =
 209            {
 210                "Bad opening element",
 211                "Bad closing element",
 212                "Bad attribute name",
 213                "Exepected equal sign",
 214                "Bad attribute value",
 215                "A \"--\" sequence found within comment",
 216                "Unterminated comment",
 217 mike  1.13     "Unterminated CDATA block",
 218                "Unterminated DOCTYPE",
 219                "Too many attributes: parser only handles 10",
 220                "Malformed reference",
 221                "Expected a comment or CDATA following \"<!\" sequence",
 222                "Closing element does not match opening element",
 223                "One or more tags are still open",
 224                "More than one root element was encountered",
 225                "Validation error",
 226                "Semantic error"
 227            };
 228            
 229 chuck 1.19 static const char* _xmlKeys[] = 
 230            {
 231 humberto 1.20     "Common.XmlParser.BAD_START_TAG",
 232 chuck    1.19     "Common.XmlParser.BAD_END_TAG",
 233                   "Common.XmlParser.BAD_ATTRIBUTE_NAME",
 234                   "Common.XmlParser.EXPECTED_EQUAL_SIGN",
 235                   "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
 236                   "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
 237                   "Common.XmlParser.UNTERMINATED_COMMENT",
 238                   "Common.XmlParser.UNTERMINATED_CDATA",
 239                   "Common.XmlParser.UNTERMINATED_DOCTYPE",
 240                   "Common.XmlParser.TOO_MANY_ATTRIBUTES",
 241                   "Common.XmlParser.MALFORMED_REFERENCE",
 242                   "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
 243                   "Common.XmlParser.START_END_MISMATCH",
 244                   "Common.XmlParser.UNCLOSED_TAGS", 
 245                   "Common.XmlParser.MULTIPLE_ROOTS",
 246                   "Common.XmlParser.VALIDATION_ERROR",
 247                   "Common.XmlParser.SEMANTIC_ERROR"
 248               };
 249               
 250 chuck    1.23 // l10n replace _formMessage (comment out the old one)
 251 chuck    1.19 /*
 252 mike     1.13 static String _formMessage(Uint32 code, Uint32 line, const String& message)
 253               {
 254                   String result = _xmlMessages[Uint32(code) - 1];
 255               
 256                   char buffer[32];
 257                   sprintf(buffer, "%d", line);
 258                   result.append(": on line ");
 259                   result.append(buffer);
 260               
 261                   if (message.size())
 262                   {
 263               	result.append(": ");
 264               	result.append(message);
 265                   }
 266               
 267                   return result;
 268               }
 269 chuck    1.19 */
 270               
 271               static MessageLoaderParms _formMessage(Uint32 code, Uint32 line, const String& message)
 272               {
 273                   String dftMsg = _xmlMessages[Uint32(code) - 1];
 274                   String key = _xmlKeys[Uint32(code) - 1];
 275               	String msg = message;
 276               
 277                   dftMsg.append(": on line $0");
 278                   if (message.size())
 279                   {
 280 humberto 1.20     	msg = ": " + msg;
 281 chuck    1.19     	dftMsg.append("$1");
 282                   }    
 283               
 284                   return MessageLoaderParms(key, dftMsg, line ,msg);
 285               }
 286               
 287               static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
 288               {
 289                   String dftMsg = _xmlMessages[Uint32(code) - 1];
 290                   String key = _xmlKeys[Uint32(code) - 1];
 291               
 292                   dftMsg.append(": on line $0");
 293                
 294                   return MessageLoaderParms(key, dftMsg, line);
 295               }
 296               
 297 mike     1.13 
 298               XmlException::XmlException(
 299                   XmlException::Code code, 
 300                   Uint32 lineNumber,
 301                   const String& message) 
 302                   : Exception(_formMessage(code, lineNumber, message))
 303               {
 304               
 305               }
 306               
 307 chuck    1.19 
 308               XmlException::XmlException(
 309                   XmlException::Code code, 
 310                   Uint32 lineNumber,
 311                   MessageLoaderParms& msgParms) 
 312                   : Exception(_formPartialMessage(code, lineNumber))
 313               {
 314 humberto 1.21 	if (msgParms.default_msg.size())
 315                   {
 316                   	msgParms.default_msg = ": " + msgParms.default_msg;
 317                   } 
 318 chuck    1.19 	_rep->message.append(MessageLoader::getMessage(msgParms));
 319               }
 320               
 321               
 322 mike     1.13 ////////////////////////////////////////////////////////////////////////////////
 323               //
 324               // XmlValidationError
 325               //
 326               ////////////////////////////////////////////////////////////////////////////////
 327               
 328               XmlValidationError::XmlValidationError(
 329                   Uint32 lineNumber,
 330                   const String& message)
 331                   : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
 332               {
 333               
 334               }
 335               
 336 chuck    1.19 
 337               XmlValidationError::XmlValidationError(
 338                   Uint32 lineNumber,
 339                   MessageLoaderParms& msgParms)
 340                   : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
 341               {
 342               
 343               }
 344               
 345               
 346 mike     1.13 ////////////////////////////////////////////////////////////////////////////////
 347               //
 348               // XmlSemanticError
 349               //
 350               ////////////////////////////////////////////////////////////////////////////////
 351               
 352               XmlSemanticError::XmlSemanticError(
 353                   Uint32 lineNumber,
 354                   const String& message)
 355                   : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
 356               {
 357               
 358               }
 359 chuck    1.19 
 360               
 361               XmlSemanticError::XmlSemanticError(
 362                   Uint32 lineNumber,
 363                   MessageLoaderParms& msgParms)
 364                   : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
 365               {
 366               
 367               }
 368               
 369 mike     1.13 
 370               ////////////////////////////////////////////////////////////////////////////////
 371               //
 372               // XmlParser
 373               //
 374               ////////////////////////////////////////////////////////////////////////////////
 375               
 376               XmlParser::XmlParser(char* text) : _line(1), _text(text), _current(text), 
 377                   _restoreChar('\0'), _foundRoot(false)
 378               {
 379               
 380               }
 381               
 382               Boolean XmlParser::next(XmlEntry& entry)
 383               {
 384                   if (!_putBackStack.isEmpty())
 385                   {
 386               	entry = _putBackStack.top();
 387               	_putBackStack.pop();
 388               	return true;
 389                   }
 390 mike     1.13 
 391                   // If a character was overwritten with a null-terminator the last
 392                   // time this routine was called, then put back that character. Before
 393                   // exiting of course, restore the null-terminator.
 394               
 395                   char* nullTerminator = 0;
 396               
 397                   if (_restoreChar && !*_current)
 398                   {
 399               	nullTerminator = _current;
 400               	*_current = _restoreChar;
 401               	_restoreChar = '\0';
 402                   }
 403               
 404                   // Skip over any whitespace:
 405               
 406                   _skipWhitespace(_current);
 407               
 408                   if (!*_current)
 409                   {
 410               	if (nullTerminator)
 411 mike     1.13 	    *nullTerminator = '\0';
 412               
 413               	if (!_stack.isEmpty())
 414               	    throw XmlException(XmlException::UNCLOSED_TAGS, _line);
 415               
 416               	return false;
 417                   }
 418               
 419                   // Either a "<...>" or content begins next:
 420               
 421                   if (*_current == '<')
 422                   {
 423               	_current++;
 424               	_getElement(_current, entry);
 425               
 426               	if (nullTerminator)
 427               	    *nullTerminator = '\0';
 428               
 429               	if (entry.type == XmlEntry::START_TAG)
 430               	{
 431               	    if (_stack.isEmpty() && _foundRoot)
 432 mike     1.13 		throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
 433               
 434               	    _foundRoot = true;
 435               	    _stack.push((char*)entry.text);
 436               	}
 437               	else if (entry.type == XmlEntry::END_TAG)
 438               	{
 439               	    if (_stack.isEmpty())
 440               		throw XmlException(XmlException::START_END_MISMATCH, _line);
 441               
 442               	    if (strcmp(_stack.top(), entry.text) != 0)
 443               		throw XmlException(XmlException::START_END_MISMATCH, _line);
 444               
 445               	    _stack.pop();
 446               	}
 447               
 448               	return true;
 449                   }
 450                   else
 451                   {
 452               	entry.type = XmlEntry::CONTENT;
 453 mike     1.13 	entry.text = _current;
 454               	_getContent(_current);
 455               	_restoreChar = *_current;
 456               	*_current = '\0';
 457               
 458               	if (nullTerminator)
 459               	    *nullTerminator = '\0';
 460               
 461               	_substituteReferences((char*)entry.text);
 462               	_normalize((char*)entry.text);
 463               
 464               	return true;
 465                   }
 466               }
 467               
 468               void XmlParser::putBack(XmlEntry& entry)
 469               {
 470                   _putBackStack.push(entry);
 471               }
 472               
 473               XmlParser::~XmlParser()
 474 mike     1.13 {
 475                   // Nothing to do!
 476               }
 477               
 478               void XmlParser::_skipWhitespace(char*& p)
 479               {
 480                   while (*p && isspace(*p))
 481                   {
 482               	if (*p == '\n')
 483               	    _line++;
 484               
 485               	p++;
 486                   }
 487               }
 488               
 489               Boolean XmlParser::_getElementName(char*& p)
 490               {
 491 kumpf    1.24     if (!String::isUTF8(p) ||
 492                       !(((*p >= 'A') && (*p <= 'Z')) ||
 493                         ((*p >= 'a') && (*p <= 'z')) ||
 494                         (*p == '_')))
 495 mike     1.13 	throw XmlException(XmlException::BAD_START_TAG, _line);
 496 kumpf    1.24     p++;
 497 mike     1.13 
 498 david    1.22     while ((*p) &&
 499               	   (((*p >= 'A') && (*p <= 'Z')) ||
 500               	    ((*p >= 'a') && (*p <= 'z')) ||
 501 kumpf    1.24 	    ((*p >= '0') && (*p <= '9')) ||
 502 david    1.22 	    *p == '_' || *p == '-' || *p == ':' || *p == '.'))
 503 mike     1.13 	p++;
 504               
 505                   // The next character must be a space:
 506               
 507                   if (isspace(*p))
 508                   {
 509               	*p++ = '\0';
 510               	_skipWhitespace(p);
 511                   }
 512               
 513                   if (*p == '>')
 514                   {
 515               	*p++ = '\0';
 516               	return true;
 517                   }
 518               
 519                   return false;
 520               }
 521               
 522               Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement)
 523               {
 524 mike     1.13     openCloseElement = false;
 525               
 526 kumpf    1.24     if (!String::isUTF8(p) ||
 527                       !(((*p >= 'A') && (*p <= 'Z')) ||
 528                         ((*p >= 'a') && (*p <= 'z')) ||
 529                         (*p == '_')))
 530 mike     1.13 	throw XmlException(XmlException::BAD_START_TAG, _line);
 531 kumpf    1.24     p++;
 532 mike     1.13 
 533 david    1.22     while ((*p) &&
 534               	   (((*p >= 'A') && (*p <= 'Z')) ||
 535               	    ((*p >= 'a') && (*p <= 'z')) ||
 536 kumpf    1.24 	    ((*p >= '0') && (*p <= '9')) ||
 537 david    1.22 	    *p == '_' || *p == '-' || *p == ':' || *p == '.'))
 538 mike     1.13 	p++;
 539               
 540                   // The next character must be a space:
 541               
 542                   if (isspace(*p))
 543                   {
 544               	*p++ = '\0';
 545               	_skipWhitespace(p);
 546                   }
 547               
 548                   if (*p == '>')
 549                   {
 550               	*p++ = '\0';
 551               	return true;
 552                   }
 553               
 554                   if (p[0] == '/' && p[1] == '>')
 555                   {
 556               	openCloseElement = true;
 557               	*p = '\0';
 558               	p += 2;
 559 mike     1.13 	return true;
 560                   }
 561               
 562                   return false;
 563               }
 564               
 565               void XmlParser::_getAttributeNameAndEqual(char*& p)
 566               {
 567 kumpf    1.24     if (!String::isUTF8(p) ||
 568                       !(((*p >= 'A') && (*p <= 'Z')) ||
 569                         ((*p >= 'a') && (*p <= 'z')) ||
 570                         (*p == '_')))
 571 mike     1.13 	throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
 572 kumpf    1.24     p++;
 573 mike     1.13 
 574 david    1.22     while ((*p) &&
 575               	   (((*p >= 'A') && (*p <= 'Z')) ||
 576               	    ((*p >= 'a') && (*p <= 'z')) ||
 577 kumpf    1.24 	    ((*p >= '0') && (*p <= '9')) ||
 578 david    1.22 	    *p == '_' || *p == '-' || *p == ':' || *p == '.'))
 579 mike     1.13 	p++;
 580               
 581                   char* term = p;
 582               
 583                   _skipWhitespace(p);
 584               
 585                   if (*p != '=')
 586               	throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
 587               
 588                   p++;
 589               
 590                   _skipWhitespace(p);
 591               
 592                   *term = '\0';
 593               }
 594               
 595               void XmlParser::_getAttributeValue(char*& p)
 596               {
 597                   // ATTN-B: handle values contained in semiquotes:
 598               
 599                   if (*p != '"' && *p != '\'')
 600 mike     1.13 	throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 601               
 602                   char startChar = *p++;
 603               
 604                   while (*p && *p != startChar)
 605               	p++;
 606               
 607                   if (*p != startChar)
 608               	throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 609               
 610                   *p++ = '\0';
 611               }
 612               
 613               void XmlParser::_getComment(char*& p)
 614               {
 615                   // Now p points to first non-whitespace character beyond "<--" sequence:
 616               
 617                   for (; *p; p++)
 618                   {
 619               	if (p[0] == '-' && p[1] == '-')
 620               	{
 621 mike     1.13 	    if (p[2] != '>')
 622               	    {
 623               		throw XmlException(
 624               		    XmlException::MINUS_MINUS_IN_COMMENT, _line);
 625               	    }
 626               
 627               	    // Find end of comment (excluding whitespace):
 628               
 629               	    *p = '\0';
 630               	    p += 3;
 631               	    return;
 632               	}
 633                   }
 634               
 635                   // If it got this far, then the comment is unterminated:
 636               
 637                   throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
 638               }
 639               
 640               void XmlParser::_getCData(char*& p)
 641               {
 642 mike     1.13     // At this point p points one past "<![CDATA[" sequence:
 643               
 644                   for (; *p; p++)
 645                   {
 646               	if (p[0] == ']' && p[1] == ']' && p[2] == '>')
 647               	{
 648               	    *p = '\0';
 649               	    p += 3;
 650               	    return;
 651               	}
 652               	else if (*p == '\n')
 653               	    _line++;
 654                   }
 655               
 656                   // If it got this far, then the comment is unterminated:
 657               
 658                   throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
 659               }
 660               
 661               void XmlParser::_getDocType(char*& p)
 662               {
 663 mike     1.13     // Just ignore the DOCTYPE command for now:
 664               
 665                   for (; *p && *p != '>'; p++)
 666                   {
 667               	if (*p == '\n')
 668               	    _line++;
 669                   }
 670               
 671                   if (*p != '>')
 672               	throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
 673               
 674                   p++;
 675               }
 676               
 677               void XmlParser::_getContent(char*& p)
 678               {
 679                   while (*p && *p != '<')
 680                   {
 681               	if (*p == '\n')
 682               	    _line++;
 683               
 684 mike     1.13 	p++;
 685                   }
 686               }
 687               
 688               void XmlParser::_substituteReferences(char* text)
 689               {
 690                   Uint32 rem = strlen(text);
 691               
 692                   for (char* p = text; *p; p++, rem--)
 693                   {
 694               	if (*p == '&')
 695               	{
 696 kumpf    1.18             // Process character or entity reference
 697 mike     1.13 
 698 kumpf    1.18             Uint16 referenceChar = 0;
 699                           Uint32 referenceLength = 0;
 700                           XmlException::Code code = XmlException::MALFORMED_REFERENCE;
 701               
 702                           if (*(p+1) == '#')
 703                           {
 704                               // Found a character (numeric) reference
 705                               // Determine whether it is decimal or hex
 706                               if (*(p+2) == 'x')
 707                               {
 708                                   // Decode a hexadecimal character reference
 709                                   char* q = p+3;
 710               
 711                                   // At most four digits are allowed, plus trailing ';'
 712                                   Uint32 numDigits;
 713                                   for (numDigits = 0; numDigits < 5; numDigits++, q++)
 714                                   {
 715                                       if (isdigit(*q))
 716                                       {
 717                                           referenceChar = (referenceChar << 4);
 718                                           referenceChar += (*q - '0');
 719 kumpf    1.18                         }
 720                                       else if ((*q >= 'A') && (*q <= 'F'))
 721                                       {
 722                                           referenceChar = (referenceChar << 4);
 723                                           referenceChar += (*q - 'A' + 10);
 724                                       }
 725                                       else if ((*q >= 'a') && (*q <= 'f'))
 726                                       {
 727                                           referenceChar = (referenceChar << 4);
 728                                           referenceChar += (*q - 'a' + 10);
 729                                       }
 730                                       else if (*q == ';')
 731                                       {
 732                                           break;
 733                                       }
 734                                       else
 735                                       {
 736                                           throw XmlException(code, _line);
 737                                       }
 738                                   }
 739               
 740 kumpf    1.18                     // Hex number must be 1 - 4 digits
 741                                   if ((numDigits == 0) || (numDigits > 4))
 742                                   {
 743                                       throw XmlException(code, _line);
 744                                   }
 745               
 746                                   // ATTN: Currently do not support 16-bit characters
 747                                   if (referenceChar > 0xff)
 748                                   {
 749                                       // ATTN: Is there a good way to say "unsupported"?
 750                                       throw XmlException(code, _line);
 751                                   }
 752               
 753                                   referenceLength = numDigits + 4;
 754                               }
 755                               else
 756                               {
 757                                   // Decode a decimal character reference
 758                                   Uint32 newChar = 0;
 759                                   char* q = p+2;
 760               
 761 kumpf    1.18                     // At most five digits are allowed, plus trailing ';'
 762                                   Uint32 numDigits;
 763                                   for (numDigits = 0; numDigits < 6; numDigits++, q++)
 764                                   {
 765                                       if (isdigit(*q))
 766                                       {
 767                                           newChar = (newChar * 10);
 768                                           newChar += (*q - '0');
 769                                       }
 770                                       else if (*q == ';')
 771                                       {
 772                                           break;
 773                                       }
 774                                       else
 775                                       {
 776                                           throw XmlException(code, _line);
 777                                       }
 778                                   }
 779               
 780                                   // Decimal number must be 1 - 5 digits and fit in 16 bits
 781                                   if ((numDigits == 0) || (numDigits > 5) ||
 782 kumpf    1.18                         (newChar > 0xffff))
 783                                   {
 784                                       throw XmlException(code, _line);
 785                                   }
 786               
 787                                   // ATTN: Currently do not support 16-bit characters
 788                                   if (newChar > 0xff)
 789                                   {
 790                                       // ATTN: Is there a good way to say "unsupported"?
 791                                       throw XmlException(code, _line);
 792                                   }
 793               
 794                                   referenceChar = Uint16(newChar);
 795                                   referenceLength = numDigits + 3;
 796                               }
 797                           }
 798                           else
 799                           {
 800                               // Check for entity reference
 801                               // ATTN: Inefficient if many entity references are supported
 802                               Uint32 i;
 803 kumpf    1.18                 for (i = 0; i < _REFERENCES_SIZE; i++)
 804                               {
 805                                   Uint32 length = _references[i].length;
 806                                   const char* match = _references[i].match;
 807               
 808                                   if (strncmp(p, _references[i].match, length) == 0)
 809                                   {
 810                                       referenceChar = _references[i].replacement;
 811                                       referenceLength = length;
 812                                       break;
 813                                   }
 814                               }
 815               
 816                               if (i == _REFERENCES_SIZE)
 817                               {
 818                                   // Didn't recognize the entity reference
 819                                   // ATTN: Is there a good way to say "unsupported"?
 820                                   throw XmlException(code, _line);
 821                               }
 822                           }
 823               
 824 kumpf    1.18             // Replace the reference with the correct character
 825                           *p = (char)referenceChar;
 826                           char* q = p + referenceLength;
 827                           rem = rem - referenceLength + 1;
 828                           memmove(p + 1, q, rem);
 829 mike     1.13 	}
 830                   }
 831               }
 832               
 // Shared zero-length C string. _getElement() uses it as the text of
 // DOCTYPE entries, which carry no text of their own.
 static const char _EMPTY_STRING[] = { '\0' };
 834               
 835               void XmlParser::_getElement(char*& p, XmlEntry& entry)
 836               {
 837                   entry.attributeCount = 0;
 838               
 839                   //--------------------------------------------------------------------------
 840                   // Get the element name (expect one of these: '?', '!', [A-Za-z_])
 841                   //--------------------------------------------------------------------------
 842               
 843                   if (*p == '?')
 844                   {
 845               	entry.type = XmlEntry::XML_DECLARATION;
 846               	entry.text = ++p;
 847               
 848               	Boolean openCloseElement = false;
 849               
 850 mike     1.13 	if (_getElementName(p))
 851               	    return;
 852                   }
 853                   else if (*p == '!')
 854                   {
 855               	p++;
 856               
 857               	// Expect a comment or CDATA:
 858               
 859               	if (p[0] == '-' && p[1] == '-')
 860               	{
 861               	    p += 2;
 862               	    entry.type = XmlEntry::COMMENT;
 863               	    entry.text = p;
 864               	    _getComment(p);
 865               	    return;
 866               	}
 867               	else if (memcmp(p, "[CDATA[", 7) == 0)
 868               	{
 869               	    p += 7;
 870               	    entry.type = XmlEntry::CDATA;
 871 mike     1.13 	    entry.text = p;
 872               	    _getCData(p);
 873               	    return;
 874               	}
 875               	else if (memcmp(p, "DOCTYPE", 7) == 0)
 876               	{
 877               	    entry.type = XmlEntry::DOCTYPE;
 878               	    entry.text = _EMPTY_STRING;
 879               	    _getDocType(p);
 880               	    return;
 881               	}
 882               	throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
 883                   }
 884                   else if (*p == '/')
 885                   {
 886               	entry.type = XmlEntry::END_TAG;
 887               	entry.text = ++p;
 888               
 889               	if (!_getElementName(p))
 890               	    throw(XmlException(XmlException::BAD_END_TAG, _line));
 891               
 892 mike     1.13 	return;
 893                   }
 894 kumpf    1.24     else if (String::isUTF8(p) &&
 895                            (((*p >= 'A') && (*p <= 'Z')) ||
 896                             ((*p >= 'a') && (*p <= 'z')) ||
 897                             (*p == '_')))
 898 mike     1.13     {
 899               	entry.type = XmlEntry::START_TAG;
 900               	entry.text = p;
 901               
 902               	Boolean openCloseElement = false;
 903               
 904               	if (_getOpenElementName(p, openCloseElement))
 905               	{
 906               	    if (openCloseElement)
 907               		entry.type = XmlEntry::EMPTY_TAG;
 908               	    return;
 909               	}
 910                   }
 911                   else
 912               	throw XmlException(XmlException::BAD_START_TAG, _line);
 913               
 914                   //--------------------------------------------------------------------------
 915                   // Grab all the attributes:
 916                   //--------------------------------------------------------------------------
 917               
 918                   for (;;)
 919 mike     1.13     {
 920               	if (entry.type == XmlEntry::XML_DECLARATION)
 921               	{
 922               	    if (p[0] == '?' && p[1] == '>')
 923               	    {
 924               		p += 2;
 925               		return;
 926               	    }
 927               	}
 928               	else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
 929               	{
 930               	    entry.type = XmlEntry::EMPTY_TAG;
 931               	    p += 2;
 932               	    return;
 933               	}
 934               	else if (*p == '>')
 935               	{
 936               	    p++;
 937               	    return;
 938               	}
 939               
 940 mike     1.13 	XmlAttribute attr;
 941               	attr.name = p;
 942               	_getAttributeNameAndEqual(p);
 943               
 944               	if (*p != '"' && *p != '\'')
 945               	    throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 946               
 947               	attr.value = p + 1;
 948               	_getAttributeValue(p);
 949               
 950               	if (entry.type == XmlEntry::XML_DECLARATION)
 951               	{
 952               	    // The next thing must a space or a "?>":
 953               
 954               	    if (!(p[0] == '?' && p[1] == '>') && !isspace(*p))
 955               	    {
 956               		throw XmlException(
 957               		    XmlException::BAD_ATTRIBUTE_VALUE, _line);
 958               	    }
 959               	}
 960               	else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || isspace(*p)))
 961 mike     1.13 	{
 962               	    // The next thing must be a space or a '>':
 963               
 964               	    throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 965               	}
 966               
 967               	_skipWhitespace(p);
 968               
 969               	if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES)
 970               	    throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line);
 971               
 972               	_substituteReferences((char*)attr.value);
 973               	entry.attributes[entry.attributeCount++] = attr;
 974                   }
 975               }
 976               
 // Printable names of the XmlEntry types; XmlEntry::print() indexes this
 // table with the entry's type value.
 // NOTE(review): the order presumably mirrors the XmlEntry type enumeration
 // -- confirm against its declaration before adding or reordering entries.
 static const char* _typeStrings[] =
 {
     "XML_DECLARATION", "START_TAG", "EMPTY_TAG", "END_TAG",
     "COMMENT", "CDATA", "DOCTYPE", "CONTENT"
 };
 988               
 989               void XmlEntry::print() const
 990               {
 991                   PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
 992               
 993                   Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
 994               
 995                   if (needQuotes)
 996               	PEGASUS_STD(cout) << "\"";
 997               	
 998                   _printValue(text);
 999               
1000                   if (needQuotes)
1001               	PEGASUS_STD(cout) << "\"";
1002               
1003 mike     1.13     PEGASUS_STD(cout) << '\n';
1004               
1005                   for (Uint32 i = 0; i < attributeCount; i++)
1006                   {
1007               	PEGASUS_STD(cout) << "    " << attributes[i].name << "=\"";
1008               	_printValue(attributes[i].value);
1009               	PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
1010                   }
1011               }
1012               
1013               const XmlAttribute* XmlEntry::findAttribute(
1014                   const char* name) const
1015               {
1016                   for (Uint32 i = 0; i < attributeCount; i++)
1017                   {
1018               	if (strcmp(attributes[i].name, name) == 0)
1019               	    return &attributes[i];
1020                   }
1021               
1022                   return 0;
1023               }
1024 mike     1.13 
// Find the first non-whitespace character (stored in first) and the end of
// the last non-whitespace character (last is set one past it). For example,
// consider this string:
//
//     "   87     "
//
// The first pointer would point to '8' and the last pointer would point one
// beyond '7'. For an empty or all-whitespace string both pointers are set to
// the terminating null.
//
// str   - null-terminated string to examine.
// first - receives the start of the trimmed range.
// last  - receives the end (exclusive) of the trimmed range.

static void _findEnds(
    const char* str,
    const char*& first,
    const char*& last)
{
    first = str;

    // isspace() is undefined for negative arguments, so each character is
    // converted to unsigned char first (bytes >= 0x80 occur in UTF-8 text).
    while (isspace(static_cast<unsigned char>(*first)))
        first++;

    if (!*first)
    {
        last = first;
        return;
    }

    last = first + strlen(first);

    while (last != first && isspace(static_cast<unsigned char>(last[-1])))
        last--;
}
1054               
1055               Boolean XmlEntry::getAttributeValue(
1056                   const char* name, 
1057                   Uint32& value) const
1058               {
1059                   const XmlAttribute* attr = findAttribute(name);
1060               
1061                   if (!attr)
1062               	return false;
1063               
1064                   const char* first;
1065                   const char* last;
1066 mike     1.13     _findEnds(attr->value, first, last);
1067               
1068                   char* end = 0;
1069                   long tmp = strtol(first, &end, 10);
1070               
1071                   if (!end || end != last)
1072               	return false;
1073               
1074                   value = Uint32(tmp);
1075                   return true;
1076               }
1077               
1078               Boolean XmlEntry::getAttributeValue(
1079                   const char* name, 
1080                   Real32& value) const
1081               {
1082                   const XmlAttribute* attr = findAttribute(name);
1083               
1084                   if (!attr)
1085               	return false;
1086               
1087 mike     1.13     const char* first;
1088                   const char* last;
1089                   _findEnds(attr->value, first, last);
1090               
1091                   char* end = 0;
1092                   double tmp = strtod(first, &end);
1093               
1094                   if (!end || end != last)
1095               	return false;
1096               
1097                   value = Uint32(tmp);
1098                   return true;
1099               }
1100               
1101               Boolean XmlEntry::getAttributeValue(
1102                   const char* name, 
1103                   const char*& value) const
1104               {
1105                   const XmlAttribute* attr = findAttribute(name);
1106               
1107                   if (!attr)
1108 mike     1.13 	return false;
1109               
1110                   value = attr->value;
1111                   return true;
1112               }
1113               
1114               Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1115               {
1116                   const char* tmp;
1117               
1118                   if (!getAttributeValue(name, tmp))
1119               	return false;
1120               
1121 david    1.22     value = String(tmp,STRING_FLAG_UTF8);
1122 mike     1.13     return true;
1123               }
1124               
1125               void XmlAppendCString(Array<Sint8>& out, const char* str)
1126               {
1127                   out.append(str, strlen(str));
1128               }
1129               
1130               PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2