(file) Return to XmlParser.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 karl  1.38 //%2006////////////////////////////////////////////////////////////////////////
   2 mike  1.13 //
   3 karl  1.29 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
   4            // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
   5            // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
   6 karl  1.27 // IBM Corp.; EMC Corporation, The Open Group.
   7 karl  1.29 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
   8            // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
   9 karl  1.31 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
  10            // EMC Corporation; VERITAS Software Corporation; The Open Group.
  11 karl  1.38 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
  12            // EMC Corporation; Symantec Corporation; The Open Group.
  13 mike  1.13 //
  14            // Permission is hereby granted, free of charge, to any person obtaining a copy
  15 kumpf 1.16 // of this software and associated documentation files (the "Software"), to
  16            // deal in the Software without restriction, including without limitation the
  17            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  18 mike  1.13 // sell copies of the Software, and to permit persons to whom the Software is
  19            // furnished to do so, subject to the following conditions:
  20 karl  1.38 // 
  21 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
  22 mike  1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
  23            // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
  24 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
  25            // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  26            // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  27 mike  1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  28            // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  29            //
  30            //==============================================================================
  31            //
  32            //%/////////////////////////////////////////////////////////////////////////////
  33            
  34            ////////////////////////////////////////////////////////////////////////////////
  35            //
  36            // XmlParser
  37            //
  38 david.dillard 1.32 //      This file contains a simple non-validating XML parser. Here are
  39                    //      several rules for well-formed XML:
  40 mike          1.13 //
  41 david.dillard 1.32 //          1.  Documents must begin with an XML declaration:
  42 mike          1.13 //
  43 david.dillard 1.32 //              <?xml version="1.0" standalone="yes"?>
  44 mike          1.13 //
  45 david.dillard 1.32 //          2.  Comments have the form:
  46 mike          1.13 //
  47 david.dillard 1.32 //              <!-- blah blah blah -->
  48 mike          1.13 //
  49 david.dillard 1.32 //          3. The following entity references are supported:
  50 mike          1.13 //
  51 david.dillard 1.32 //              &amp; - ampersand
  52                    //              &lt; - less-than
  53                    //              &gt; - greater-than
  54                    //              &quot; - full quote
  55                    //              &apos; - apostrophe
  56 mike          1.13 //
  57 kumpf         1.18 //             as well as character (numeric) references:
  58 mike          1.35 //
  59 kumpf         1.18 //              &#49; - decimal reference for character '1'
  60                    //              &#x31; - hexadecimal reference for character '1'
  61                    //
  62 david.dillard 1.32 //          4. Element names and attribute names take the following form:
  63 mike          1.13 //
  64 david.dillard 1.32 //              [A-Za-z_][A-Za-z_0-9-.:]*
  65 mike          1.13 //
  66 david.dillard 1.32 //          5.  Arbitrary data (CDATA) can be enclosed like this:
  67 mike          1.13 //
  68 david.dillard 1.32 //                  <![CDATA[
  69                    //                  ...
  70                    //                  ]]>
  71 mike          1.13 //
  72 david.dillard 1.32 //          6.  Element names and attributes names are case-sensitive.
  73 mike          1.13 //
  74 david.dillard 1.32 //          7.  XmlAttribute values must be delimited by full or half quotes.
  75                    //              XmlAttribute values must be delimited.
  76 mike          1.13 //
  77 david.dillard 1.32 //          8.  <!DOCTYPE...>
  78 mike          1.13 //
  79                    // TODO:
  80                    //
  81 kumpf         1.40 //      ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is
  82 mike          1.35 //      work. Handle <!DOCTYPE...> sections which are complicated (containing
  83 mike          1.13 //        rules rather than references to files).
  84                    //
  85 david.dillard 1.32 //      Remove newlines from string literals:
  86 mike          1.13 //
  87                    //          Example: <xyz x="hello
  88 david.dillard 1.32 //              world">
  89 mike          1.13 //
  90                    ////////////////////////////////////////////////////////////////////////////////
  91                    
  92 sage          1.14 #include <Pegasus/Common/Config.h>
  93 mike          1.13 #include <cctype>
  94                    #include <cstdio>
  95                    #include <cstdlib>
  96                    #include <cstring>
  97                    #include "XmlParser.h"
  98                    #include "Logger.h"
  99 chuck         1.19 #include "ExceptionRep.h"
 100 mike          1.34 #include "CharSet.h"
 101 mike          1.13 
 102                    PEGASUS_NAMESPACE_BEGIN
 103                    
 104                    ////////////////////////////////////////////////////////////////////////////////
 105                    //
 106                    // Static helper functions
 107                    //
 108                    ////////////////////////////////////////////////////////////////////////////////
 109                    
 110                    static void _printValue(const char* p)
 111                    {
 112                        for (; *p; p++)
 113                        {
 114 david.dillard 1.32         if (*p == '\n')
 115                                PEGASUS_STD(cout) << "\\n";
 116                            else if (*p == '\r')
 117                                PEGASUS_STD(cout) << "\\r";
 118                            else if (*p == '\t')
 119                                PEGASUS_STD(cout) << "\\t";
 120                            else
 121                                PEGASUS_STD(cout) << *p;
 122 mike          1.13     }
 123                    }
 124                    
 125                    struct EntityReference
 126                    {
 127                        const char* match;
 128                        Uint32 length;
 129                        char replacement;
 130                    };
 131                    
 132 kumpf         1.18 // ATTN: Add support for more entity references
 133 mike          1.13 static EntityReference _references[] =
 134                    {
 135                        { "&amp;", 5, '&' },
 136                        { "&lt;", 4, '<' },
 137                        { "&gt;", 4, '>' },
 138                        { "&quot;", 6, '"' },
 139                        { "&apos;", 6, '\'' }
 140                    };
 141                    
 142 chuck         1.26 
 143                    // Implements a check for a whitespace character, without calling
 144                    // isspace( ).  The isspace( ) function is locale-sensitive,
 145                    // and incorrectly flags some chars above 0x7f as whitespace.  This
 146                    // causes the XmlParser to incorrectly parse UTF-8 data.
 147                    //
 148                    // Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml)
 149                    // defines white space as:
 150 david.dillard 1.32 // S    ::=    (#x20 | #x9 | #xD | #xA)+
 151 mike          1.34 static inline int _isspace(char c)
 152 chuck         1.26 {
 153 kumpf         1.36     return CharSet::isXmlWhiteSpace((Uint8)c);
 154 chuck         1.26 }
 155                    
 156 mike          1.13 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
 157                    
 158                    ////////////////////////////////////////////////////////////////////////////////
 159                    //
 160                    // XmlException
 161                    //
 162                    ////////////////////////////////////////////////////////////////////////////////
 163                    
 164                    static const char* _xmlMessages[] =
 165                    {
 166                        "Bad opening element",
 167                        "Bad closing element",
 168                        "Bad attribute name",
 169                        "Exepected equal sign",
 170                        "Bad attribute value",
 171                        "A \"--\" sequence found within comment",
 172                        "Unterminated comment",
 173                        "Unterminated CDATA block",
 174                        "Unterminated DOCTYPE",
 175                        "Too many attributes: parser only handles 10",
 176                        "Malformed reference",
 177 mike          1.13     "Expected a comment or CDATA following \"<!\" sequence",
 178                        "Closing element does not match opening element",
 179                        "One or more tags are still open",
 180                        "More than one root element was encountered",
 181                        "Validation error",
 182                        "Semantic error"
 183                    };
 184                    
 185 david.dillard 1.32 static const char* _xmlKeys[] =
 186 chuck         1.19 {
 187 humberto      1.20     "Common.XmlParser.BAD_START_TAG",
 188 chuck         1.19     "Common.XmlParser.BAD_END_TAG",
 189                        "Common.XmlParser.BAD_ATTRIBUTE_NAME",
 190                        "Common.XmlParser.EXPECTED_EQUAL_SIGN",
 191                        "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
 192                        "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
 193                        "Common.XmlParser.UNTERMINATED_COMMENT",
 194                        "Common.XmlParser.UNTERMINATED_CDATA",
 195                        "Common.XmlParser.UNTERMINATED_DOCTYPE",
 196                        "Common.XmlParser.TOO_MANY_ATTRIBUTES",
 197                        "Common.XmlParser.MALFORMED_REFERENCE",
 198                        "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
 199                        "Common.XmlParser.START_END_MISMATCH",
 200 david.dillard 1.32     "Common.XmlParser.UNCLOSED_TAGS",
 201 chuck         1.19     "Common.XmlParser.MULTIPLE_ROOTS",
 202                        "Common.XmlParser.VALIDATION_ERROR",
 203                        "Common.XmlParser.SEMANTIC_ERROR"
 204                    };
 205                    
 206 chuck         1.23 // l10n replace _formMessage (comment out the old one)
 207 chuck         1.19 /*
 208 mike          1.13 static String _formMessage(Uint32 code, Uint32 line, const String& message)
 209                    {
 210                        String result = _xmlMessages[Uint32(code) - 1];
 211                    
 212                        char buffer[32];
 213                        sprintf(buffer, "%d", line);
 214                        result.append(": on line ");
 215                        result.append(buffer);
 216                    
 217                        if (message.size())
 218                        {
 219 david.dillard 1.32         result.append(": ");
 220                            result.append(message);
 221 mike          1.13     }
 222                    
 223                        return result;
 224                    }
 225 chuck         1.19 */
 226                    
 227 kumpf         1.40 static MessageLoaderParms _formMessage(
 228                        Uint32 code,
 229                        Uint32 line,
 230                        const String& message)
 231 chuck         1.19 {
 232                        String dftMsg = _xmlMessages[Uint32(code) - 1];
 233                        String key = _xmlKeys[Uint32(code) - 1];
 234 david.dillard 1.32         String msg = message;
 235 chuck         1.19 
 236                        dftMsg.append(": on line $0");
 237                        if (message.size())
 238                        {
 239 david.dillard 1.32         msg = ": " + msg;
 240                            dftMsg.append("$1");
 241                        }
 242 chuck         1.19 
 243                        return MessageLoaderParms(key, dftMsg, line ,msg);
 244                    }
 245                    
 246                    static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
 247                    {
 248                        String dftMsg = _xmlMessages[Uint32(code) - 1];
 249                        String key = _xmlKeys[Uint32(code) - 1];
 250                    
 251                        dftMsg.append(": on line $0");
 252 david.dillard 1.32 
 253 chuck         1.19     return MessageLoaderParms(key, dftMsg, line);
 254                    }
 255                    
 256 mike          1.13 
 257                    XmlException::XmlException(
 258 david.dillard 1.32     XmlException::Code code,
 259 mike          1.13     Uint32 lineNumber,
 260 david.dillard 1.32     const String& message)
 261 mike          1.13     : Exception(_formMessage(code, lineNumber, message))
 262                    {
 263                    
 264                    }
 265                    
 266 chuck         1.19 
 267                    XmlException::XmlException(
 268 david.dillard 1.32     XmlException::Code code,
 269 chuck         1.19     Uint32 lineNumber,
 270 david.dillard 1.32     MessageLoaderParms& msgParms)
 271 chuck         1.19     : Exception(_formPartialMessage(code, lineNumber))
 272                    {
 273 david.dillard 1.32         if (msgParms.default_msg.size())
 274 humberto      1.21     {
 275 david.dillard 1.32         msgParms.default_msg = ": " + msgParms.default_msg;
 276                        }
 277                            _rep->message.append(MessageLoader::getMessage(msgParms));
 278 chuck         1.19 }
 279                    
 280                    
 281 mike          1.13 ////////////////////////////////////////////////////////////////////////////////
 282                    //
 283                    // XmlValidationError
 284                    //
 285                    ////////////////////////////////////////////////////////////////////////////////
 286                    
 287                    XmlValidationError::XmlValidationError(
 288                        Uint32 lineNumber,
 289                        const String& message)
 290                        : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
 291                    {
 292                    }
 293                    
 294 chuck         1.19 
 295                    XmlValidationError::XmlValidationError(
 296                        Uint32 lineNumber,
 297                        MessageLoaderParms& msgParms)
 298                        : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
 299                    {
 300                    }
 301                    
 302                    
 303 mike          1.13 ////////////////////////////////////////////////////////////////////////////////
 304                    //
 305                    // XmlSemanticError
 306                    //
 307                    ////////////////////////////////////////////////////////////////////////////////
 308                    
 309                    XmlSemanticError::XmlSemanticError(
 310                        Uint32 lineNumber,
 311                        const String& message)
 312                        : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
 313                    {
 314                    }
 315 chuck         1.19 
 316                    
 317                    XmlSemanticError::XmlSemanticError(
 318                        Uint32 lineNumber,
 319                        MessageLoaderParms& msgParms)
 320                        : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
 321                    {
 322                    }
 323                    
 324 mike          1.13 
 325                    ////////////////////////////////////////////////////////////////////////////////
 326                    //
 327                    // XmlParser
 328                    //
 329                    ////////////////////////////////////////////////////////////////////////////////
 330                    
 331 kumpf         1.40 XmlParser::XmlParser(char* text)
 332                        : _line(1),
 333                          _current(text),
 334                          _restoreChar('\0'),
 335                          _foundRoot(false)
 336 mike          1.13 {
 337                    }
 338                    
 339 mike          1.34 inline void _skipWhitespace(Uint32& line, char*& p)
 340                    {
 341                        while (*p && _isspace(*p))
 342                        {
 343                            if (*p == '\n')
 344                                line++;
 345                    
 346                            p++;
 347                        }
 348                    }
 349                    
 350 kumpf         1.37 static int _getEntityRef(char*& p)
 351                    {
 352                        if ((p[0] == 'g') && (p[1] == 't') && (p[2] == ';'))
 353                        {
 354                            p += 3;
 355                            return '>';
 356                        }
 357                    
 358                        if ((p[0] == 'l') && (p[1] == 't') && (p[2] == ';'))
 359                        {
 360                            p += 3;
 361                            return '<';
 362                        }
 363                    
 364                        if ((p[0] == 'a') && (p[1] == 'p') && (p[2] == 'o') && (p[3] == 's') &&
 365                            (p[4] == ';'))
 366                        {
 367                            p += 5;
 368                            return '\'';
 369                        }
 370                    
 371 kumpf         1.37     if ((p[0] == 'q') && (p[1] == 'u') && (p[2] == 'o') && (p[3] == 't') &&
 372                            (p[4] == ';'))
 373                        {
 374                            p += 5;
 375                            return '"';
 376                        }
 377                    
 378                        if ((p[0] == 'a') && (p[1] == 'm') && (p[2] == 'p') && (p[3] == ';'))
 379                        {
 380                            p += 4;
 381                            return '&';
 382                        }
 383                    
 384                        return -1;
 385                    }
 386                    
 387 kumpf         1.42.4.1 static inline int _getCharRef(char*& p)
 388 kumpf         1.37     {
 389                            char* end;
 390                            unsigned long ch;
 391 kumpf         1.42.4.1     Boolean hex = false;
 392 kumpf         1.37     
 393 kumpf         1.42.4.1     if (*p == 'x')
 394 kumpf         1.37         {
 395 kumpf         1.42.4.1         hex = true;
 396                                ch = strtoul(++p, &end, 16);
 397 kumpf         1.37         }
 398                            else
 399                            {
 400                                ch = strtoul(p, &end, 10);
 401                            }
 402                        
 403                            if ((end == p) || (*end != ';') || (ch > 255))
 404                            {
 405                                return -1;
 406                            }
 407                        
 408                            if ((hex && (end - p > 4)) || (!hex && (end - p > 5)))
 409                            {
 410                                return -1;
 411                            }
 412                        
 413                            p = end + 1;
 414                        
 415                            return ch;
 416                        }
 417                        
 418 kumpf         1.42.4.1 // Parse an entity reference or a character reference
 419                        static inline int _getRef(Uint32 line, char*& p)
 420 kumpf         1.37     {
 421 kumpf         1.42.4.1     int ch;
 422 kumpf         1.37     
 423 kumpf         1.42.4.1     if (*p == '#')
 424                            {
 425                                ch = _getCharRef(++p);
 426                            }
 427                            else
 428                            {
 429                                ch = _getEntityRef(p);
 430                            }
 431                        
 432                            if (ch == -1)
 433                            {
 434                                throw XmlException(XmlException::MALFORMED_REFERENCE, line);
 435                            }
 436 kumpf         1.37     
 437 kumpf         1.42.4.1     return ch;
 438                        }
 439                        
 440                        static inline void _normalizeElementValue(
 441                            Uint32& line,
 442                            char*& p)
 443                        {
 444 kumpf         1.37         // Process one character at a time:
 445                        
 446                            char* q = p;
 447                        
 448 kumpf         1.42.4.1     while (*p && (*p != '<'))
 449 kumpf         1.37         {
 450                                if (_isspace(*p))
 451                                {
 452 kumpf         1.42.4.1             // Trim whitespace from the end of the value, but do not compress
 453                                    // whitespace within the value.
 454                        
 455                                    const char* start = p;
 456 kumpf         1.37     
 457                                    if (*p++ == '\n')
 458                                    {
 459                                        line++;
 460                                    }
 461                        
 462                                    _skipWhitespace(line, p);
 463                        
 464 kumpf         1.42.4.1             if (*p && (*p != '<'))
 465 kumpf         1.37                 {
 466 kumpf         1.42.4.1                 // Transfer internal whitespace to q without compressing it.
 467                                        const char* i = start;
 468                                        while (i < p)
 469 kumpf         1.37                     {
 470 kumpf         1.42.4.1                     *q++ = *i++;
 471 kumpf         1.37                     }
 472                                    }
 473                                    else
 474                                    {
 475 kumpf         1.42.4.1                 // Do not transfer trailing whitespace to q.
 476                                        break;
 477 kumpf         1.37                 }
 478 kumpf         1.42.4.1         }
 479                                else if (*p == '&')
 480                                {
 481                                    // Process an entity reference or a character reference.
 482                        
 483                                    *q++ = _getRef(line, ++p);
 484                                }
 485                                else
 486                                {
 487                                    *q++ = *p++;
 488                                }
 489                            }
 490                        
 491                            // If q got behind p, it is safe and necessary to null-terminate q
 492 kumpf         1.37     
 493 kumpf         1.42.4.1     if (q != p)
 494                            {
 495                                *q = '\0';
 496                            }
 497                        }
 498                        
 499                        static inline void _normalizeAttributeValue(
 500                            Uint32& line,
 501                            char*& p,
 502                            char end_char,
 503                            char*& start)
 504                        {
 505                            // Skip over leading whitespace:
 506                        
 507                            _skipWhitespace(line, p);
 508                            start = p;
 509                        
 510                            // Process one character at a time:
 511                        
 512                            char* q = p;
 513                        
 514 kumpf         1.42.4.1     while (*p && (*p != end_char))
 515                            {
 516                                if (_isspace(*p))
 517                                {
 518                                    // Compress sequences of whitespace characters to a single space
 519                                    // character. Update line number when newlines encountered.
 520                        
 521                                    if (*p++ == '\n')
 522 kumpf         1.37                 {
 523 kumpf         1.42.4.1                 line++;
 524 kumpf         1.37                 }
 525                        
 526 kumpf         1.42.4.1             *q++ = ' ';
 527                        
 528                                    _skipWhitespace(line, p);
 529                                }
 530                                else if (*p == '&')
 531                                {
 532                                    // Process an entity reference or a character reference.
 533                        
 534                                    *q++ = _getRef(line, ++p);
 535 kumpf         1.37             }
 536                                else
 537                                {
 538                                    *q++ = *p++;
 539                                }
 540                            }
 541                        
 542                            // Remove single trailing whitespace (consecutive whitespaces already
 543                            // compressed above).  Since p >= q, we can tell if we need to strip a
 544                            // trailing space from q by looking at the end of p.  We must not look at
 545                            // the last character of p, though, if p is an empty string.
 546 kumpf         1.42.4.1     Boolean adjust_q = (p != start) && _isspace(p[-1]);
 547 kumpf         1.37     
 548 kumpf         1.42.4.1     // We encountered a the end_char or a zero-terminator.
 549                        
 550                            *q = *p;
 551                        
 552                            if (adjust_q)
 553 kumpf         1.37         {
 554                                q--;
 555                            }
 556                        
 557                            // If q got behind p, it is safe and necessary to null-terminate q
 558                        
 559                            if (q != p)
 560                            {
 561                                *q = '\0';
 562                            }
 563                        }
 564                        
 565 venkat.puvvada 1.41     Boolean XmlParser::next(XmlEntry& entry, Boolean includeComment)
 566 mike           1.13     {
 567                             if (!_putBackStack.isEmpty())
 568                             {
 569 david.dillard  1.32             entry = _putBackStack.top();
 570                                 _putBackStack.pop();
 571                                 return true;
 572 mike           1.13         }
 573                         
 574                             // If a character was overwritten with a null-terminator the last
 575                             // time this routine was called, then put back that character. Before
 576                             // exiting of course, restore the null-terminator.
 577                         
 578                             char* nullTerminator = 0;
 579                         
 580                             if (_restoreChar && !*_current)
 581                             {
 582 david.dillard  1.32             nullTerminator = _current;
 583                                 *_current = _restoreChar;
 584                                 _restoreChar = '\0';
 585 mike           1.13         }
 586                         
 587 venkat.puvvada 1.41         // Loop until we are done with comments if includeComment is false.
 588                             do
 589                             {
 590                                 // Skip over any whitespace:
 591                                 _skipWhitespace(_line, _current);
 592                         
 593                                 if (!*_current)
 594                                 {
 595                                     if (nullTerminator)
 596                                         *nullTerminator = '\0';
 597 mike           1.13     
 598 venkat.puvvada 1.41                 if (!_stack.isEmpty())
 599                                         throw XmlException(XmlException::UNCLOSED_TAGS, _line);
 600 mike           1.13     
 601 venkat.puvvada 1.41                 return false;
 602                                 }
 603 mike           1.13     
 604 venkat.puvvada 1.41             // Either a "<...>" or content begins next:
 605 mike           1.13     
 606 venkat.puvvada 1.41             if (*_current == '<')
 607                                 {
 608                                     _current++;
 609                                     _getElement(_current, entry);
 610 mike           1.13     
 611 venkat.puvvada 1.41                 if (nullTerminator)
 612                                         *nullTerminator = '\0';
 613 mike           1.13     
 614 venkat.puvvada 1.41                 if (entry.type == XmlEntry::START_TAG)
 615                                     {
 616                                         if (_stack.isEmpty() && _foundRoot)
 617                                             throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
 618 mike           1.13     
 619 venkat.puvvada 1.41                     _foundRoot = true;
 620                                         _stack.push((char*)entry.text);
 621                                     }
 622                                     else if (entry.type == XmlEntry::END_TAG)
 623                                     {
 624                                         if (_stack.isEmpty())
 625                                             throw XmlException(XmlException::START_END_MISMATCH, _line);
 626 mike           1.13     
 627 venkat.puvvada 1.41                     if (strcmp(_stack.top(), entry.text) != 0)
 628                                             throw XmlException(XmlException::START_END_MISMATCH, _line);
 629 david.dillard  1.32     
 630 venkat.puvvada 1.41                     _stack.pop();
 631                                     }
 632 david.dillard  1.32             }
 633 venkat.puvvada 1.41             else
 634 david.dillard  1.32             {
 635 venkat.puvvada 1.41                 // Normalize the content:
 636 mike           1.13     
 637 kumpf          1.42.4.1             char* start = _current;
 638                                     _normalizeElementValue(_line, _current);
 639 mike           1.13     
 640 venkat.puvvada 1.41                 // Get the content:
 641 mike           1.13     
 642 venkat.puvvada 1.41                 entry.type = XmlEntry::CONTENT;
 643                                     entry.text = start;
 644 kumpf          1.37     
 645 venkat.puvvada 1.41                 // Overwrite '<' with a null character (temporarily).
 646 kumpf          1.37     
 647 venkat.puvvada 1.41                 _restoreChar = *_current;
 648                                     *_current = '\0';
 649 kumpf          1.37     
 650 venkat.puvvada 1.41                 if (nullTerminator)
 651                                         *nullTerminator = '\0';
 652                                 }
 653                             }while (!includeComment && entry.type == XmlEntry::COMMENT);
 654 kumpf          1.37     
 655 venkat.puvvada 1.41         return true;
 656 mike           1.13     }
 657                         
 658                         void XmlParser::putBack(XmlEntry& entry)
 659                         {
 660                             _putBackStack.push(entry);
 661                         }
 662                         
 663                         XmlParser::~XmlParser()
 664                         {
 665                             // Nothing to do!
 666                         }
 667                         
 668 mike           1.35     // A-Za-z0-9_-:.
 669 kumpf          1.40     static unsigned char _isInnerElementChar[] =
 670 mike           1.35     {
 671                             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 672                             0,0,0,0,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,
 673                             1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 674                             1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 675                             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 676                             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 677                             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 678                         };
 679                         
 680 mike           1.13     Boolean XmlParser::_getElementName(char*& p)
 681                         {
 682 mike           1.35         if (!CharSet::isAlNumUnder(Uint8(*p)))
 683 david.dillard  1.32             throw XmlException(XmlException::BAD_START_TAG, _line);
 684 mike           1.35     
 685 kumpf          1.24         p++;
 686 mike           1.13     
 687 mike           1.35         while (*p && _isInnerElementChar[Uint8(*p)])
 688 david.dillard  1.32             p++;
 689 mike           1.13     
 690                             // The next character must be a space:
 691                         
 692 chuck          1.26         if (_isspace(*p))
 693 mike           1.13         {
 694 david.dillard  1.32             *p++ = '\0';
 695 mike           1.34             _skipWhitespace(_line, p);
 696 mike           1.13         }
 697                         
 698                             if (*p == '>')
 699                             {
 700 david.dillard  1.32             *p++ = '\0';
 701                                 return true;
 702 mike           1.13         }
 703                         
 704                             return false;
 705                         }
 706                         
 707                         Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement)
 708                         {
 709                             openCloseElement = false;
 710                         
 711 mike           1.35         if (!CharSet::isAlNumUnder(Uint8(*p)))
 712 david.dillard  1.32             throw XmlException(XmlException::BAD_START_TAG, _line);
 713 mike           1.35     
 714 kumpf          1.24         p++;
 715 mike           1.13     
 716 mike           1.35         while (*p && _isInnerElementChar[Uint8(*p)])
 717 david.dillard  1.32             p++;
 718 mike           1.13     
 719                             // The next character must be a space:
 720                         
 721 chuck          1.26         if (_isspace(*p))
 722 mike           1.13         {
 723 david.dillard  1.32             *p++ = '\0';
 724 mike           1.34             _skipWhitespace(_line, p);
 725 mike           1.13         }
 726                         
 727                             if (*p == '>')
 728                             {
 729 david.dillard  1.32             *p++ = '\0';
 730                                 return true;
 731 mike           1.13         }
 732                         
 733                             if (p[0] == '/' && p[1] == '>')
 734                             {
 735 david.dillard  1.32             openCloseElement = true;
 736                                 *p = '\0';
 737                                 p += 2;
 738                                 return true;
 739 mike           1.13         }
 740                         
 741                             return false;
 742                         }
 743                         
 744                         void XmlParser::_getAttributeNameAndEqual(char*& p)
 745                         {
 746 mike           1.35         if (!CharSet::isAlNumUnder((Uint8)*p))
 747 david.dillard  1.32             throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
 748 mike           1.35     
 749 kumpf          1.24         p++;
 750 mike           1.13     
 751 mike           1.35         while (*p && _isInnerElementChar[Uint8(*p)])
 752 david.dillard  1.32             p++;
 753 mike           1.13     
 754                             char* term = p;
 755                         
 756 mike           1.34         _skipWhitespace(_line, p);
 757 mike           1.13     
 758                             if (*p != '=')
 759 david.dillard  1.32             throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
 760 mike           1.13     
 761                             p++;
 762                         
 763 mike           1.34         _skipWhitespace(_line, p);
 764 mike           1.13     
 765                             *term = '\0';
 766                         }
 767                         
 768                         void XmlParser::_getComment(char*& p)
 769                         {
 770                             // Now p points to first non-whitespace character beyond "<--" sequence:
 771                         
 772                             for (; *p; p++)
 773                             {
 774 david.dillard  1.32             if (p[0] == '-' && p[1] == '-')
 775                                 {
 776                                     if (p[2] != '>')
 777                                     {
 778                                         throw XmlException(
 779                                             XmlException::MINUS_MINUS_IN_COMMENT, _line);
 780                                     }
 781                         
 782                                     // Find end of comment (excluding whitespace):
 783                         
 784                                     *p = '\0';
 785                                     p += 3;
 786                                     return;
 787                                 }
 788 mike           1.13         }
 789                         
 790                             // If it got this far, then the comment is unterminated:
 791                         
 792                             throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
 793                         }
 794                         
 795                         void XmlParser::_getCData(char*& p)
 796                         {
 797                             // At this point p points one past "<![CDATA[" sequence:
 798                         
 799                             for (; *p; p++)
 800                             {
 801 david.dillard  1.32             if (p[0] == ']' && p[1] == ']' && p[2] == '>')
 802                                 {
 803                                     *p = '\0';
 804                                     p += 3;
 805                                     return;
 806                                 }
 807                                 else if (*p == '\n')
 808                                     _line++;
 809 mike           1.13         }
 810                         
 811                             // If it got this far, then the comment is unterminated:
 812                         
 813                             throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
 814                         }
 815                         
 816                         void XmlParser::_getDocType(char*& p)
 817                         {
 818                             // Just ignore the DOCTYPE command for now:
 819                         
 820                             for (; *p && *p != '>'; p++)
 821                             {
 822 david.dillard  1.32             if (*p == '\n')
 823                                     _line++;
 824 mike           1.13         }
 825                         
 826                             if (*p != '>')
 827 david.dillard  1.32             throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
 828 mike           1.13     
 829                             p++;
 830                         }
 831                         
 832                         void XmlParser::_getElement(char*& p, XmlEntry& entry)
 833                         {
 834                             entry.attributeCount = 0;
 835                         
 836                             //--------------------------------------------------------------------------
 837                             // Get the element name (expect one of these: '?', '!', [A-Za-z_])
 838                             //--------------------------------------------------------------------------
 839                         
 840                             if (*p == '?')
 841                             {
 842 david.dillard  1.32             entry.type = XmlEntry::XML_DECLARATION;
 843                                 entry.text = ++p;
 844 mike           1.13     
 845 david.dillard  1.32             Boolean openCloseElement = false;
 846 mike           1.13     
 847 david.dillard  1.32             if (_getElementName(p))
 848                                     return;
 849 mike           1.13         }
 850                             else if (*p == '!')
 851                             {
 852 david.dillard  1.32             p++;
 853 mike           1.13     
 854 david.dillard  1.32             // Expect a comment or CDATA:
 855 mike           1.13     
 856 david.dillard  1.32             if (p[0] == '-' && p[1] == '-')
 857                                 {
 858                                     p += 2;
 859                                     entry.type = XmlEntry::COMMENT;
 860                                     entry.text = p;
 861                                     _getComment(p);
 862                                     return;
 863                                 }
 864                                 else if (memcmp(p, "[CDATA[", 7) == 0)
 865                                 {
 866                                     p += 7;
 867                                     entry.type = XmlEntry::CDATA;
 868                                     entry.text = p;
 869                                     _getCData(p);
 870                                     return;
 871                                 }
 872                                 else if (memcmp(p, "DOCTYPE", 7) == 0)
 873                                 {
 874                                     entry.type = XmlEntry::DOCTYPE;
 875 kumpf          1.37                 entry.text = "";
 876 david.dillard  1.32                 _getDocType(p);
 877                                     return;
 878                                 }
 879                                 throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
 880 mike           1.13         }
 881                             else if (*p == '/')
 882                             {
 883 david.dillard  1.32             entry.type = XmlEntry::END_TAG;
 884                                 entry.text = ++p;
 885 mike           1.13     
 886 david.dillard  1.32             if (!_getElementName(p))
 887                                     throw(XmlException(XmlException::BAD_END_TAG, _line));
 888 mike           1.13     
 889 david.dillard  1.32             return;
 890 mike           1.13         }
 891 david          1.25         else if ((((*p >= 'A') && (*p <= 'Z')) ||
 892 kumpf          1.24                   ((*p >= 'a') && (*p <= 'z')) ||
 893                                       (*p == '_')))
 894 mike           1.13         {
 895 david.dillard  1.32             entry.type = XmlEntry::START_TAG;
 896                                 entry.text = p;
 897 mike           1.13     
 898 david.dillard  1.32             Boolean openCloseElement = false;
 899 mike           1.13     
 900 david.dillard  1.32             if (_getOpenElementName(p, openCloseElement))
 901                                 {
 902                                     if (openCloseElement)
 903                                         entry.type = XmlEntry::EMPTY_TAG;
 904                                     return;
 905                                 }
 906 mike           1.13         }
 907                             else
 908 david.dillard  1.32             throw XmlException(XmlException::BAD_START_TAG, _line);
 909 mike           1.13     
 910                             //--------------------------------------------------------------------------
 911                             // Grab all the attributes:
 912                             //--------------------------------------------------------------------------
 913                         
 914                             for (;;)
 915                             {
 916 david.dillard  1.32             if (entry.type == XmlEntry::XML_DECLARATION)
 917                                 {
 918                                     if (p[0] == '?' && p[1] == '>')
 919                                     {
 920                                         p += 2;
 921                                         return;
 922                                     }
 923                                 }
 924                                 else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
 925                                 {
 926                                     entry.type = XmlEntry::EMPTY_TAG;
 927                                     p += 2;
 928                                     return;
 929                                 }
 930                                 else if (*p == '>')
 931                                 {
 932                                     p++;
 933                                     return;
 934                                 }
 935                         
 936                                 XmlAttribute attr;
 937 david.dillard  1.32             attr.name = p;
 938                                 _getAttributeNameAndEqual(p);
 939                         
 940 kumpf          1.37             // Get the attribute value (e.g., "some value")
 941                                 {
 942                                     if ((*p != '"') && (*p != '\''))
 943                                     {
 944                                         throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 945                                     }
 946                         
 947                                     char quote = *p++;
 948                         
 949                                     char* start;
 950 kumpf          1.42.4.1             _normalizeAttributeValue(_line, p, quote, start);
 951 kumpf          1.37                 attr.value = start;
 952                         
 953                                     if (*p != quote)
 954                                     {
 955                                         throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 956                                     }
 957                         
 958                                     // Overwrite the closing quote with a null-terminator:
 959 david.dillard  1.32     
 960 kumpf          1.37                 *p++ = '\0';
 961                                 }
 962 david.dillard  1.32     
 963                                 if (entry.type == XmlEntry::XML_DECLARATION)
 964                                 {
 965                                     // The next thing must a space or a "?>":
 966 mike           1.13     
 967 david.dillard  1.32                 if (!(p[0] == '?' && p[1] == '>') && !_isspace(*p))
 968                                     {
 969                                         throw XmlException(
 970                                             XmlException::BAD_ATTRIBUTE_VALUE, _line);
 971                                     }
 972                                 }
 973                                 else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || _isspace(*p)))
 974                                 {
 975                                     // The next thing must be a space or a '>':
 976 mike           1.13     
 977 david.dillard  1.32                 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 978                                 }
 979 mike           1.13     
 980 mike           1.34             _skipWhitespace(_line, p);
 981 david.dillard  1.32     
 982                                 if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES)
 983                                     throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line);
 984                         
 985                                 entry.attributes[entry.attributeCount++] = attr;
 986 mike           1.13         }
 987                         }
 988                         
 989                         static const char* _typeStrings[] =
 990                         {
 991 david.dillard  1.32         "XML_DECLARATION",
 992                             "START_TAG",
 993                             "EMPTY_TAG",
 994                             "END_TAG",
 995 mike           1.13         "COMMENT",
 996                             "CDATA",
 997                             "DOCTYPE",
 998 david.dillard  1.32         "CONTENT"
 999 mike           1.13     };
1000                         
1001                         void XmlEntry::print() const
1002                         {
1003                             PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
1004                         
1005                             Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
1006                         
1007                             if (needQuotes)
1008 david.dillard  1.32             PEGASUS_STD(cout) << "\"";
1009                         
1010 mike           1.13         _printValue(text);
1011                         
1012                             if (needQuotes)
1013 david.dillard  1.32             PEGASUS_STD(cout) << "\"";
1014 mike           1.13     
1015                             PEGASUS_STD(cout) << '\n';
1016                         
1017                             for (Uint32 i = 0; i < attributeCount; i++)
1018                             {
1019 david.dillard  1.32             PEGASUS_STD(cout) << "    " << attributes[i].name << "=\"";
1020                                 _printValue(attributes[i].value);
1021                                 PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
1022 mike           1.13         }
1023                         }
1024                         
1025                         const XmlAttribute* XmlEntry::findAttribute(
1026                             const char* name) const
1027                         {
1028                             for (Uint32 i = 0; i < attributeCount; i++)
1029                             {
1030 david.dillard  1.32             if (strcmp(attributes[i].name, name) == 0)
1031                                     return &attributes[i];
1032 mike           1.13         }
1033                         
1034                             return 0;
1035                         }
1036                         
1037                         // Find first non-whitespace character (set first) and last non-whitespace
1038                         // character (set last one past this). For example, consider this string:
1039                         //
1040 david.dillard  1.32     //      "   87     "
1041 mike           1.13     //
1042                         // The first pointer would point to '8' and the last pointer woudl point one
1043                         // beyond '7'.
1044                         
1045                         static void _findEnds(
1046 david.dillard  1.32         const char* str,
1047                             const char*& first,
1048 mike           1.13         const char*& last)
1049                         {
1050                             first = str;
1051                         
1052 chuck          1.26         while (_isspace(*first))
1053 david.dillard  1.32             first++;
1054 mike           1.13     
1055                             if (!*first)
1056                             {
1057 david.dillard  1.32             last = first;
1058                                 return;
1059 mike           1.13         }
1060                         
1061                             last = first + strlen(first);
1062                         
1063 chuck          1.26         while (last != first && _isspace(last[-1]))
1064 david.dillard  1.32             last--;
1065 mike           1.13     }
1066                         
1067                         Boolean XmlEntry::getAttributeValue(
1068 david.dillard  1.32         const char* name,
1069 mike           1.13         Uint32& value) const
1070                         {
1071                             const XmlAttribute* attr = findAttribute(name);
1072                         
1073                             if (!attr)
1074 david.dillard  1.32             return false;
1075 mike           1.13     
1076                             const char* first;
1077                             const char* last;
1078                             _findEnds(attr->value, first, last);
1079                         
1080                             char* end = 0;
1081                             long tmp = strtol(first, &end, 10);
1082                         
1083                             if (!end || end != last)
1084 david.dillard  1.32             return false;
1085 mike           1.13     
1086                             value = Uint32(tmp);
1087                             return true;
1088                         }
1089                         
1090                         Boolean XmlEntry::getAttributeValue(
1091 david.dillard  1.32         const char* name,
1092 mike           1.13         Real32& value) const
1093                         {
1094                             const XmlAttribute* attr = findAttribute(name);
1095                         
1096                             if (!attr)
1097 david.dillard  1.32             return false;
1098 mike           1.13     
1099                             const char* first;
1100                             const char* last;
1101                             _findEnds(attr->value, first, last);
1102                         
1103                             char* end = 0;
1104                             double tmp = strtod(first, &end);
1105                         
1106                             if (!end || end != last)
1107 david.dillard  1.32             return false;
1108 mike           1.13     
1109 david.dillard  1.32         value = static_cast<Real32>(tmp);
1110 mike           1.13         return true;
1111                         }
1112                         
1113                         Boolean XmlEntry::getAttributeValue(
1114 david.dillard  1.32         const char* name,
1115 mike           1.13         const char*& value) const
1116                         {
1117                             const XmlAttribute* attr = findAttribute(name);
1118                         
1119                             if (!attr)
1120 david.dillard  1.32             return false;
1121 mike           1.13     
1122                             value = attr->value;
1123                             return true;
1124                         }
1125                         
1126                         Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1127                         {
1128                             const char* tmp;
1129                         
1130                             if (!getAttributeValue(name, tmp))
1131 david.dillard  1.32             return false;
1132 mike           1.13     
1133 chuck          1.28         value = String(tmp);
1134 mike           1.13         return true;
1135                         }
1136                         
1137 mike           1.33     void XmlAppendCString(Buffer& out, const char* str)
1138 mike           1.13     {
1139 david.dillard  1.32         out.append(str, static_cast<Uint32>(strlen(str)));
1140 mike           1.13     }
1141                         
1142                         PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2