(file) Return to XmlParser.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 karl  1.38 //%2006////////////////////////////////////////////////////////////////////////
   2 mike  1.13 //
   3 karl  1.29 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
   4            // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
   5            // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
   6 karl  1.27 // IBM Corp.; EMC Corporation, The Open Group.
   7 karl  1.29 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
   8            // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
   9 karl  1.31 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
  10            // EMC Corporation; VERITAS Software Corporation; The Open Group.
  11 karl  1.38 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
  12            // EMC Corporation; Symantec Corporation; The Open Group.
  13 mike  1.13 //
  14            // Permission is hereby granted, free of charge, to any person obtaining a copy
  15 kumpf 1.16 // of this software and associated documentation files (the "Software"), to
  16            // deal in the Software without restriction, including without limitation the
  17            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  18 mike  1.13 // sell copies of the Software, and to permit persons to whom the Software is
  19            // furnished to do so, subject to the following conditions:
  20 karl  1.38 // 
  21 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
  22 mike  1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
  23            // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
  24 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
  25            // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  26            // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  27 mike  1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  28            // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  29            //
  30            //==============================================================================
  31            //
  32            //%/////////////////////////////////////////////////////////////////////////////
  33            
  34            ////////////////////////////////////////////////////////////////////////////////
  35            //
  36            // XmlParser
  37            //
  38 david.dillard 1.32 //      This file contains a simple non-validating XML parser. Here are
   39                    //      several rules for well-formed XML:
  40 mike          1.13 //
  41 david.dillard 1.32 //          1.  Documents must begin with an XML declaration:
  42 mike          1.13 //
  43 david.dillard 1.32 //              <?xml version="1.0" standalone="yes"?>
  44 mike          1.13 //
  45 david.dillard 1.32 //          2.  Comments have the form:
  46 mike          1.13 //
  47 david.dillard 1.32 //              <!-- blah blah blah -->
  48 mike          1.13 //
  49 david.dillard 1.32 //          3. The following entity references are supported:
  50 mike          1.13 //
   51 david.dillard 1.32 //              &amp; - ampersand
   52                    //              &lt; - less-than
   53                    //              &gt; - greater-than
   54                    //              &quot; - full quote
   55                    //              &apos; - apostrophe
  56 mike          1.13 //
  57 kumpf         1.18 //             as well as character (numeric) references:
  58 mike          1.35 //
  59 kumpf         1.18 //              &#49; - decimal reference for character '1'
  60                    //              &#x31; - hexadecimal reference for character '1'
  61                    //
  62 david.dillard 1.32 //          4. Element names and attribute names take the following form:
  63 mike          1.13 //
   64 david.dillard 1.32 //              [A-Za-z_][A-Za-z_0-9-.:]*
  65 mike          1.13 //
  66 david.dillard 1.32 //          5.  Arbitrary data (CDATA) can be enclosed like this:
  67 mike          1.13 //
  68 david.dillard 1.32 //                  <![CDATA[
  69                    //                  ...
  70                    //                  ]]>
  71 mike          1.13 //
  72 david.dillard 1.32 //          6.  Element names and attributes names are case-sensitive.
  73 mike          1.13 //
   74 david.dillard 1.32 //          7.  Attribute values must be delimited by full (") or half (')
   75                    //              quotes; undelimited attribute values are not accepted.
  76 mike          1.13 //
  77 david.dillard 1.32 //          8.  <!DOCTYPE...>
  78 mike          1.13 //
  79                    // TODO:
  80                    //
  81 kumpf         1.40 //      ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is
  82 mike          1.35 //      work. Handle <!DOCTYPE...> sections which are complicated (containing
  83 mike          1.13 //        rules rather than references to files).
  84                    //
  85 david.dillard 1.32 //      Remove newlines from string literals:
  86 mike          1.13 //
  87                    //          Example: <xyz x="hello
  88 david.dillard 1.32 //              world">
  89 mike          1.13 //
  90                    ////////////////////////////////////////////////////////////////////////////////
  91                    
  92 sage          1.14 #include <Pegasus/Common/Config.h>
  93 mike          1.13 #include <cctype>
  94                    #include <cstdio>
  95                    #include <cstdlib>
  96                    #include <cstring>
  97                    #include "XmlParser.h"
  98                    #include "Logger.h"
  99 chuck         1.19 #include "ExceptionRep.h"
 100 mike          1.34 #include "CharSet.h"
 101 mike          1.13 
 102                    PEGASUS_NAMESPACE_BEGIN
 103                    
 104                    ////////////////////////////////////////////////////////////////////////////////
 105                    //
 106                    // Static helper functions
 107                    //
 108                    ////////////////////////////////////////////////////////////////////////////////
 109                    
 110                    static void _printValue(const char* p)
 111                    {
 112                        for (; *p; p++)
 113                        {
 114 david.dillard 1.32         if (*p == '\n')
 115                                PEGASUS_STD(cout) << "\\n";
 116                            else if (*p == '\r')
 117                                PEGASUS_STD(cout) << "\\r";
 118                            else if (*p == '\t')
 119                                PEGASUS_STD(cout) << "\\t";
 120                            else
 121                                PEGASUS_STD(cout) << *p;
 122 mike          1.13     }
 123                    }
 124                    
 125                    struct EntityReference
 126                    {
 127                        const char* match;
 128                        Uint32 length;
 129                        char replacement;
 130                    };
 131                    
 132 kumpf         1.18 // ATTN: Add support for more entity references
 133 mike          1.13 static EntityReference _references[] =
 134                    {
 135                        { "&amp;", 5, '&' },
 136                        { "&lt;", 4, '<' },
 137                        { "&gt;", 4, '>' },
 138                        { "&quot;", 6, '"' },
 139                        { "&apos;", 6, '\'' }
 140                    };
 141                    
 142 chuck         1.26 
 143                    // Implements a check for a whitespace character, without calling
 144                    // isspace( ).  The isspace( ) function is locale-sensitive,
 145                    // and incorrectly flags some chars above 0x7f as whitespace.  This
 146                    // causes the XmlParser to incorrectly parse UTF-8 data.
 147                    //
 148                    // Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml)
 149                    // defines white space as:
 150 david.dillard 1.32 // S    ::=    (#x20 | #x9 | #xD | #xA)+
 151 mike          1.34 static inline int _isspace(char c)
 152 chuck         1.26 {
 153 kumpf         1.36     return CharSet::isXmlWhiteSpace((Uint8)c);
 154 chuck         1.26 }
 155                    
 156 mike          1.13 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
 157                    
 158                    ////////////////////////////////////////////////////////////////////////////////
 159                    //
 160                    // XmlException
 161                    //
 162                    ////////////////////////////////////////////////////////////////////////////////
 163                    
 164                    static const char* _xmlMessages[] =
 165                    {
 166                        "Bad opening element",
 167                        "Bad closing element",
 168                        "Bad attribute name",
 169                        "Exepected equal sign",
 170                        "Bad attribute value",
 171                        "A \"--\" sequence found within comment",
 172                        "Unterminated comment",
 173                        "Unterminated CDATA block",
 174                        "Unterminated DOCTYPE",
 175                        "Too many attributes: parser only handles 10",
 176                        "Malformed reference",
 177 mike          1.13     "Expected a comment or CDATA following \"<!\" sequence",
 178                        "Closing element does not match opening element",
 179                        "One or more tags are still open",
 180                        "More than one root element was encountered",
 181                        "Validation error",
 182                        "Semantic error"
 183                    };
 184                    
 185 david.dillard 1.32 static const char* _xmlKeys[] =
 186 chuck         1.19 {
 187 humberto      1.20     "Common.XmlParser.BAD_START_TAG",
 188 chuck         1.19     "Common.XmlParser.BAD_END_TAG",
 189                        "Common.XmlParser.BAD_ATTRIBUTE_NAME",
 190                        "Common.XmlParser.EXPECTED_EQUAL_SIGN",
 191                        "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
 192                        "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
 193                        "Common.XmlParser.UNTERMINATED_COMMENT",
 194                        "Common.XmlParser.UNTERMINATED_CDATA",
 195                        "Common.XmlParser.UNTERMINATED_DOCTYPE",
 196                        "Common.XmlParser.TOO_MANY_ATTRIBUTES",
 197                        "Common.XmlParser.MALFORMED_REFERENCE",
 198                        "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
 199                        "Common.XmlParser.START_END_MISMATCH",
 200 david.dillard 1.32     "Common.XmlParser.UNCLOSED_TAGS",
 201 chuck         1.19     "Common.XmlParser.MULTIPLE_ROOTS",
 202                        "Common.XmlParser.VALIDATION_ERROR",
 203                        "Common.XmlParser.SEMANTIC_ERROR"
 204                    };
 205                    
 206 chuck         1.23 // l10n replace _formMessage (comment out the old one)
 207 chuck         1.19 /*
 208 mike          1.13 static String _formMessage(Uint32 code, Uint32 line, const String& message)
 209                    {
 210                        String result = _xmlMessages[Uint32(code) - 1];
 211                    
 212                        char buffer[32];
 213                        sprintf(buffer, "%d", line);
 214                        result.append(": on line ");
 215                        result.append(buffer);
 216                    
 217                        if (message.size())
 218                        {
 219 david.dillard 1.32         result.append(": ");
 220                            result.append(message);
 221 mike          1.13     }
 222                    
 223                        return result;
 224                    }
 225 chuck         1.19 */
 226                    
 227 kumpf         1.40 static MessageLoaderParms _formMessage(
 228                        Uint32 code,
 229                        Uint32 line,
 230                        const String& message)
 231 chuck         1.19 {
 232                        String dftMsg = _xmlMessages[Uint32(code) - 1];
 233                        String key = _xmlKeys[Uint32(code) - 1];
 234 david.dillard 1.32         String msg = message;
 235 chuck         1.19 
 236                        dftMsg.append(": on line $0");
 237                        if (message.size())
 238                        {
 239 david.dillard 1.32         msg = ": " + msg;
 240                            dftMsg.append("$1");
 241                        }
 242 chuck         1.19 
 243                        return MessageLoaderParms(key, dftMsg, line ,msg);
 244                    }
 245                    
 246                    static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
 247                    {
 248                        String dftMsg = _xmlMessages[Uint32(code) - 1];
 249                        String key = _xmlKeys[Uint32(code) - 1];
 250                    
 251                        dftMsg.append(": on line $0");
 252 david.dillard 1.32 
 253 chuck         1.19     return MessageLoaderParms(key, dftMsg, line);
 254                    }
 255                    
 256 mike          1.13 
 257                    XmlException::XmlException(
 258 david.dillard 1.32     XmlException::Code code,
 259 mike          1.13     Uint32 lineNumber,
 260 david.dillard 1.32     const String& message)
 261 mike          1.13     : Exception(_formMessage(code, lineNumber, message))
 262                    {
 263                    
 264                    }
 265                    
 266 chuck         1.19 
 267                    XmlException::XmlException(
 268 david.dillard 1.32     XmlException::Code code,
 269 chuck         1.19     Uint32 lineNumber,
 270 david.dillard 1.32     MessageLoaderParms& msgParms)
 271 chuck         1.19     : Exception(_formPartialMessage(code, lineNumber))
 272                    {
 273 david.dillard 1.32         if (msgParms.default_msg.size())
 274 humberto      1.21     {
 275 david.dillard 1.32         msgParms.default_msg = ": " + msgParms.default_msg;
 276                        }
 277                            _rep->message.append(MessageLoader::getMessage(msgParms));
 278 chuck         1.19 }
 279                    
 280                    
 281 mike          1.13 ////////////////////////////////////////////////////////////////////////////////
 282                    //
 283                    // XmlValidationError
 284                    //
 285                    ////////////////////////////////////////////////////////////////////////////////
 286                    
 287                    XmlValidationError::XmlValidationError(
 288                        Uint32 lineNumber,
 289                        const String& message)
 290                        : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
 291                    {
 292                    }
 293                    
 294 chuck         1.19 
 295                    XmlValidationError::XmlValidationError(
 296                        Uint32 lineNumber,
 297                        MessageLoaderParms& msgParms)
 298                        : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
 299                    {
 300                    }
 301                    
 302                    
 303 mike          1.13 ////////////////////////////////////////////////////////////////////////////////
 304                    //
 305                    // XmlSemanticError
 306                    //
 307                    ////////////////////////////////////////////////////////////////////////////////
 308                    
 309                    XmlSemanticError::XmlSemanticError(
 310                        Uint32 lineNumber,
 311                        const String& message)
 312                        : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
 313                    {
 314                    }
 315 chuck         1.19 
 316                    
 317                    XmlSemanticError::XmlSemanticError(
 318                        Uint32 lineNumber,
 319                        MessageLoaderParms& msgParms)
 320                        : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
 321                    {
 322                    }
 323                    
 324 mike          1.13 
 325                    ////////////////////////////////////////////////////////////////////////////////
 326                    //
 327                    // XmlParser
 328                    //
 329                    ////////////////////////////////////////////////////////////////////////////////
 330                    
 331 kumpf         1.40 XmlParser::XmlParser(char* text)
 332                        : _line(1),
 333                          _text(text),
 334                          _current(text),
 335                          _restoreChar('\0'),
 336                          _foundRoot(false)
 337 mike          1.13 {
 338                    }
 339                    
 340 mike          1.34 inline void _skipWhitespace(Uint32& line, char*& p)
 341                    {
 342                        while (*p && _isspace(*p))
 343                        {
 344                            if (*p == '\n')
 345                                line++;
 346                    
 347                            p++;
 348                        }
 349                    }
 350                    
 351 kumpf         1.37 static int _getEntityRef(char*& p)
 352                    {
 353                        if ((p[0] == 'g') && (p[1] == 't') && (p[2] == ';'))
 354                        {
 355                            p += 3;
 356                            return '>';
 357                        }
 358                    
 359                        if ((p[0] == 'l') && (p[1] == 't') && (p[2] == ';'))
 360                        {
 361                            p += 3;
 362                            return '<';
 363                        }
 364                    
 365                        if ((p[0] == 'a') && (p[1] == 'p') && (p[2] == 'o') && (p[3] == 's') &&
 366                            (p[4] == ';'))
 367                        {
 368                            p += 5;
 369                            return '\'';
 370                        }
 371                    
 372 kumpf         1.37     if ((p[0] == 'q') && (p[1] == 'u') && (p[2] == 'o') && (p[3] == 't') &&
 373                            (p[4] == ';'))
 374                        {
 375                            p += 5;
 376                            return '"';
 377                        }
 378                    
 379                        if ((p[0] == 'a') && (p[1] == 'm') && (p[2] == 'p') && (p[3] == ';'))
 380                        {
 381                            p += 4;
 382                            return '&';
 383                        }
 384                    
 385                        return -1;
 386                    }
 387                    
 388                    static inline int _getCharRef(char*& p, bool hex)
 389                    {
 390                        char* end;
 391                        unsigned long ch;
 392                    
 393 kumpf         1.37     if (hex)
 394                        {
 395                            ch = strtoul(p, &end, 16);
 396                        }
 397                        else
 398                        {
 399                            ch = strtoul(p, &end, 10);
 400                        }
 401                    
 402                        if ((end == p) || (*end != ';') || (ch > 255))
 403                        {
 404                            return -1;
 405                        }
 406                    
 407                        if ((hex && (end - p > 4)) || (!hex && (end - p > 5)))
 408                        {
 409                            return -1;
 410                        }
 411                    
 412                        p = end + 1;
 413                    
 414 kumpf         1.37     return ch;
 415                    }
 416                    
 417                    static void _normalize(Uint32& line, char*& p, char end_char, char*& start)
 418                    {
 419                        // Skip over leading whitespace:
 420                    
 421                        _skipWhitespace(line, p);
 422                        start = p;
 423                    
 424                        // Process one character at a time:
 425                    
 426                        char* q = p;
 427                    
 428                        while (*p && (*p != end_char))
 429                        {
 430                            if (_isspace(*p))
 431                            {
 432                                // Compress sequences of whitespace characters to a single space
 433                                // character. Update line number when newlines encountered.
 434                    
 435 kumpf         1.37             if (*p++ == '\n')
 436                                {
 437                                    line++;
 438                                }
 439                    
 440                                *q++ = ' ';
 441                    
 442                                _skipWhitespace(line, p);
 443                            }
 444                            else if (*p == '&')
 445                            {
 446                                // Process entity characters and entity references:
 447                    
 448                                p++;
 449                                int ch;
 450                    
 451                                if (*p == '#')
 452                                {
 453                                    *p++;
 454                    
 455                                    if (*p == 'x')
 456 kumpf         1.37                 {
 457                                        p++;
 458                                        ch = _getCharRef(p, true);
 459                                    }
 460                                    else
 461                                    {
 462                                        ch = _getCharRef(p, false);
 463                                    }
 464                                }
 465                                else
 466                                {
 467                                    ch = _getEntityRef(p);
 468                                }
 469                    
 470                                if (ch == -1)
 471                                {
 472                                    throw XmlException(XmlException::MALFORMED_REFERENCE, line);
 473                                }
 474                    
 475                                *q++ = ch;
 476                            }
 477 kumpf         1.37         else
 478                            {
 479                                *q++ = *p++;
 480                            }
 481                        }
 482                    
 483 kumpf         1.40     // We encountered a the end_char or a zero-terminator.
 484 kumpf         1.37 
 485                        *q = *p;
 486                    
 487                        // Remove single trailing whitespace (consecutive whitespaces already
 488                        // compressed above).  Since p >= q, we can tell if we need to strip a
 489                        // trailing space from q by looking at the end of p.  We must not look at
 490                        // the last character of p, though, if p is an empty string.
 491                    
 492                        if ((p != start) && _isspace(p[-1]))
 493                        {
 494                            q--;
 495                        }
 496                    
 497                        // If q got behind p, it is safe and necessary to null-terminate q
 498                    
 499                        if (q != p)
 500                        {
 501                            *q = '\0';
 502                        }
 503                    }
 504                    
 505 mike          1.13 Boolean XmlParser::next(XmlEntry& entry)
 506                    {
 507                        if (!_putBackStack.isEmpty())
 508                        {
 509 david.dillard 1.32         entry = _putBackStack.top();
 510                            _putBackStack.pop();
 511                            return true;
 512 mike          1.13     }
 513                    
 514                        // If a character was overwritten with a null-terminator the last
 515                        // time this routine was called, then put back that character. Before
 516                        // exiting of course, restore the null-terminator.
 517                    
 518                        char* nullTerminator = 0;
 519                    
 520                        if (_restoreChar && !*_current)
 521                        {
 522 david.dillard 1.32         nullTerminator = _current;
 523                            *_current = _restoreChar;
 524                            _restoreChar = '\0';
 525 mike          1.13     }
 526                    
 527                        // Skip over any whitespace:
 528                    
 529 mike          1.34     _skipWhitespace(_line, _current);
 530 mike          1.13 
 531                        if (!*_current)
 532                        {
 533 david.dillard 1.32         if (nullTerminator)
 534                                *nullTerminator = '\0';
 535 mike          1.13 
 536 david.dillard 1.32         if (!_stack.isEmpty())
 537                                throw XmlException(XmlException::UNCLOSED_TAGS, _line);
 538 mike          1.13 
 539 david.dillard 1.32         return false;
 540 mike          1.13     }
 541                    
 542                        // Either a "<...>" or content begins next:
 543                    
 544                        if (*_current == '<')
 545                        {
 546 david.dillard 1.32         _current++;
 547                            _getElement(_current, entry);
 548 mike          1.13 
 549 david.dillard 1.32         if (nullTerminator)
 550                                *nullTerminator = '\0';
 551 mike          1.13 
 552 david.dillard 1.32         if (entry.type == XmlEntry::START_TAG)
 553                            {
 554                                if (_stack.isEmpty() && _foundRoot)
 555                                    throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
 556                    
 557                                _foundRoot = true;
 558                                _stack.push((char*)entry.text);
 559                            }
 560                            else if (entry.type == XmlEntry::END_TAG)
 561                            {
 562                                if (_stack.isEmpty())
 563                                    throw XmlException(XmlException::START_END_MISMATCH, _line);
 564 mike          1.13 
 565 david.dillard 1.32             if (strcmp(_stack.top(), entry.text) != 0)
 566                                    throw XmlException(XmlException::START_END_MISMATCH, _line);
 567 mike          1.13 
 568 david.dillard 1.32             _stack.pop();
 569                            }
 570 mike          1.13 
 571 david.dillard 1.32         return true;
 572 mike          1.13     }
 573                        else
 574                        {
 575 kumpf         1.37         // Normalize the content:
 576                    
 577                            char* start;
 578                            _normalize(_line, _current, '<', start);
 579                    
 580                            // Get the content:
 581                    
 582 david.dillard 1.32         entry.type = XmlEntry::CONTENT;
 583 kumpf         1.37         entry.text = start;
 584                    
 585                            // Overwrite '<' with a null character (temporarily).
 586                    
 587 david.dillard 1.32         _restoreChar = *_current;
 588                            *_current = '\0';
 589 mike          1.13 
 590 david.dillard 1.32         if (nullTerminator)
 591                                *nullTerminator = '\0';
 592 mike          1.13 
 593 david.dillard 1.32         return true;
 594 mike          1.13     }
 595                    }
 596                    
 597                    void XmlParser::putBack(XmlEntry& entry)
 598                    {
 599                        _putBackStack.push(entry);
 600                    }
 601                    
 602                    XmlParser::~XmlParser()
 603                    {
 604                        // Nothing to do!
 605                    }
 606                    
 607 mike          1.35 // A-Za-z0-9_-:.
 608 kumpf         1.40 static unsigned char _isInnerElementChar[] =
 609 mike          1.35 {
 610                        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 611                        0,0,0,0,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,
 612                        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 613                        1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 614                        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 615                        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 616                        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 617                    };
 618                    
 619 mike          1.13 Boolean XmlParser::_getElementName(char*& p)
 620                    {
 621 mike          1.35     if (!CharSet::isAlNumUnder(Uint8(*p)))
 622 david.dillard 1.32         throw XmlException(XmlException::BAD_START_TAG, _line);
 623 mike          1.35 
 624 kumpf         1.24     p++;
 625 mike          1.13 
 626 mike          1.35     while (*p && _isInnerElementChar[Uint8(*p)])
 627 david.dillard 1.32         p++;
 628 mike          1.13 
 629                        // The next character must be a space:
 630                    
 631 chuck         1.26     if (_isspace(*p))
 632 mike          1.13     {
 633 david.dillard 1.32         *p++ = '\0';
 634 mike          1.34         _skipWhitespace(_line, p);
 635 mike          1.13     }
 636                    
 637                        if (*p == '>')
 638                        {
 639 david.dillard 1.32         *p++ = '\0';
 640                            return true;
 641 mike          1.13     }
 642                    
 643                        return false;
 644                    }
 645                    
 646                    Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement)
 647                    {
 648                        openCloseElement = false;
 649                    
 650 mike          1.35     if (!CharSet::isAlNumUnder(Uint8(*p)))
 651 david.dillard 1.32         throw XmlException(XmlException::BAD_START_TAG, _line);
 652 mike          1.35 
 653 kumpf         1.24     p++;
 654 mike          1.13 
 655 mike          1.35     while (*p && _isInnerElementChar[Uint8(*p)])
 656 david.dillard 1.32         p++;
 657 mike          1.13 
 658                        // The next character must be a space:
 659                    
 660 chuck         1.26     if (_isspace(*p))
 661 mike          1.13     {
 662 david.dillard 1.32         *p++ = '\0';
 663 mike          1.34         _skipWhitespace(_line, p);
 664 mike          1.13     }
 665                    
 666                        if (*p == '>')
 667                        {
 668 david.dillard 1.32         *p++ = '\0';
 669                            return true;
 670 mike          1.13     }
 671                    
 672                        if (p[0] == '/' && p[1] == '>')
 673                        {
 674 david.dillard 1.32         openCloseElement = true;
 675                            *p = '\0';
 676                            p += 2;
 677                            return true;
 678 mike          1.13     }
 679                    
 680                        return false;
 681                    }
 682                    
 683                    void XmlParser::_getAttributeNameAndEqual(char*& p)
 684                    {
 685 mike          1.35     if (!CharSet::isAlNumUnder((Uint8)*p))
 686 david.dillard 1.32         throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
 687 mike          1.35 
 688 kumpf         1.24     p++;
 689 mike          1.13 
 690 mike          1.35     while (*p && _isInnerElementChar[Uint8(*p)])
 691 david.dillard 1.32         p++;
 692 mike          1.13 
 693                        char* term = p;
 694                    
 695 mike          1.34     _skipWhitespace(_line, p);
 696 mike          1.13 
 697                        if (*p != '=')
 698 david.dillard 1.32         throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
 699 mike          1.13 
 700                        p++;
 701                    
 702 mike          1.34     _skipWhitespace(_line, p);
 703 mike          1.13 
 704                        *term = '\0';
 705                    }
 706                    
 707                    void XmlParser::_getComment(char*& p)
 708                    {
 709                        // Now p points to first non-whitespace character beyond "<--" sequence:
 710                    
 711                        for (; *p; p++)
 712                        {
 713 david.dillard 1.32         if (p[0] == '-' && p[1] == '-')
 714                            {
 715                                if (p[2] != '>')
 716                                {
 717                                    throw XmlException(
 718                                        XmlException::MINUS_MINUS_IN_COMMENT, _line);
 719                                }
 720                    
 721                                // Find end of comment (excluding whitespace):
 722                    
 723                                *p = '\0';
 724                                p += 3;
 725                                return;
 726                            }
 727 mike          1.13     }
 728                    
 729                        // If it got this far, then the comment is unterminated:
 730                    
 731                        throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
 732                    }
 733                    
 734                    void XmlParser::_getCData(char*& p)
 735                    {
 736                        // At this point p points one past "<![CDATA[" sequence:
 737                    
 738                        for (; *p; p++)
 739                        {
 740 david.dillard 1.32         if (p[0] == ']' && p[1] == ']' && p[2] == '>')
 741                            {
 742                                *p = '\0';
 743                                p += 3;
 744                                return;
 745                            }
 746                            else if (*p == '\n')
 747                                _line++;
 748 mike          1.13     }
 749                    
 750                        // If it got this far, then the comment is unterminated:
 751                    
 752                        throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
 753                    }
 754                    
 755                    void XmlParser::_getDocType(char*& p)
 756                    {
 757                        // Just ignore the DOCTYPE command for now:
 758                    
 759                        for (; *p && *p != '>'; p++)
 760                        {
 761 david.dillard 1.32         if (*p == '\n')
 762                                _line++;
 763 mike          1.13     }
 764                    
 765                        if (*p != '>')
 766 david.dillard 1.32         throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
 767 mike          1.13 
 768                        p++;
 769                    }
 770                    
 771                    void XmlParser::_getElement(char*& p, XmlEntry& entry)
 772                    {
 773                        entry.attributeCount = 0;
 774                    
 775                        //--------------------------------------------------------------------------
 776                        // Get the element name (expect one of these: '?', '!', [A-Za-z_])
 777                        //--------------------------------------------------------------------------
 778                    
 779                        if (*p == '?')
 780                        {
 781 david.dillard 1.32         entry.type = XmlEntry::XML_DECLARATION;
 782                            entry.text = ++p;
 783 mike          1.13 
 784 david.dillard 1.32         Boolean openCloseElement = false;
 785 mike          1.13 
 786 david.dillard 1.32         if (_getElementName(p))
 787                                return;
 788 mike          1.13     }
 789                        else if (*p == '!')
 790                        {
 791 david.dillard 1.32         p++;
 792 mike          1.13 
 793 david.dillard 1.32         // Expect a comment or CDATA:
 794 mike          1.13 
 795 david.dillard 1.32         if (p[0] == '-' && p[1] == '-')
 796                            {
 797                                p += 2;
 798                                entry.type = XmlEntry::COMMENT;
 799                                entry.text = p;
 800                                _getComment(p);
 801                                return;
 802                            }
 803                            else if (memcmp(p, "[CDATA[", 7) == 0)
 804                            {
 805                                p += 7;
 806                                entry.type = XmlEntry::CDATA;
 807                                entry.text = p;
 808                                _getCData(p);
 809                                return;
 810                            }
 811                            else if (memcmp(p, "DOCTYPE", 7) == 0)
 812                            {
 813                                entry.type = XmlEntry::DOCTYPE;
 814 kumpf         1.37             entry.text = "";
 815 david.dillard 1.32             _getDocType(p);
 816                                return;
 817                            }
 818                            throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
 819 mike          1.13     }
 820                        else if (*p == '/')
 821                        {
 822 david.dillard 1.32         entry.type = XmlEntry::END_TAG;
 823                            entry.text = ++p;
 824 mike          1.13 
 825 david.dillard 1.32         if (!_getElementName(p))
 826                                throw(XmlException(XmlException::BAD_END_TAG, _line));
 827 mike          1.13 
 828 david.dillard 1.32         return;
 829 mike          1.13     }
 830 david         1.25     else if ((((*p >= 'A') && (*p <= 'Z')) ||
 831 kumpf         1.24               ((*p >= 'a') && (*p <= 'z')) ||
 832                                  (*p == '_')))
 833 mike          1.13     {
 834 david.dillard 1.32         entry.type = XmlEntry::START_TAG;
 835                            entry.text = p;
 836 mike          1.13 
 837 david.dillard 1.32         Boolean openCloseElement = false;
 838 mike          1.13 
 839 david.dillard 1.32         if (_getOpenElementName(p, openCloseElement))
 840                            {
 841                                if (openCloseElement)
 842                                    entry.type = XmlEntry::EMPTY_TAG;
 843                                return;
 844                            }
 845 mike          1.13     }
 846                        else
 847 david.dillard 1.32         throw XmlException(XmlException::BAD_START_TAG, _line);
 848 mike          1.13 
 849                        //--------------------------------------------------------------------------
 850                        // Grab all the attributes:
 851                        //--------------------------------------------------------------------------
 852                    
 853                        for (;;)
 854                        {
 855 david.dillard 1.32         if (entry.type == XmlEntry::XML_DECLARATION)
 856                            {
 857                                if (p[0] == '?' && p[1] == '>')
 858                                {
 859                                    p += 2;
 860                                    return;
 861                                }
 862                            }
 863                            else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
 864                            {
 865                                entry.type = XmlEntry::EMPTY_TAG;
 866                                p += 2;
 867                                return;
 868                            }
 869                            else if (*p == '>')
 870                            {
 871                                p++;
 872                                return;
 873                            }
 874                    
 875                            XmlAttribute attr;
 876 david.dillard 1.32         attr.name = p;
 877                            _getAttributeNameAndEqual(p);
 878                    
 879 kumpf         1.37         // Get the attribute value (e.g., "some value")
 880                            {
 881                                if ((*p != '"') && (*p != '\''))
 882                                {
 883                                    throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 884                                }
 885                    
 886                                char quote = *p++;
 887                    
 888                                char* start;
 889                                _normalize(_line, p, quote, start);
 890                                attr.value = start;
 891                    
 892                                if (*p != quote)
 893                                {
 894                                    throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 895                                }
 896                    
 897                                // Overwrite the closing quote with a null-terminator:
 898 david.dillard 1.32 
 899 kumpf         1.37             *p++ = '\0';
 900                            }
 901 david.dillard 1.32 
 902                            if (entry.type == XmlEntry::XML_DECLARATION)
 903                            {
 904                                // The next thing must a space or a "?>":
 905 mike          1.13 
 906 david.dillard 1.32             if (!(p[0] == '?' && p[1] == '>') && !_isspace(*p))
 907                                {
 908                                    throw XmlException(
 909                                        XmlException::BAD_ATTRIBUTE_VALUE, _line);
 910                                }
 911                            }
 912                            else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || _isspace(*p)))
 913                            {
 914                                // The next thing must be a space or a '>':
 915 mike          1.13 
 916 david.dillard 1.32             throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 917                            }
 918 mike          1.13 
 919 mike          1.34         _skipWhitespace(_line, p);
 920 david.dillard 1.32 
 921                            if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES)
 922                                throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line);
 923                    
 924                            entry.attributes[entry.attributeCount++] = attr;
 925 mike          1.13     }
 926                    }
 927                    
 928                    static const char* _typeStrings[] =
 929                    {
 930 david.dillard 1.32     "XML_DECLARATION",
 931                        "START_TAG",
 932                        "EMPTY_TAG",
 933                        "END_TAG",
 934 mike          1.13     "COMMENT",
 935                        "CDATA",
 936                        "DOCTYPE",
 937 david.dillard 1.32     "CONTENT"
 938 mike          1.13 };
 939                    
 940                    void XmlEntry::print() const
 941                    {
 942                        PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
 943                    
 944                        Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
 945                    
 946                        if (needQuotes)
 947 david.dillard 1.32         PEGASUS_STD(cout) << "\"";
 948                    
 949 mike          1.13     _printValue(text);
 950                    
 951                        if (needQuotes)
 952 david.dillard 1.32         PEGASUS_STD(cout) << "\"";
 953 mike          1.13 
 954                        PEGASUS_STD(cout) << '\n';
 955                    
 956                        for (Uint32 i = 0; i < attributeCount; i++)
 957                        {
 958 david.dillard 1.32         PEGASUS_STD(cout) << "    " << attributes[i].name << "=\"";
 959                            _printValue(attributes[i].value);
 960                            PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
 961 mike          1.13     }
 962                    }
 963                    
 964                    const XmlAttribute* XmlEntry::findAttribute(
 965                        const char* name) const
 966                    {
 967                        for (Uint32 i = 0; i < attributeCount; i++)
 968                        {
 969 david.dillard 1.32         if (strcmp(attributes[i].name, name) == 0)
 970                                return &attributes[i];
 971 mike          1.13     }
 972                    
 973                        return 0;
 974                    }
 975                    
 976                    // Find first non-whitespace character (set first) and last non-whitespace
 977                    // character (set last one past this). For example, consider this string:
 978                    //
 979 david.dillard 1.32 //      "   87     "
 980 mike          1.13 //
 981                    // The first pointer would point to '8' and the last pointer woudl point one
 982                    // beyond '7'.
 983                    
 984                    static void _findEnds(
 985 david.dillard 1.32     const char* str,
 986                        const char*& first,
 987 mike          1.13     const char*& last)
 988                    {
 989                        first = str;
 990                    
 991 chuck         1.26     while (_isspace(*first))
 992 david.dillard 1.32         first++;
 993 mike          1.13 
 994                        if (!*first)
 995                        {
 996 david.dillard 1.32         last = first;
 997                            return;
 998 mike          1.13     }
 999                    
1000                        last = first + strlen(first);
1001                    
1002 chuck         1.26     while (last != first && _isspace(last[-1]))
1003 david.dillard 1.32         last--;
1004 mike          1.13 }
1005                    
1006                    Boolean XmlEntry::getAttributeValue(
1007 david.dillard 1.32     const char* name,
1008 mike          1.13     Uint32& value) const
1009                    {
1010                        const XmlAttribute* attr = findAttribute(name);
1011                    
1012                        if (!attr)
1013 david.dillard 1.32         return false;
1014 mike          1.13 
1015                        const char* first;
1016                        const char* last;
1017                        _findEnds(attr->value, first, last);
1018                    
1019                        char* end = 0;
1020                        long tmp = strtol(first, &end, 10);
1021                    
1022                        if (!end || end != last)
1023 david.dillard 1.32         return false;
1024 mike          1.13 
1025                        value = Uint32(tmp);
1026                        return true;
1027                    }
1028                    
1029                    Boolean XmlEntry::getAttributeValue(
1030 david.dillard 1.32     const char* name,
1031 mike          1.13     Real32& value) const
1032                    {
1033                        const XmlAttribute* attr = findAttribute(name);
1034                    
1035                        if (!attr)
1036 david.dillard 1.32         return false;
1037 mike          1.13 
1038                        const char* first;
1039                        const char* last;
1040                        _findEnds(attr->value, first, last);
1041                    
1042                        char* end = 0;
1043                        double tmp = strtod(first, &end);
1044                    
1045                        if (!end || end != last)
1046 david.dillard 1.32         return false;
1047 mike          1.13 
1048 david.dillard 1.32     value = static_cast<Real32>(tmp);
1049 mike          1.13     return true;
1050                    }
1051                    
1052                    Boolean XmlEntry::getAttributeValue(
1053 david.dillard 1.32     const char* name,
1054 mike          1.13     const char*& value) const
1055                    {
1056                        const XmlAttribute* attr = findAttribute(name);
1057                    
1058                        if (!attr)
1059 david.dillard 1.32         return false;
1060 mike          1.13 
1061                        value = attr->value;
1062                        return true;
1063                    }
1064                    
1065                    Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1066                    {
1067                        const char* tmp;
1068                    
1069                        if (!getAttributeValue(name, tmp))
1070 david.dillard 1.32         return false;
1071 mike          1.13 
1072 chuck         1.28     value = String(tmp);
1073 mike          1.13     return true;
1074                    }
1075                    
1076 mike          1.33 void XmlAppendCString(Buffer& out, const char* str)
1077 mike          1.13 {
1078 david.dillard 1.32     out.append(str, static_cast<Uint32>(strlen(str)));
1079 mike          1.13 }
1080                    
1081                    PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2