(file) Return to XmlParser.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 karl  1.38 //%2006////////////////////////////////////////////////////////////////////////
   2 mike  1.13 //
   3 karl  1.29 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
   4            // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
   5            // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
   6 karl  1.27 // IBM Corp.; EMC Corporation, The Open Group.
   7 karl  1.29 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
   8            // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
   9 karl  1.31 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
  10            // EMC Corporation; VERITAS Software Corporation; The Open Group.
  11 karl  1.38 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
  12            // EMC Corporation; Symantec Corporation; The Open Group.
  13 mike  1.13 //
  14            // Permission is hereby granted, free of charge, to any person obtaining a copy
  15 kumpf 1.16 // of this software and associated documentation files (the "Software"), to
  16            // deal in the Software without restriction, including without limitation the
  17            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  18 mike  1.13 // sell copies of the Software, and to permit persons to whom the Software is
  19            // furnished to do so, subject to the following conditions:
  20 karl  1.38 // 
  21 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
  22 mike  1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
  23            // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
  24 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
  25            // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  26            // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  27 mike  1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  28            // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  29            //
  30            //==============================================================================
  31            //
  32            // Author: Mike Brasher (mbrasher@bmc.com)
  33            //
  34 david.dillard 1.30 // Modified By: David Dillard, VERITAS Software Corp.
  35                    //                  (david.dillard@veritas.com)
  36 mike          1.13 //
  37                    //%/////////////////////////////////////////////////////////////////////////////
  38                    
  39                    ////////////////////////////////////////////////////////////////////////////////
  40                    //
  41                    // XmlParser
  42                    //
  43 david.dillard 1.32 //      This file contains a simple non-validating XML parser. Here are
   44                    //      several rules for well-formed XML:
  45 mike          1.13 //
  46 david.dillard 1.32 //          1.  Documents must begin with an XML declaration:
  47 mike          1.13 //
  48 david.dillard 1.32 //              <?xml version="1.0" standalone="yes"?>
  49 mike          1.13 //
  50 david.dillard 1.32 //          2.  Comments have the form:
  51 mike          1.13 //
  52 david.dillard 1.32 //              <!-- blah blah blah -->
  53 mike          1.13 //
  54 david.dillard 1.32 //          3. The following entity references are supported:
  55 mike          1.13 //
   56 david.dillard 1.32 //              &amp; - ampersand
   57                    //              &lt; - less-than
   58                    //              &gt; - greater-than
   59                    //              &quot; - full quote
   60                    //              &apos; - apostrophe
  61 mike          1.13 //
  62 kumpf         1.18 //             as well as character (numeric) references:
  63 mike          1.35 //
  64 kumpf         1.18 //              &#49; - decimal reference for character '1'
  65                    //              &#x31; - hexadecimal reference for character '1'
  66                    //
  67 david.dillard 1.32 //          4. Element names and attribute names take the following form:
  68 mike          1.13 //
   69 david.dillard 1.32 //              [A-Za-z_][A-Za-z_0-9-.:]*
  70 mike          1.13 //
  71 david.dillard 1.32 //          5.  Arbitrary data (CDATA) can be enclosed like this:
  72 mike          1.13 //
  73 david.dillard 1.32 //                  <![CDATA[
  74                    //                  ...
  75                    //                  ]]>
  76 mike          1.13 //
  77 david.dillard 1.32 //          6.  Element names and attributes names are case-sensitive.
  78 mike          1.13 //
   79 david.dillard 1.32 //          7.  Attribute values must be delimited by full (") or half (')
   80                    //              quotes; undelimited attribute values are not allowed.
  81 mike          1.13 //
  82 david.dillard 1.32 //          8.  <!DOCTYPE...>
  83 mike          1.13 //
  84                    // TODO:
  85                    //
  86 mike          1.35 //      ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is 
  87                    //      work. Handle <!DOCTYPE...> sections which are complicated (containing
  88 mike          1.13 //        rules rather than references to files).
  89                    //
  90 david.dillard 1.32 //      Remove newlines from string literals:
  91 mike          1.13 //
  92                    //          Example: <xyz x="hello
  93 david.dillard 1.32 //              world">
  94 mike          1.13 //
  95                    ////////////////////////////////////////////////////////////////////////////////
  96                    
  97 sage          1.14 #include <Pegasus/Common/Config.h>
  98 mike          1.13 #include <cctype>
  99                    #include <cstdio>
 100                    #include <cstdlib>
 101                    #include <cstring>
 102                    #include "XmlParser.h"
 103                    #include "Logger.h"
 104 chuck         1.19 #include "ExceptionRep.h"
 105 mike          1.34 #include "CharSet.h"
 106 mike          1.13 
 107                    PEGASUS_NAMESPACE_BEGIN
 108                    
 109                    ////////////////////////////////////////////////////////////////////////////////
 110                    //
 111                    // Static helper functions
 112                    //
 113                    ////////////////////////////////////////////////////////////////////////////////
 114                    
 115                    static void _printValue(const char* p)
 116                    {
 117                        for (; *p; p++)
 118                        {
 119 david.dillard 1.32         if (*p == '\n')
 120                                PEGASUS_STD(cout) << "\\n";
 121                            else if (*p == '\r')
 122                                PEGASUS_STD(cout) << "\\r";
 123                            else if (*p == '\t')
 124                                PEGASUS_STD(cout) << "\\t";
 125                            else
 126                                PEGASUS_STD(cout) << *p;
 127 mike          1.13     }
 128                    }
 129                    
 130                    struct EntityReference
 131                    {
 132                        const char* match;
 133                        Uint32 length;
 134                        char replacement;
 135                    };
 136                    
 137 kumpf         1.18 // ATTN: Add support for more entity references
 138 mike          1.13 static EntityReference _references[] =
 139                    {
 140                        { "&amp;", 5, '&' },
 141                        { "&lt;", 4, '<' },
 142                        { "&gt;", 4, '>' },
 143                        { "&quot;", 6, '"' },
 144                        { "&apos;", 6, '\'' }
 145                    };
 146                    
 147 chuck         1.26 
 148                    // Implements a check for a whitespace character, without calling
 149                    // isspace( ).  The isspace( ) function is locale-sensitive,
 150                    // and incorrectly flags some chars above 0x7f as whitespace.  This
 151                    // causes the XmlParser to incorrectly parse UTF-8 data.
 152                    //
 153                    // Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml)
 154                    // defines white space as:
 155 david.dillard 1.32 // S    ::=    (#x20 | #x9 | #xD | #xA)+
 156 mike          1.34 static inline int _isspace(char c)
 157 chuck         1.26 {
 158 kumpf         1.36     return CharSet::isXmlWhiteSpace((Uint8)c);
 159 chuck         1.26 }
 160                    
 161 mike          1.13 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
 162                    
 163                    ////////////////////////////////////////////////////////////////////////////////
 164                    //
 165                    // XmlException
 166                    //
 167                    ////////////////////////////////////////////////////////////////////////////////
 168                    
 169                    static const char* _xmlMessages[] =
 170                    {
 171                        "Bad opening element",
 172                        "Bad closing element",
 173                        "Bad attribute name",
 174                        "Exepected equal sign",
 175                        "Bad attribute value",
 176                        "A \"--\" sequence found within comment",
 177                        "Unterminated comment",
 178                        "Unterminated CDATA block",
 179                        "Unterminated DOCTYPE",
 180                        "Too many attributes: parser only handles 10",
 181                        "Malformed reference",
 182 mike          1.13     "Expected a comment or CDATA following \"<!\" sequence",
 183                        "Closing element does not match opening element",
 184                        "One or more tags are still open",
 185                        "More than one root element was encountered",
 186                        "Validation error",
 187                        "Semantic error"
 188                    };
 189                    
 190 david.dillard 1.32 static const char* _xmlKeys[] =
 191 chuck         1.19 {
 192 humberto      1.20     "Common.XmlParser.BAD_START_TAG",
 193 chuck         1.19     "Common.XmlParser.BAD_END_TAG",
 194                        "Common.XmlParser.BAD_ATTRIBUTE_NAME",
 195                        "Common.XmlParser.EXPECTED_EQUAL_SIGN",
 196                        "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
 197                        "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
 198                        "Common.XmlParser.UNTERMINATED_COMMENT",
 199                        "Common.XmlParser.UNTERMINATED_CDATA",
 200                        "Common.XmlParser.UNTERMINATED_DOCTYPE",
 201                        "Common.XmlParser.TOO_MANY_ATTRIBUTES",
 202                        "Common.XmlParser.MALFORMED_REFERENCE",
 203                        "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
 204                        "Common.XmlParser.START_END_MISMATCH",
 205 david.dillard 1.32     "Common.XmlParser.UNCLOSED_TAGS",
 206 chuck         1.19     "Common.XmlParser.MULTIPLE_ROOTS",
 207                        "Common.XmlParser.VALIDATION_ERROR",
 208                        "Common.XmlParser.SEMANTIC_ERROR"
 209                    };
 210                    
 211 chuck         1.23 // l10n replace _formMessage (comment out the old one)
 212 chuck         1.19 /*
 213 mike          1.13 static String _formMessage(Uint32 code, Uint32 line, const String& message)
 214                    {
 215                        String result = _xmlMessages[Uint32(code) - 1];
 216                    
 217                        char buffer[32];
 218                        sprintf(buffer, "%d", line);
 219                        result.append(": on line ");
 220                        result.append(buffer);
 221                    
 222                        if (message.size())
 223                        {
 224 david.dillard 1.32         result.append(": ");
 225                            result.append(message);
 226 mike          1.13     }
 227                    
 228                        return result;
 229                    }
 230 chuck         1.19 */
 231                    
 232                    static MessageLoaderParms _formMessage(Uint32 code, Uint32 line, const String& message)
 233                    {
 234                        String dftMsg = _xmlMessages[Uint32(code) - 1];
 235                        String key = _xmlKeys[Uint32(code) - 1];
 236 david.dillard 1.32         String msg = message;
 237 chuck         1.19 
 238                        dftMsg.append(": on line $0");
 239                        if (message.size())
 240                        {
 241 david.dillard 1.32         msg = ": " + msg;
 242                            dftMsg.append("$1");
 243                        }
 244 chuck         1.19 
 245                        return MessageLoaderParms(key, dftMsg, line ,msg);
 246                    }
 247                    
 248                    static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
 249                    {
 250                        String dftMsg = _xmlMessages[Uint32(code) - 1];
 251                        String key = _xmlKeys[Uint32(code) - 1];
 252                    
 253                        dftMsg.append(": on line $0");
 254 david.dillard 1.32 
 255 chuck         1.19     return MessageLoaderParms(key, dftMsg, line);
 256                    }
 257                    
 258 mike          1.13 
 259                    XmlException::XmlException(
 260 david.dillard 1.32     XmlException::Code code,
 261 mike          1.13     Uint32 lineNumber,
 262 david.dillard 1.32     const String& message)
 263 mike          1.13     : Exception(_formMessage(code, lineNumber, message))
 264                    {
 265                    
 266                    }
 267                    
 268 chuck         1.19 
 269                    XmlException::XmlException(
 270 david.dillard 1.32     XmlException::Code code,
 271 chuck         1.19     Uint32 lineNumber,
 272 david.dillard 1.32     MessageLoaderParms& msgParms)
 273 chuck         1.19     : Exception(_formPartialMessage(code, lineNumber))
 274                    {
 275 david.dillard 1.32         if (msgParms.default_msg.size())
 276 humberto      1.21     {
 277 david.dillard 1.32         msgParms.default_msg = ": " + msgParms.default_msg;
 278                        }
 279                            _rep->message.append(MessageLoader::getMessage(msgParms));
 280 chuck         1.19 }
 281                    
 282                    
 283 mike          1.13 ////////////////////////////////////////////////////////////////////////////////
 284                    //
 285                    // XmlValidationError
 286                    //
 287                    ////////////////////////////////////////////////////////////////////////////////
 288                    
 289                    XmlValidationError::XmlValidationError(
 290                        Uint32 lineNumber,
 291                        const String& message)
 292                        : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
 293                    {
 294                    
 295                    }
 296                    
 297 chuck         1.19 
 298                    XmlValidationError::XmlValidationError(
 299                        Uint32 lineNumber,
 300                        MessageLoaderParms& msgParms)
 301                        : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
 302                    {
 303                    
 304                    }
 305                    
 306                    
 307 mike          1.13 ////////////////////////////////////////////////////////////////////////////////
 308                    //
 309                    // XmlSemanticError
 310                    //
 311                    ////////////////////////////////////////////////////////////////////////////////
 312                    
 313                    XmlSemanticError::XmlSemanticError(
 314                        Uint32 lineNumber,
 315                        const String& message)
 316                        : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
 317                    {
 318                    
 319                    }
 320 chuck         1.19 
 321                    
 322                    XmlSemanticError::XmlSemanticError(
 323                        Uint32 lineNumber,
 324                        MessageLoaderParms& msgParms)
 325                        : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
 326                    {
 327                    
 328                    }
 329                    
 330 mike          1.13 
 331                    ////////////////////////////////////////////////////////////////////////////////
 332                    //
 333                    // XmlParser
 334                    //
 335                    ////////////////////////////////////////////////////////////////////////////////
 336                    
 337 david.dillard 1.32 XmlParser::XmlParser(char* text) : _line(1), _text(text), _current(text),
 338 mike          1.13     _restoreChar('\0'), _foundRoot(false)
 339                    {
 340                    
 341                    }
 342                    
 343 mike          1.34 inline void _skipWhitespace(Uint32& line, char*& p)
 344                    {
 345                        while (*p && _isspace(*p))
 346                        {
 347                            if (*p == '\n')
 348                                line++;
 349                    
 350                            p++;
 351                        }
 352                    }
 353                    
 354 kumpf         1.37 static int _getEntityRef(char*& p)
 355                    {
 356                        if ((p[0] == 'g') && (p[1] == 't') && (p[2] == ';'))
 357                        {
 358                            p += 3;
 359                            return '>';
 360                        }
 361                    
 362                        if ((p[0] == 'l') && (p[1] == 't') && (p[2] == ';'))
 363                        {
 364                            p += 3;
 365                            return '<';
 366                        }
 367                    
 368                        if ((p[0] == 'a') && (p[1] == 'p') && (p[2] == 'o') && (p[3] == 's') &&
 369                            (p[4] == ';'))
 370                        {
 371                            p += 5;
 372                            return '\'';
 373                        }
 374                    
 375 kumpf         1.37     if ((p[0] == 'q') && (p[1] == 'u') && (p[2] == 'o') && (p[3] == 't') &&
 376                            (p[4] == ';'))
 377                        {
 378                            p += 5;
 379                            return '"';
 380                        }
 381                    
 382                        if ((p[0] == 'a') && (p[1] == 'm') && (p[2] == 'p') && (p[3] == ';'))
 383                        {
 384                            p += 4;
 385                            return '&';
 386                        }
 387                    
 388                        return -1;
 389                    }
 390                    
 391                    static inline int _getCharRef(char*& p, bool hex)
 392                    {
 393                        char* end;
 394                        unsigned long ch;
 395                    
 396 kumpf         1.37     if (hex)
 397                        {
 398                            ch = strtoul(p, &end, 16);
 399                        }
 400                        else
 401                        {
 402                            ch = strtoul(p, &end, 10);
 403                        }
 404                    
 405                        if ((end == p) || (*end != ';') || (ch > 255))
 406                        {
 407                            return -1;
 408                        }
 409                    
 410                        if ((hex && (end - p > 4)) || (!hex && (end - p > 5)))
 411                        {
 412                            return -1;
 413                        }
 414                    
 415                        p = end + 1;
 416                    
 417 kumpf         1.37     return ch;
 418                    }
 419                    
 420                    static void _normalize(Uint32& line, char*& p, char end_char, char*& start)
 421                    {
 422                        // Skip over leading whitespace:
 423                    
 424                        _skipWhitespace(line, p);
 425                        start = p;
 426                    
 427                        // Process one character at a time:
 428                    
 429                        char* q = p;
 430                    
 431                        while (*p && (*p != end_char))
 432                        {
 433                            if (_isspace(*p))
 434                            {
 435                                // Compress sequences of whitespace characters to a single space
 436                                // character. Update line number when newlines encountered.
 437                    
 438 kumpf         1.37             if (*p++ == '\n')
 439                                {
 440                                    line++;
 441                                }
 442                    
 443                                *q++ = ' ';
 444                    
 445                                _skipWhitespace(line, p);
 446                            }
 447                            else if (*p == '&')
 448                            {
 449                                // Process entity characters and entity references:
 450                    
 451                                p++;
 452                                int ch;
 453                    
 454                                if (*p == '#')
 455                                {
 456                                    *p++;
 457                    
 458                                    if (*p == 'x')
 459 kumpf         1.37                 {
 460                                        p++;
 461                                        ch = _getCharRef(p, true);
 462                                    }
 463                                    else
 464                                    {
 465                                        ch = _getCharRef(p, false);
 466                                    }
 467                                }
 468                                else
 469                                {
 470                                    ch = _getEntityRef(p);
 471                                }
 472                    
 473                                if (ch == -1)
 474                                {
 475                                    throw XmlException(XmlException::MALFORMED_REFERENCE, line);
 476                                }
 477                    
 478                                *q++ = ch;
 479                            }
 480 kumpf         1.37         else
 481                            {
 482                                *q++ = *p++;
 483                            }
 484                        }
 485                    
 486                        // We encountered a the end_char or a zero-terminator. 
 487                    
 488                        *q = *p;
 489                    
 490                        // Remove single trailing whitespace (consecutive whitespaces already
 491                        // compressed above).  Since p >= q, we can tell if we need to strip a
 492                        // trailing space from q by looking at the end of p.  We must not look at
 493                        // the last character of p, though, if p is an empty string.
 494                    
 495                        if ((p != start) && _isspace(p[-1]))
 496                        {
 497                            q--;
 498                        }
 499                    
 500                        // If q got behind p, it is safe and necessary to null-terminate q
 501 kumpf         1.37 
 502                        if (q != p)
 503                        {
 504                            *q = '\0';
 505                        }
 506                    }
 507                    
 508 mike          1.13 Boolean XmlParser::next(XmlEntry& entry)
 509                    {
 510                        if (!_putBackStack.isEmpty())
 511                        {
 512 david.dillard 1.32         entry = _putBackStack.top();
 513                            _putBackStack.pop();
 514                            return true;
 515 mike          1.13     }
 516                    
 517                        // If a character was overwritten with a null-terminator the last
 518                        // time this routine was called, then put back that character. Before
 519                        // exiting of course, restore the null-terminator.
 520                    
 521                        char* nullTerminator = 0;
 522                    
 523                        if (_restoreChar && !*_current)
 524                        {
 525 david.dillard 1.32         nullTerminator = _current;
 526                            *_current = _restoreChar;
 527                            _restoreChar = '\0';
 528 mike          1.13     }
 529                    
 530                        // Skip over any whitespace:
 531                    
 532 mike          1.34     _skipWhitespace(_line, _current);
 533 mike          1.13 
 534                        if (!*_current)
 535                        {
 536 david.dillard 1.32         if (nullTerminator)
 537                                *nullTerminator = '\0';
 538 mike          1.13 
 539 david.dillard 1.32         if (!_stack.isEmpty())
 540                                throw XmlException(XmlException::UNCLOSED_TAGS, _line);
 541 mike          1.13 
 542 david.dillard 1.32         return false;
 543 mike          1.13     }
 544                    
 545                        // Either a "<...>" or content begins next:
 546                    
 547                        if (*_current == '<')
 548                        {
 549 david.dillard 1.32         _current++;
 550                            _getElement(_current, entry);
 551 mike          1.13 
 552 david.dillard 1.32         if (nullTerminator)
 553                                *nullTerminator = '\0';
 554 mike          1.13 
 555 david.dillard 1.32         if (entry.type == XmlEntry::START_TAG)
 556                            {
 557                                if (_stack.isEmpty() && _foundRoot)
 558                                    throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
 559                    
 560                                _foundRoot = true;
 561                                _stack.push((char*)entry.text);
 562                            }
 563                            else if (entry.type == XmlEntry::END_TAG)
 564                            {
 565                                if (_stack.isEmpty())
 566                                    throw XmlException(XmlException::START_END_MISMATCH, _line);
 567 mike          1.13 
 568 david.dillard 1.32             if (strcmp(_stack.top(), entry.text) != 0)
 569                                    throw XmlException(XmlException::START_END_MISMATCH, _line);
 570 mike          1.13 
 571 david.dillard 1.32             _stack.pop();
 572                            }
 573 mike          1.13 
 574 david.dillard 1.32         return true;
 575 mike          1.13     }
 576                        else
 577                        {
 578 kumpf         1.37         // Normalize the content:
 579                    
 580                            char* start;
 581                            _normalize(_line, _current, '<', start);
 582                    
 583                            // Get the content:
 584                    
 585 david.dillard 1.32         entry.type = XmlEntry::CONTENT;
 586 kumpf         1.37         entry.text = start;
 587                    
 588                            // Overwrite '<' with a null character (temporarily).
 589                    
 590 david.dillard 1.32         _restoreChar = *_current;
 591                            *_current = '\0';
 592 mike          1.13 
 593 david.dillard 1.32         if (nullTerminator)
 594                                *nullTerminator = '\0';
 595 mike          1.13 
 596 david.dillard 1.32         return true;
 597 mike          1.13     }
 598                    }
 599                    
 600                    void XmlParser::putBack(XmlEntry& entry)
 601                    {
 602                        _putBackStack.push(entry);
 603                    }
 604                    
 605                    XmlParser::~XmlParser()
 606                    {
 607                        // Nothing to do!
 608                    }
 609                    
 610 mike          1.35 // A-Za-z0-9_-:.
 611                    static unsigned char _isInnerElementChar[] = 
 612                    {
 613                        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 614                        0,0,0,0,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,
 615                        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 616                        1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 617                        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 618                        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 619                        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 620                    };
 621                    
 622 mike          1.13 Boolean XmlParser::_getElementName(char*& p)
 623                    {
 624 mike          1.35     if (!CharSet::isAlNumUnder(Uint8(*p)))
 625 david.dillard 1.32         throw XmlException(XmlException::BAD_START_TAG, _line);
 626 mike          1.35 
 627 kumpf         1.24     p++;
 628 mike          1.13 
 629 mike          1.35     while (*p && _isInnerElementChar[Uint8(*p)])
 630 david.dillard 1.32         p++;
 631 mike          1.13 
 632                        // The next character must be a space:
 633                    
 634 chuck         1.26     if (_isspace(*p))
 635 mike          1.13     {
 636 david.dillard 1.32         *p++ = '\0';
 637 mike          1.34         _skipWhitespace(_line, p);
 638 mike          1.13     }
 639                    
 640                        if (*p == '>')
 641                        {
 642 david.dillard 1.32         *p++ = '\0';
 643                            return true;
 644 mike          1.13     }
 645                    
 646                        return false;
 647                    }
 648                    
 649                    Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement)
 650                    {
 651                        openCloseElement = false;
 652                    
 653 mike          1.35     if (!CharSet::isAlNumUnder(Uint8(*p)))
 654 david.dillard 1.32         throw XmlException(XmlException::BAD_START_TAG, _line);
 655 mike          1.35 
 656 kumpf         1.24     p++;
 657 mike          1.13 
 658 mike          1.35     while (*p && _isInnerElementChar[Uint8(*p)])
 659 david.dillard 1.32         p++;
 660 mike          1.13 
 661                        // The next character must be a space:
 662                    
 663 chuck         1.26     if (_isspace(*p))
 664 mike          1.13     {
 665 david.dillard 1.32         *p++ = '\0';
 666 mike          1.34         _skipWhitespace(_line, p);
 667 mike          1.13     }
 668                    
 669                        if (*p == '>')
 670                        {
 671 david.dillard 1.32         *p++ = '\0';
 672                            return true;
 673 mike          1.13     }
 674                    
 675                        if (p[0] == '/' && p[1] == '>')
 676                        {
 677 david.dillard 1.32         openCloseElement = true;
 678                            *p = '\0';
 679                            p += 2;
 680                            return true;
 681 mike          1.13     }
 682                    
 683                        return false;
 684                    }
 685                    
 686                    void XmlParser::_getAttributeNameAndEqual(char*& p)
 687                    {
 688 mike          1.35     if (!CharSet::isAlNumUnder((Uint8)*p))
 689 david.dillard 1.32         throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
 690 mike          1.35 
 691 kumpf         1.24     p++;
 692 mike          1.13 
 693 mike          1.35     while (*p && _isInnerElementChar[Uint8(*p)])
 694 david.dillard 1.32         p++;
 695 mike          1.13 
 696                        char* term = p;
 697                    
 698 mike          1.34     _skipWhitespace(_line, p);
 699 mike          1.13 
 700                        if (*p != '=')
 701 david.dillard 1.32         throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
 702 mike          1.13 
 703                        p++;
 704                    
 705 mike          1.34     _skipWhitespace(_line, p);
 706 mike          1.13 
 707                        *term = '\0';
 708                    }
 709                    
 710                    void XmlParser::_getComment(char*& p)
 711                    {
 712                        // Now p points to first non-whitespace character beyond "<--" sequence:
 713                    
 714                        for (; *p; p++)
 715                        {
 716 david.dillard 1.32         if (p[0] == '-' && p[1] == '-')
 717                            {
 718                                if (p[2] != '>')
 719                                {
 720                                    throw XmlException(
 721                                        XmlException::MINUS_MINUS_IN_COMMENT, _line);
 722                                }
 723                    
 724                                // Find end of comment (excluding whitespace):
 725                    
 726                                *p = '\0';
 727                                p += 3;
 728                                return;
 729                            }
 730 mike          1.13     }
 731                    
 732                        // If it got this far, then the comment is unterminated:
 733                    
 734                        throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
 735                    }
 736                    
 737                    void XmlParser::_getCData(char*& p)
 738                    {
 739                        // At this point p points one past "<![CDATA[" sequence:
 740                    
 741                        for (; *p; p++)
 742                        {
 743 david.dillard 1.32         if (p[0] == ']' && p[1] == ']' && p[2] == '>')
 744                            {
 745                                *p = '\0';
 746                                p += 3;
 747                                return;
 748                            }
 749                            else if (*p == '\n')
 750                                _line++;
 751 mike          1.13     }
 752                    
 753                        // If it got this far, then the comment is unterminated:
 754                    
 755                        throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
 756                    }
 757                    
 758                    void XmlParser::_getDocType(char*& p)
 759                    {
 760                        // Just ignore the DOCTYPE command for now:
 761                    
 762                        for (; *p && *p != '>'; p++)
 763                        {
 764 david.dillard 1.32         if (*p == '\n')
 765                                _line++;
 766 mike          1.13     }
 767                    
 768                        if (*p != '>')
 769 david.dillard 1.32         throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
 770 mike          1.13 
 771                        p++;
 772                    }
 773                    
 774                    void XmlParser::_getElement(char*& p, XmlEntry& entry)
 775                    {
 776                        entry.attributeCount = 0;
 777                    
 778                        //--------------------------------------------------------------------------
 779                        // Get the element name (expect one of these: '?', '!', [A-Za-z_])
 780                        //--------------------------------------------------------------------------
 781                    
 782                        if (*p == '?')
 783                        {
 784 david.dillard 1.32         entry.type = XmlEntry::XML_DECLARATION;
 785                            entry.text = ++p;
 786 mike          1.13 
 787 david.dillard 1.32         Boolean openCloseElement = false;
 788 mike          1.13 
 789 david.dillard 1.32         if (_getElementName(p))
 790                                return;
 791 mike          1.13     }
 792                        else if (*p == '!')
 793                        {
 794 david.dillard 1.32         p++;
 795 mike          1.13 
 796 david.dillard 1.32         // Expect a comment or CDATA:
 797 mike          1.13 
 798 david.dillard 1.32         if (p[0] == '-' && p[1] == '-')
 799                            {
 800                                p += 2;
 801                                entry.type = XmlEntry::COMMENT;
 802                                entry.text = p;
 803                                _getComment(p);
 804                                return;
 805                            }
 806                            else if (memcmp(p, "[CDATA[", 7) == 0)
 807                            {
 808                                p += 7;
 809                                entry.type = XmlEntry::CDATA;
 810                                entry.text = p;
 811                                _getCData(p);
 812                                return;
 813                            }
 814                            else if (memcmp(p, "DOCTYPE", 7) == 0)
 815                            {
 816                                entry.type = XmlEntry::DOCTYPE;
 817 kumpf         1.37             entry.text = "";
 818 david.dillard 1.32             _getDocType(p);
 819                                return;
 820                            }
 821                            throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
 822 mike          1.13     }
 823                        else if (*p == '/')
 824                        {
 825 david.dillard 1.32         entry.type = XmlEntry::END_TAG;
 826                            entry.text = ++p;
 827 mike          1.13 
 828 david.dillard 1.32         if (!_getElementName(p))
 829                                throw(XmlException(XmlException::BAD_END_TAG, _line));
 830 mike          1.13 
 831 david.dillard 1.32         return;
 832 mike          1.13     }
 833 david         1.25     else if ((((*p >= 'A') && (*p <= 'Z')) ||
 834 kumpf         1.24               ((*p >= 'a') && (*p <= 'z')) ||
 835                                  (*p == '_')))
 836 mike          1.13     {
 837 david.dillard 1.32         entry.type = XmlEntry::START_TAG;
 838                            entry.text = p;
 839 mike          1.13 
 840 david.dillard 1.32         Boolean openCloseElement = false;
 841 mike          1.13 
 842 david.dillard 1.32         if (_getOpenElementName(p, openCloseElement))
 843                            {
 844                                if (openCloseElement)
 845                                    entry.type = XmlEntry::EMPTY_TAG;
 846                                return;
 847                            }
 848 mike          1.13     }
 849                        else
 850 david.dillard 1.32         throw XmlException(XmlException::BAD_START_TAG, _line);
 851 mike          1.13 
 852                        //--------------------------------------------------------------------------
 853                        // Grab all the attributes:
 854                        //--------------------------------------------------------------------------
 855                    
 856                        for (;;)
 857                        {
 858 david.dillard 1.32         if (entry.type == XmlEntry::XML_DECLARATION)
 859                            {
 860                                if (p[0] == '?' && p[1] == '>')
 861                                {
 862                                    p += 2;
 863                                    return;
 864                                }
 865                            }
 866                            else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
 867                            {
 868                                entry.type = XmlEntry::EMPTY_TAG;
 869                                p += 2;
 870                                return;
 871                            }
 872                            else if (*p == '>')
 873                            {
 874                                p++;
 875                                return;
 876                            }
 877                    
 878                            XmlAttribute attr;
 879 david.dillard 1.32         attr.name = p;
 880                            _getAttributeNameAndEqual(p);
 881                    
 882 kumpf         1.37         // Get the attribute value (e.g., "some value")
 883                            {
 884                                if ((*p != '"') && (*p != '\''))
 885                                {
 886                                    throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 887                                }
 888                    
 889                                char quote = *p++;
 890                    
 891                                char* start;
 892                                _normalize(_line, p, quote, start);
 893                                attr.value = start;
 894                    
 895                                if (*p != quote)
 896                                {
 897                                    throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 898                                }
 899                    
 900                                // Overwrite the closing quote with a null-terminator:
 901 david.dillard 1.32 
 902 kumpf         1.37             *p++ = '\0';
 903                            }
 904 david.dillard 1.32 
 905                            if (entry.type == XmlEntry::XML_DECLARATION)
 906                            {
 907                                // The next thing must a space or a "?>":
 908 mike          1.13 
 909 david.dillard 1.32             if (!(p[0] == '?' && p[1] == '>') && !_isspace(*p))
 910                                {
 911                                    throw XmlException(
 912                                        XmlException::BAD_ATTRIBUTE_VALUE, _line);
 913                                }
 914                            }
 915                            else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || _isspace(*p)))
 916                            {
 917                                // The next thing must be a space or a '>':
 918 mike          1.13 
 919 david.dillard 1.32             throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 920                            }
 921 mike          1.13 
 922 mike          1.34         _skipWhitespace(_line, p);
 923 david.dillard 1.32 
 924                            if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES)
 925                                throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line);
 926                    
 927                            entry.attributes[entry.attributeCount++] = attr;
 928 mike          1.13     }
 929                    }
 930                    
 931                    static const char* _typeStrings[] =
 932                    {
 933 david.dillard 1.32     "XML_DECLARATION",
 934                        "START_TAG",
 935                        "EMPTY_TAG",
 936                        "END_TAG",
 937 mike          1.13     "COMMENT",
 938                        "CDATA",
 939                        "DOCTYPE",
 940 david.dillard 1.32     "CONTENT"
 941 mike          1.13 };
 942                    
 943                    void XmlEntry::print() const
 944                    {
 945                        PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
 946                    
 947                        Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
 948                    
 949                        if (needQuotes)
 950 david.dillard 1.32         PEGASUS_STD(cout) << "\"";
 951                    
 952 mike          1.13     _printValue(text);
 953                    
 954                        if (needQuotes)
 955 david.dillard 1.32         PEGASUS_STD(cout) << "\"";
 956 mike          1.13 
 957                        PEGASUS_STD(cout) << '\n';
 958                    
 959                        for (Uint32 i = 0; i < attributeCount; i++)
 960                        {
 961 david.dillard 1.32         PEGASUS_STD(cout) << "    " << attributes[i].name << "=\"";
 962                            _printValue(attributes[i].value);
 963                            PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
 964 mike          1.13     }
 965                    }
 966                    
 967                    const XmlAttribute* XmlEntry::findAttribute(
 968                        const char* name) const
 969                    {
 970                        for (Uint32 i = 0; i < attributeCount; i++)
 971                        {
 972 david.dillard 1.32         if (strcmp(attributes[i].name, name) == 0)
 973                                return &attributes[i];
 974 mike          1.13     }
 975                    
 976                        return 0;
 977                    }
 978                    
 979                    // Find first non-whitespace character (set first) and last non-whitespace
 980                    // character (set last one past this). For example, consider this string:
 981                    //
 982 david.dillard 1.32 //      "   87     "
 983 mike          1.13 //
 984                    // The first pointer would point to '8' and the last pointer woudl point one
 985                    // beyond '7'.
 986                    
 987                    static void _findEnds(
 988 david.dillard 1.32     const char* str,
 989                        const char*& first,
 990 mike          1.13     const char*& last)
 991                    {
 992                        first = str;
 993                    
 994 chuck         1.26     while (_isspace(*first))
 995 david.dillard 1.32         first++;
 996 mike          1.13 
 997                        if (!*first)
 998                        {
 999 david.dillard 1.32         last = first;
1000                            return;
1001 mike          1.13     }
1002                    
1003                        last = first + strlen(first);
1004                    
1005 chuck         1.26     while (last != first && _isspace(last[-1]))
1006 david.dillard 1.32         last--;
1007 mike          1.13 }
1008                    
1009                    Boolean XmlEntry::getAttributeValue(
1010 david.dillard 1.32     const char* name,
1011 mike          1.13     Uint32& value) const
1012                    {
1013                        const XmlAttribute* attr = findAttribute(name);
1014                    
1015                        if (!attr)
1016 david.dillard 1.32         return false;
1017 mike          1.13 
1018                        const char* first;
1019                        const char* last;
1020                        _findEnds(attr->value, first, last);
1021                    
1022                        char* end = 0;
1023                        long tmp = strtol(first, &end, 10);
1024                    
1025                        if (!end || end != last)
1026 david.dillard 1.32         return false;
1027 mike          1.13 
1028                        value = Uint32(tmp);
1029                        return true;
1030                    }
1031                    
1032                    Boolean XmlEntry::getAttributeValue(
1033 david.dillard 1.32     const char* name,
1034 mike          1.13     Real32& value) const
1035                    {
1036                        const XmlAttribute* attr = findAttribute(name);
1037                    
1038                        if (!attr)
1039 david.dillard 1.32         return false;
1040 mike          1.13 
1041                        const char* first;
1042                        const char* last;
1043                        _findEnds(attr->value, first, last);
1044                    
1045                        char* end = 0;
1046                        double tmp = strtod(first, &end);
1047                    
1048                        if (!end || end != last)
1049 david.dillard 1.32         return false;
1050 mike          1.13 
1051 david.dillard 1.32     value = static_cast<Real32>(tmp);
1052 mike          1.13     return true;
1053                    }
1054                    
1055                    Boolean XmlEntry::getAttributeValue(
1056 david.dillard 1.32     const char* name,
1057 mike          1.13     const char*& value) const
1058                    {
1059                        const XmlAttribute* attr = findAttribute(name);
1060                    
1061                        if (!attr)
1062 david.dillard 1.32         return false;
1063 mike          1.13 
1064                        value = attr->value;
1065                        return true;
1066                    }
1067                    
1068                    Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1069                    {
1070                        const char* tmp;
1071                    
1072                        if (!getAttributeValue(name, tmp))
1073 david.dillard 1.32         return false;
1074 mike          1.13 
1075 chuck         1.28     value = String(tmp);
1076 mike          1.13     return true;
1077                    }
1078                    
1079 mike          1.33 void XmlAppendCString(Buffer& out, const char* str)
1080 mike          1.13 {
1081 david.dillard 1.32     out.append(str, static_cast<Uint32>(strlen(str)));
1082 mike          1.13 }
1083                    
1084                    PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2