(file) Return to XmlParser.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 karl  1.38 //%2006////////////////////////////////////////////////////////////////////////
   2 mike  1.13 //
   3 karl  1.29 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
   4            // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
   5            // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
   6 karl  1.27 // IBM Corp.; EMC Corporation, The Open Group.
   7 karl  1.29 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
   8            // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
   9 karl  1.31 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
  10            // EMC Corporation; VERITAS Software Corporation; The Open Group.
  11 karl  1.38 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
  12            // EMC Corporation; Symantec Corporation; The Open Group.
  13 mike  1.13 //
  14            // Permission is hereby granted, free of charge, to any person obtaining a copy
  15 kumpf 1.16 // of this software and associated documentation files (the "Software"), to
  16            // deal in the Software without restriction, including without limitation the
  17            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  18 mike  1.13 // sell copies of the Software, and to permit persons to whom the Software is
  19            // furnished to do so, subject to the following conditions:
  20 karl  1.38 // 
  21 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
  22 mike  1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
  23            // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
  24 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
  25            // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  26            // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  27 mike  1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  28            // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  29            //
  30            //==============================================================================
  31            //
  32            // Author: Mike Brasher (mbrasher@bmc.com)
  33            //
  34 david.dillard 1.30 // Modified By: David Dillard, VERITAS Software Corp.
  35                    //                  (david.dillard@veritas.com)
  36 mike          1.13 //
  37                    //%/////////////////////////////////////////////////////////////////////////////
  38                    
  39                    ////////////////////////////////////////////////////////////////////////////////
  40                    //
  41                    // XmlParser
  42                    //
  43 david.dillard 1.32 //      This file contains a simple non-validating XML parser. Here are
  44                    //      several rules for well-formed XML:
  45 mike          1.13 //
  46 david.dillard 1.32 //          1.  Documents must begin with an XML declaration:
  47 mike          1.13 //
  48 david.dillard 1.32 //              <?xml version="1.0" standalone="yes"?>
  49 mike          1.13 //
  50 david.dillard 1.32 //          2.  Comments have the form:
  51 mike          1.13 //
  52 david.dillard 1.32 //              <!-- blah blah blah -->
  53 mike          1.13 //
  54 david.dillard 1.32 //          3. The following entity references are supported:
  55 mike          1.13 //
  56 david.dillard 1.32 //              &amp; - ampersand
  57                    //              &lt; - less-than
  58                    //              &gt; - greater-than
  59                    //              &quot; - full quote
  60                    //              &apos; - apostrophe
  61 mike          1.13 //
  62 kumpf         1.18 //             as well as character (numeric) references:
  63 mike          1.35 //
  64 kumpf         1.18 //              &#49; - decimal reference for character '1'
  65                    //              &#x31; - hexadecimal reference for character '1'
  66                    //
  67 david.dillard 1.32 //          4. Element names and attribute names take the following form:
  68 mike          1.13 //
  69 david.dillard 1.32 //              [A-Za-z_][A-Za-z_0-9-.:]*
  70 mike          1.13 //
  71 david.dillard 1.32 //          5.  Arbitrary data (CDATA) can be enclosed like this:
  72 mike          1.13 //
  73 david.dillard 1.32 //                  <![CDATA[
  74                    //                  ...
  75                    //                  ]]>
  76 mike          1.13 //
  77 david.dillard 1.32 //          6.  Element names and attributes names are case-sensitive.
  78 mike          1.13 //
  79 david.dillard 1.32 //          7.  XmlAttribute values must be delimited by full or half quotes.
  80                    //              XmlAttribute values must be delimited.
  81 mike          1.13 //
  82 david.dillard 1.32 //          8.  <!DOCTYPE...>
  83 mike          1.13 //
  84                    // TODO:
  85                    //
  86 mike          1.35 //      ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is 
  87                    //      work. Handle <!DOCTYPE...> sections which are complicated (containing
  88 mike          1.13 //        rules rather than references to files).
  89                    //
  90 david.dillard 1.32 //      Remove newlines from string literals:
  91 mike          1.13 //
  92                    //          Example: <xyz x="hello
  93 david.dillard 1.32 //              world">
  94 mike          1.13 //
  95                    ////////////////////////////////////////////////////////////////////////////////
  96                    
  97 sage          1.14 #include <Pegasus/Common/Config.h>
  98 mike          1.13 #include <cctype>
  99                    #include <cstdio>
 100                    #include <cstdlib>
 101                    #include <cstring>
 102                    #include "XmlParser.h"
 103                    #include "Logger.h"
 104 chuck         1.19 #include "ExceptionRep.h"
 105 mike          1.34 #include "CharSet.h"
 106 mike          1.13 
 107                    PEGASUS_NAMESPACE_BEGIN
 108                    
 109                    #define PEGASUS_ARRAY_T XmlEntry
 110                    # include "ArrayImpl.h"
 111                    #undef PEGASUS_ARRAY_T
 112                    
 113                    ////////////////////////////////////////////////////////////////////////////////
 114                    //
 115                    // Static helper functions
 116                    //
 117                    ////////////////////////////////////////////////////////////////////////////////
 118                    
 119                    static void _printValue(const char* p)
 120                    {
 121                        for (; *p; p++)
 122                        {
 123 david.dillard 1.32         if (*p == '\n')
 124                                PEGASUS_STD(cout) << "\\n";
 125                            else if (*p == '\r')
 126                                PEGASUS_STD(cout) << "\\r";
 127                            else if (*p == '\t')
 128                                PEGASUS_STD(cout) << "\\t";
 129                            else
 130                                PEGASUS_STD(cout) << *p;
 131 mike          1.13     }
 132                    }
 133                    
 134                    struct EntityReference
 135                    {
 136                        const char* match;
 137                        Uint32 length;
 138                        char replacement;
 139                    };
 140                    
 141 kumpf         1.18 // ATTN: Add support for more entity references
 142 mike          1.13 static EntityReference _references[] =
 143                    {
 144                        { "&amp;", 5, '&' },
 145                        { "&lt;", 4, '<' },
 146                        { "&gt;", 4, '>' },
 147                        { "&quot;", 6, '"' },
 148                        { "&apos;", 6, '\'' }
 149                    };
 150                    
 151 chuck         1.26 
 152                    // Implements a check for a whitespace character, without calling
 153                    // isspace( ).  The isspace( ) function is locale-sensitive,
 154                    // and incorrectly flags some chars above 0x7f as whitespace.  This
 155                    // causes the XmlParser to incorrectly parse UTF-8 data.
 156                    //
 157                    // Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml)
 158                    // defines white space as:
 159 david.dillard 1.32 // S    ::=    (#x20 | #x9 | #xD | #xA)+
 160 mike          1.34 static inline int _isspace(char c)
 161 chuck         1.26 {
 162 kumpf         1.36     return CharSet::isXmlWhiteSpace((Uint8)c);
 163 chuck         1.26 }
 164                    
 165 mike          1.13 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
 166                    
 167                    ////////////////////////////////////////////////////////////////////////////////
 168                    //
 169                    // XmlException
 170                    //
 171                    ////////////////////////////////////////////////////////////////////////////////
 172                    
 173                    static const char* _xmlMessages[] =
 174                    {
 175                        "Bad opening element",
 176                        "Bad closing element",
 177                        "Bad attribute name",
 178                        "Exepected equal sign",
 179                        "Bad attribute value",
 180                        "A \"--\" sequence found within comment",
 181                        "Unterminated comment",
 182                        "Unterminated CDATA block",
 183                        "Unterminated DOCTYPE",
 184                        "Too many attributes: parser only handles 10",
 185                        "Malformed reference",
 186 mike          1.13     "Expected a comment or CDATA following \"<!\" sequence",
 187                        "Closing element does not match opening element",
 188                        "One or more tags are still open",
 189                        "More than one root element was encountered",
 190                        "Validation error",
 191                        "Semantic error"
 192                    };
 193                    
 194 david.dillard 1.32 static const char* _xmlKeys[] =
 195 chuck         1.19 {
 196 humberto      1.20     "Common.XmlParser.BAD_START_TAG",
 197 chuck         1.19     "Common.XmlParser.BAD_END_TAG",
 198                        "Common.XmlParser.BAD_ATTRIBUTE_NAME",
 199                        "Common.XmlParser.EXPECTED_EQUAL_SIGN",
 200                        "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
 201                        "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
 202                        "Common.XmlParser.UNTERMINATED_COMMENT",
 203                        "Common.XmlParser.UNTERMINATED_CDATA",
 204                        "Common.XmlParser.UNTERMINATED_DOCTYPE",
 205                        "Common.XmlParser.TOO_MANY_ATTRIBUTES",
 206                        "Common.XmlParser.MALFORMED_REFERENCE",
 207                        "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
 208                        "Common.XmlParser.START_END_MISMATCH",
 209 david.dillard 1.32     "Common.XmlParser.UNCLOSED_TAGS",
 210 chuck         1.19     "Common.XmlParser.MULTIPLE_ROOTS",
 211                        "Common.XmlParser.VALIDATION_ERROR",
 212                        "Common.XmlParser.SEMANTIC_ERROR"
 213                    };
 214                    
 215 chuck         1.23 // l10n replace _formMessage (comment out the old one)
 216 chuck         1.19 /*
 217 mike          1.13 static String _formMessage(Uint32 code, Uint32 line, const String& message)
 218                    {
 219                        String result = _xmlMessages[Uint32(code) - 1];
 220                    
 221                        char buffer[32];
 222                        sprintf(buffer, "%d", line);
 223                        result.append(": on line ");
 224                        result.append(buffer);
 225                    
 226                        if (message.size())
 227                        {
 228 david.dillard 1.32         result.append(": ");
 229                            result.append(message);
 230 mike          1.13     }
 231                    
 232                        return result;
 233                    }
 234 chuck         1.19 */
 235                    
 236                    static MessageLoaderParms _formMessage(Uint32 code, Uint32 line, const String& message)
 237                    {
 238                        String dftMsg = _xmlMessages[Uint32(code) - 1];
 239                        String key = _xmlKeys[Uint32(code) - 1];
 240 david.dillard 1.32         String msg = message;
 241 chuck         1.19 
 242                        dftMsg.append(": on line $0");
 243                        if (message.size())
 244                        {
 245 david.dillard 1.32         msg = ": " + msg;
 246                            dftMsg.append("$1");
 247                        }
 248 chuck         1.19 
 249                        return MessageLoaderParms(key, dftMsg, line ,msg);
 250                    }
 251                    
 252                    static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
 253                    {
 254                        String dftMsg = _xmlMessages[Uint32(code) - 1];
 255                        String key = _xmlKeys[Uint32(code) - 1];
 256                    
 257                        dftMsg.append(": on line $0");
 258 david.dillard 1.32 
 259 chuck         1.19     return MessageLoaderParms(key, dftMsg, line);
 260                    }
 261                    
 262 mike          1.13 
 263                    XmlException::XmlException(
 264 david.dillard 1.32     XmlException::Code code,
 265 mike          1.13     Uint32 lineNumber,
 266 david.dillard 1.32     const String& message)
 267 mike          1.13     : Exception(_formMessage(code, lineNumber, message))
 268                    {
 269                    
 270                    }
 271                    
 272 chuck         1.19 
 273                    XmlException::XmlException(
 274 david.dillard 1.32     XmlException::Code code,
 275 chuck         1.19     Uint32 lineNumber,
 276 david.dillard 1.32     MessageLoaderParms& msgParms)
 277 chuck         1.19     : Exception(_formPartialMessage(code, lineNumber))
 278                    {
 279 david.dillard 1.32         if (msgParms.default_msg.size())
 280 humberto      1.21     {
 281 david.dillard 1.32         msgParms.default_msg = ": " + msgParms.default_msg;
 282                        }
 283                            _rep->message.append(MessageLoader::getMessage(msgParms));
 284 chuck         1.19 }
 285                    
 286                    
 287 mike          1.13 ////////////////////////////////////////////////////////////////////////////////
 288                    //
 289                    // XmlValidationError
 290                    //
 291                    ////////////////////////////////////////////////////////////////////////////////
 292                    
 293                    XmlValidationError::XmlValidationError(
 294                        Uint32 lineNumber,
 295                        const String& message)
 296                        : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
 297                    {
 298                    
 299                    }
 300                    
 301 chuck         1.19 
 302                    XmlValidationError::XmlValidationError(
 303                        Uint32 lineNumber,
 304                        MessageLoaderParms& msgParms)
 305                        : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
 306                    {
 307                    
 308                    }
 309                    
 310                    
 311 mike          1.13 ////////////////////////////////////////////////////////////////////////////////
 312                    //
 313                    // XmlSemanticError
 314                    //
 315                    ////////////////////////////////////////////////////////////////////////////////
 316                    
 317                    XmlSemanticError::XmlSemanticError(
 318                        Uint32 lineNumber,
 319                        const String& message)
 320                        : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
 321                    {
 322                    
 323                    }
 324 chuck         1.19 
 325                    
 326                    XmlSemanticError::XmlSemanticError(
 327                        Uint32 lineNumber,
 328                        MessageLoaderParms& msgParms)
 329                        : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
 330                    {
 331                    
 332                    }
 333                    
 334 mike          1.13 
 335                    ////////////////////////////////////////////////////////////////////////////////
 336                    //
 337                    // XmlParser
 338                    //
 339                    ////////////////////////////////////////////////////////////////////////////////
 340                    
 341 david.dillard 1.32 XmlParser::XmlParser(char* text) : _line(1), _text(text), _current(text),
 342 mike          1.13     _restoreChar('\0'), _foundRoot(false)
 343                    {
 344                    
 345                    }
 346                    
 347 mike          1.34 inline void _skipWhitespace(Uint32& line, char*& p)
 348                    {
 349                        while (*p && _isspace(*p))
 350                        {
 351                            if (*p == '\n')
 352                                line++;
 353                    
 354                            p++;
 355                        }
 356                    }
 357                    
 358 kumpf         1.37 static int _getEntityRef(char*& p)
 359                    {
 360                        if ((p[0] == 'g') && (p[1] == 't') && (p[2] == ';'))
 361                        {
 362                            p += 3;
 363                            return '>';
 364                        }
 365                    
 366                        if ((p[0] == 'l') && (p[1] == 't') && (p[2] == ';'))
 367                        {
 368                            p += 3;
 369                            return '<';
 370                        }
 371                    
 372                        if ((p[0] == 'a') && (p[1] == 'p') && (p[2] == 'o') && (p[3] == 's') &&
 373                            (p[4] == ';'))
 374                        {
 375                            p += 5;
 376                            return '\'';
 377                        }
 378                    
 379 kumpf         1.37     if ((p[0] == 'q') && (p[1] == 'u') && (p[2] == 'o') && (p[3] == 't') &&
 380                            (p[4] == ';'))
 381                        {
 382                            p += 5;
 383                            return '"';
 384                        }
 385                    
 386                        if ((p[0] == 'a') && (p[1] == 'm') && (p[2] == 'p') && (p[3] == ';'))
 387                        {
 388                            p += 4;
 389                            return '&';
 390                        }
 391                    
 392                        return -1;
 393                    }
 394                    
 395                    static inline int _getCharRef(char*& p, bool hex)
 396                    {
 397                        char* end;
 398                        unsigned long ch;
 399                    
 400 kumpf         1.37     if (hex)
 401                        {
 402                            ch = strtoul(p, &end, 16);
 403                        }
 404                        else
 405                        {
 406                            ch = strtoul(p, &end, 10);
 407                        }
 408                    
 409                        if ((end == p) || (*end != ';') || (ch > 255))
 410                        {
 411                            return -1;
 412                        }
 413                    
 414                        if ((hex && (end - p > 4)) || (!hex && (end - p > 5)))
 415                        {
 416                            return -1;
 417                        }
 418                    
 419                        p = end + 1;
 420                    
 421 kumpf         1.37     return ch;
 422                    }
 423                    
 424                    static void _normalize(Uint32& line, char*& p, char end_char, char*& start)
 425                    {
 426                        // Skip over leading whitespace:
 427                    
 428                        _skipWhitespace(line, p);
 429                        start = p;
 430                    
 431                        // Process one character at a time:
 432                    
 433                        char* q = p;
 434                    
 435                        while (*p && (*p != end_char))
 436                        {
 437                            if (_isspace(*p))
 438                            {
 439                                // Compress sequences of whitespace characters to a single space
 440                                // character. Update line number when newlines encountered.
 441                    
 442 kumpf         1.37             if (*p++ == '\n')
 443                                {
 444                                    line++;
 445                                }
 446                    
 447                                *q++ = ' ';
 448                    
 449                                _skipWhitespace(line, p);
 450                            }
 451                            else if (*p == '&')
 452                            {
 453                                // Process entity characters and entity references:
 454                    
 455                                p++;
 456                                int ch;
 457                    
 458                                if (*p == '#')
 459                                {
 460                                    *p++;
 461                    
 462                                    if (*p == 'x')
 463 kumpf         1.37                 {
 464                                        p++;
 465                                        ch = _getCharRef(p, true);
 466                                    }
 467                                    else
 468                                    {
 469                                        ch = _getCharRef(p, false);
 470                                    }
 471                                }
 472                                else
 473                                {
 474                                    ch = _getEntityRef(p);
 475                                }
 476                    
 477                                if (ch == -1)
 478                                {
 479                                    throw XmlException(XmlException::MALFORMED_REFERENCE, line);
 480                                }
 481                    
 482                                *q++ = ch;
 483                            }
 484 kumpf         1.37         else
 485                            {
 486                                *q++ = *p++;
 487                            }
 488                        }
 489                    
 490                        // We encountered a the end_char or a zero-terminator. 
 491                    
 492                        *q = *p;
 493                    
 494                        // Remove single trailing whitespace (consecutive whitespaces already
 495                        // compressed above).  Since p >= q, we can tell if we need to strip a
 496                        // trailing space from q by looking at the end of p.  We must not look at
 497                        // the last character of p, though, if p is an empty string.
 498                    
 499                        if ((p != start) && _isspace(p[-1]))
 500                        {
 501                            q--;
 502                        }
 503                    
 504                        // If q got behind p, it is safe and necessary to null-terminate q
 505 kumpf         1.37 
 506                        if (q != p)
 507                        {
 508                            *q = '\0';
 509                        }
 510                    }
 511                    
 512 mike          1.13 Boolean XmlParser::next(XmlEntry& entry)
 513                    {
 514                        if (!_putBackStack.isEmpty())
 515                        {
 516 david.dillard 1.32         entry = _putBackStack.top();
 517                            _putBackStack.pop();
 518                            return true;
 519 mike          1.13     }
 520                    
 521                        // If a character was overwritten with a null-terminator the last
 522                        // time this routine was called, then put back that character. Before
 523                        // exiting of course, restore the null-terminator.
 524                    
 525                        char* nullTerminator = 0;
 526                    
 527                        if (_restoreChar && !*_current)
 528                        {
 529 david.dillard 1.32         nullTerminator = _current;
 530                            *_current = _restoreChar;
 531                            _restoreChar = '\0';
 532 mike          1.13     }
 533                    
 534                        // Skip over any whitespace:
 535                    
 536 mike          1.34     _skipWhitespace(_line, _current);
 537 mike          1.13 
 538                        if (!*_current)
 539                        {
 540 david.dillard 1.32         if (nullTerminator)
 541                                *nullTerminator = '\0';
 542 mike          1.13 
 543 david.dillard 1.32         if (!_stack.isEmpty())
 544                                throw XmlException(XmlException::UNCLOSED_TAGS, _line);
 545 mike          1.13 
 546 david.dillard 1.32         return false;
 547 mike          1.13     }
 548                    
 549                        // Either a "<...>" or content begins next:
 550                    
 551                        if (*_current == '<')
 552                        {
 553 david.dillard 1.32         _current++;
 554                            _getElement(_current, entry);
 555 mike          1.13 
 556 david.dillard 1.32         if (nullTerminator)
 557                                *nullTerminator = '\0';
 558 mike          1.13 
 559 david.dillard 1.32         if (entry.type == XmlEntry::START_TAG)
 560                            {
 561                                if (_stack.isEmpty() && _foundRoot)
 562                                    throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
 563                    
 564                                _foundRoot = true;
 565                                _stack.push((char*)entry.text);
 566                            }
 567                            else if (entry.type == XmlEntry::END_TAG)
 568                            {
 569                                if (_stack.isEmpty())
 570                                    throw XmlException(XmlException::START_END_MISMATCH, _line);
 571 mike          1.13 
 572 david.dillard 1.32             if (strcmp(_stack.top(), entry.text) != 0)
 573                                    throw XmlException(XmlException::START_END_MISMATCH, _line);
 574 mike          1.13 
 575 david.dillard 1.32             _stack.pop();
 576                            }
 577 mike          1.13 
 578 david.dillard 1.32         return true;
 579 mike          1.13     }
 580                        else
 581                        {
 582 kumpf         1.37         // Normalize the content:
 583                    
 584                            char* start;
 585                            _normalize(_line, _current, '<', start);
 586                    
 587                            // Get the content:
 588                    
 589 david.dillard 1.32         entry.type = XmlEntry::CONTENT;
 590 kumpf         1.37         entry.text = start;
 591                    
 592                            // Overwrite '<' with a null character (temporarily).
 593                    
 594 david.dillard 1.32         _restoreChar = *_current;
 595                            *_current = '\0';
 596 mike          1.13 
 597 david.dillard 1.32         if (nullTerminator)
 598                                *nullTerminator = '\0';
 599 mike          1.13 
 600 david.dillard 1.32         return true;
 601 mike          1.13     }
 602                    }
 603                    
 604                    void XmlParser::putBack(XmlEntry& entry)
 605                    {
 606                        _putBackStack.push(entry);
 607                    }
 608                    
 609                    XmlParser::~XmlParser()
 610                    {
 611                        // Nothing to do!
 612                    }
 613                    
 614 mike          1.35 // A-Za-z0-9_-:.
 615                    static unsigned char _isInnerElementChar[] = 
 616                    {
 617                        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 618                        0,0,0,0,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,
 619                        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 620                        1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 621                        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 622                        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 623                        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 624                    };
 625                    
 626 mike          1.13 Boolean XmlParser::_getElementName(char*& p)
 627                    {
 628 mike          1.35     if (!CharSet::isAlNumUnder(Uint8(*p)))
 629 david.dillard 1.32         throw XmlException(XmlException::BAD_START_TAG, _line);
 630 mike          1.35 
 631 kumpf         1.24     p++;
 632 mike          1.13 
 633 mike          1.35     while (*p && _isInnerElementChar[Uint8(*p)])
 634 david.dillard 1.32         p++;
 635 mike          1.13 
 636                        // The next character must be a space:
 637                    
 638 chuck         1.26     if (_isspace(*p))
 639 mike          1.13     {
 640 david.dillard 1.32         *p++ = '\0';
 641 mike          1.34         _skipWhitespace(_line, p);
 642 mike          1.13     }
 643                    
 644                        if (*p == '>')
 645                        {
 646 david.dillard 1.32         *p++ = '\0';
 647                            return true;
 648 mike          1.13     }
 649                    
 650                        return false;
 651                    }
 652                    
 653                    Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement)
 654                    {
 655                        openCloseElement = false;
 656                    
 657 mike          1.35     if (!CharSet::isAlNumUnder(Uint8(*p)))
 658 david.dillard 1.32         throw XmlException(XmlException::BAD_START_TAG, _line);
 659 mike          1.35 
 660 kumpf         1.24     p++;
 661 mike          1.13 
 662 mike          1.35     while (*p && _isInnerElementChar[Uint8(*p)])
 663 david.dillard 1.32         p++;
 664 mike          1.13 
 665                        // The next character must be a space:
 666                    
 667 chuck         1.26     if (_isspace(*p))
 668 mike          1.13     {
 669 david.dillard 1.32         *p++ = '\0';
 670 mike          1.34         _skipWhitespace(_line, p);
 671 mike          1.13     }
 672                    
 673                        if (*p == '>')
 674                        {
 675 david.dillard 1.32         *p++ = '\0';
 676                            return true;
 677 mike          1.13     }
 678                    
 679                        if (p[0] == '/' && p[1] == '>')
 680                        {
 681 david.dillard 1.32         openCloseElement = true;
 682                            *p = '\0';
 683                            p += 2;
 684                            return true;
 685 mike          1.13     }
 686                    
 687                        return false;
 688                    }
 689                    
 690                    void XmlParser::_getAttributeNameAndEqual(char*& p)
 691                    {
 692 mike          1.35     if (!CharSet::isAlNumUnder((Uint8)*p))
 693 david.dillard 1.32         throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
 694 mike          1.35 
 695 kumpf         1.24     p++;
 696 mike          1.13 
 697 mike          1.35     while (*p && _isInnerElementChar[Uint8(*p)])
 698 david.dillard 1.32         p++;
 699 mike          1.13 
 700                        char* term = p;
 701                    
 702 mike          1.34     _skipWhitespace(_line, p);
 703 mike          1.13 
 704                        if (*p != '=')
 705 david.dillard 1.32         throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
 706 mike          1.13 
 707                        p++;
 708                    
 709 mike          1.34     _skipWhitespace(_line, p);
 710 mike          1.13 
 711                        *term = '\0';
 712                    }
 713                    
 714                    void XmlParser::_getComment(char*& p)
 715                    {
 716                        // Now p points to first non-whitespace character beyond "<--" sequence:
 717                    
 718                        for (; *p; p++)
 719                        {
 720 david.dillard 1.32         if (p[0] == '-' && p[1] == '-')
 721                            {
 722                                if (p[2] != '>')
 723                                {
 724                                    throw XmlException(
 725                                        XmlException::MINUS_MINUS_IN_COMMENT, _line);
 726                                }
 727                    
 728                                // Find end of comment (excluding whitespace):
 729                    
 730                                *p = '\0';
 731                                p += 3;
 732                                return;
 733                            }
 734 mike          1.13     }
 735                    
 736                        // If it got this far, then the comment is unterminated:
 737                    
 738                        throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
 739                    }
 740                    
 741                    void XmlParser::_getCData(char*& p)
 742                    {
 743                        // At this point p points one past "<![CDATA[" sequence:
 744                    
 745                        for (; *p; p++)
 746                        {
 747 david.dillard 1.32         if (p[0] == ']' && p[1] == ']' && p[2] == '>')
 748                            {
 749                                *p = '\0';
 750                                p += 3;
 751                                return;
 752                            }
 753                            else if (*p == '\n')
 754                                _line++;
 755 mike          1.13     }
 756                    
 757                        // If it got this far, then the comment is unterminated:
 758                    
 759                        throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
 760                    }
 761                    
 762                    void XmlParser::_getDocType(char*& p)
 763                    {
 764                        // Just ignore the DOCTYPE command for now:
 765                    
 766                        for (; *p && *p != '>'; p++)
 767                        {
 768 david.dillard 1.32         if (*p == '\n')
 769                                _line++;
 770 mike          1.13     }
 771                    
 772                        if (*p != '>')
 773 david.dillard 1.32         throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
 774 mike          1.13 
 775                        p++;
 776                    }
 777                    
 778                    void XmlParser::_getElement(char*& p, XmlEntry& entry)
 779                    {
 780                        entry.attributeCount = 0;
 781                    
 782                        //--------------------------------------------------------------------------
 783                        // Get the element name (expect one of these: '?', '!', [A-Za-z_])
 784                        //--------------------------------------------------------------------------
 785                    
 786                        if (*p == '?')
 787                        {
 788 david.dillard 1.32         entry.type = XmlEntry::XML_DECLARATION;
 789                            entry.text = ++p;
 790 mike          1.13 
 791 david.dillard 1.32         Boolean openCloseElement = false;
 792 mike          1.13 
 793 david.dillard 1.32         if (_getElementName(p))
 794                                return;
 795 mike          1.13     }
 796                        else if (*p == '!')
 797                        {
 798 david.dillard 1.32         p++;
 799 mike          1.13 
 800 david.dillard 1.32         // Expect a comment or CDATA:
 801 mike          1.13 
 802 david.dillard 1.32         if (p[0] == '-' && p[1] == '-')
 803                            {
 804                                p += 2;
 805                                entry.type = XmlEntry::COMMENT;
 806                                entry.text = p;
 807                                _getComment(p);
 808                                return;
 809                            }
 810                            else if (memcmp(p, "[CDATA[", 7) == 0)
 811                            {
 812                                p += 7;
 813                                entry.type = XmlEntry::CDATA;
 814                                entry.text = p;
 815                                _getCData(p);
 816                                return;
 817                            }
 818                            else if (memcmp(p, "DOCTYPE", 7) == 0)
 819                            {
 820                                entry.type = XmlEntry::DOCTYPE;
 821 kumpf         1.37             entry.text = "";
 822 david.dillard 1.32             _getDocType(p);
 823                                return;
 824                            }
 825                            throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
 826 mike          1.13     }
 827                        else if (*p == '/')
 828                        {
 829 david.dillard 1.32         entry.type = XmlEntry::END_TAG;
 830                            entry.text = ++p;
 831 mike          1.13 
 832 david.dillard 1.32         if (!_getElementName(p))
 833                                throw(XmlException(XmlException::BAD_END_TAG, _line));
 834 mike          1.13 
 835 david.dillard 1.32         return;
 836 mike          1.13     }
 837 david         1.25     else if ((((*p >= 'A') && (*p <= 'Z')) ||
 838 kumpf         1.24               ((*p >= 'a') && (*p <= 'z')) ||
 839                                  (*p == '_')))
 840 mike          1.13     {
 841 david.dillard 1.32         entry.type = XmlEntry::START_TAG;
 842                            entry.text = p;
 843 mike          1.13 
 844 david.dillard 1.32         Boolean openCloseElement = false;
 845 mike          1.13 
 846 david.dillard 1.32         if (_getOpenElementName(p, openCloseElement))
 847                            {
 848                                if (openCloseElement)
 849                                    entry.type = XmlEntry::EMPTY_TAG;
 850                                return;
 851                            }
 852 mike          1.13     }
 853                        else
 854 david.dillard 1.32         throw XmlException(XmlException::BAD_START_TAG, _line);
 855 mike          1.13 
 856                        //--------------------------------------------------------------------------
 857                        // Grab all the attributes:
 858                        //--------------------------------------------------------------------------
 859                    
 860                        for (;;)
 861                        {
 862 david.dillard 1.32         if (entry.type == XmlEntry::XML_DECLARATION)
 863                            {
 864                                if (p[0] == '?' && p[1] == '>')
 865                                {
 866                                    p += 2;
 867                                    return;
 868                                }
 869                            }
 870                            else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
 871                            {
 872                                entry.type = XmlEntry::EMPTY_TAG;
 873                                p += 2;
 874                                return;
 875                            }
 876                            else if (*p == '>')
 877                            {
 878                                p++;
 879                                return;
 880                            }
 881                    
 882                            XmlAttribute attr;
 883 david.dillard 1.32         attr.name = p;
 884                            _getAttributeNameAndEqual(p);
 885                    
 886 kumpf         1.37         // Get the attribute value (e.g., "some value")
 887                            {
 888                                if ((*p != '"') && (*p != '\''))
 889                                {
 890                                    throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 891                                }
 892                    
 893                                char quote = *p++;
 894                    
 895                                char* start;
 896                                _normalize(_line, p, quote, start);
 897                                attr.value = start;
 898                    
 899                                if (*p != quote)
 900                                {
 901                                    throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 902                                }
 903                    
 904                                // Overwrite the closing quote with a null-terminator:
 905 david.dillard 1.32 
 906 kumpf         1.37             *p++ = '\0';
 907                            }
 908 david.dillard 1.32 
 909                            if (entry.type == XmlEntry::XML_DECLARATION)
 910                            {
 911                                // The next thing must a space or a "?>":
 912 mike          1.13 
 913 david.dillard 1.32             if (!(p[0] == '?' && p[1] == '>') && !_isspace(*p))
 914                                {
 915                                    throw XmlException(
 916                                        XmlException::BAD_ATTRIBUTE_VALUE, _line);
 917                                }
 918                            }
 919                            else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || _isspace(*p)))
 920                            {
 921                                // The next thing must be a space or a '>':
 922 mike          1.13 
 923 david.dillard 1.32             throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 924                            }
 925 mike          1.13 
 926 mike          1.34         _skipWhitespace(_line, p);
 927 david.dillard 1.32 
 928                            if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES)
 929                                throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line);
 930                    
 931                            entry.attributes[entry.attributeCount++] = attr;
 932 mike          1.13     }
 933                    }
 934                    
 935                    static const char* _typeStrings[] =
 936                    {
 937 david.dillard 1.32     "XML_DECLARATION",
 938                        "START_TAG",
 939                        "EMPTY_TAG",
 940                        "END_TAG",
 941 mike          1.13     "COMMENT",
 942                        "CDATA",
 943                        "DOCTYPE",
 944 david.dillard 1.32     "CONTENT"
 945 mike          1.13 };
 946                    
 947                    void XmlEntry::print() const
 948                    {
 949                        PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
 950                    
 951                        Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
 952                    
 953                        if (needQuotes)
 954 david.dillard 1.32         PEGASUS_STD(cout) << "\"";
 955                    
 956 mike          1.13     _printValue(text);
 957                    
 958                        if (needQuotes)
 959 david.dillard 1.32         PEGASUS_STD(cout) << "\"";
 960 mike          1.13 
 961                        PEGASUS_STD(cout) << '\n';
 962                    
 963                        for (Uint32 i = 0; i < attributeCount; i++)
 964                        {
 965 david.dillard 1.32         PEGASUS_STD(cout) << "    " << attributes[i].name << "=\"";
 966                            _printValue(attributes[i].value);
 967                            PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
 968 mike          1.13     }
 969                    }
 970                    
 971                    const XmlAttribute* XmlEntry::findAttribute(
 972                        const char* name) const
 973                    {
 974                        for (Uint32 i = 0; i < attributeCount; i++)
 975                        {
 976 david.dillard 1.32         if (strcmp(attributes[i].name, name) == 0)
 977                                return &attributes[i];
 978 mike          1.13     }
 979                    
 980                        return 0;
 981                    }
 982                    
 983                    // Find first non-whitespace character (set first) and last non-whitespace
 984                    // character (set last one past this). For example, consider this string:
 985                    //
 986 david.dillard 1.32 //      "   87     "
 987 mike          1.13 //
 988                    // The first pointer would point to '8' and the last pointer woudl point one
 989                    // beyond '7'.
 990                    
 991                    static void _findEnds(
 992 david.dillard 1.32     const char* str,
 993                        const char*& first,
 994 mike          1.13     const char*& last)
 995                    {
 996                        first = str;
 997                    
 998 chuck         1.26     while (_isspace(*first))
 999 david.dillard 1.32         first++;
1000 mike          1.13 
1001                        if (!*first)
1002                        {
1003 david.dillard 1.32         last = first;
1004                            return;
1005 mike          1.13     }
1006                    
1007                        last = first + strlen(first);
1008                    
1009 chuck         1.26     while (last != first && _isspace(last[-1]))
1010 david.dillard 1.32         last--;
1011 mike          1.13 }
1012                    
1013                    Boolean XmlEntry::getAttributeValue(
1014 david.dillard 1.32     const char* name,
1015 mike          1.13     Uint32& value) const
1016                    {
1017                        const XmlAttribute* attr = findAttribute(name);
1018                    
1019                        if (!attr)
1020 david.dillard 1.32         return false;
1021 mike          1.13 
1022                        const char* first;
1023                        const char* last;
1024                        _findEnds(attr->value, first, last);
1025                    
1026                        char* end = 0;
1027                        long tmp = strtol(first, &end, 10);
1028                    
1029                        if (!end || end != last)
1030 david.dillard 1.32         return false;
1031 mike          1.13 
1032                        value = Uint32(tmp);
1033                        return true;
1034                    }
1035                    
1036                    Boolean XmlEntry::getAttributeValue(
1037 david.dillard 1.32     const char* name,
1038 mike          1.13     Real32& value) const
1039                    {
1040                        const XmlAttribute* attr = findAttribute(name);
1041                    
1042                        if (!attr)
1043 david.dillard 1.32         return false;
1044 mike          1.13 
1045                        const char* first;
1046                        const char* last;
1047                        _findEnds(attr->value, first, last);
1048                    
1049                        char* end = 0;
1050                        double tmp = strtod(first, &end);
1051                    
1052                        if (!end || end != last)
1053 david.dillard 1.32         return false;
1054 mike          1.13 
1055 david.dillard 1.32     value = static_cast<Real32>(tmp);
1056 mike          1.13     return true;
1057                    }
1058                    
1059                    Boolean XmlEntry::getAttributeValue(
1060 david.dillard 1.32     const char* name,
1061 mike          1.13     const char*& value) const
1062                    {
1063                        const XmlAttribute* attr = findAttribute(name);
1064                    
1065                        if (!attr)
1066 david.dillard 1.32         return false;
1067 mike          1.13 
1068                        value = attr->value;
1069                        return true;
1070                    }
1071                    
1072                    Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1073                    {
1074                        const char* tmp;
1075                    
1076                        if (!getAttributeValue(name, tmp))
1077 david.dillard 1.32         return false;
1078 mike          1.13 
1079 chuck         1.28     value = String(tmp);
1080 mike          1.13     return true;
1081                    }
1082                    
1083 mike          1.33 void XmlAppendCString(Buffer& out, const char* str)
1084 mike          1.13 {
1085 david.dillard 1.32     out.append(str, static_cast<Uint32>(strlen(str)));
1086 mike          1.13 }
1087                    
1088                    PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2