(file) Return to XmlParser.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 karl  1.31 //%2005////////////////////////////////////////////////////////////////////////
   2 mike  1.13 //
   3 karl  1.29 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
   4            // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
   5            // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
   6 karl  1.27 // IBM Corp.; EMC Corporation, The Open Group.
   7 karl  1.29 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
   8            // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
   9 karl  1.31 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
  10            // EMC Corporation; VERITAS Software Corporation; The Open Group.
  11 mike  1.13 //
  12            // Permission is hereby granted, free of charge, to any person obtaining a copy
  13 kumpf 1.16 // of this software and associated documentation files (the "Software"), to
  14            // deal in the Software without restriction, including without limitation the
  15            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  16 mike  1.13 // sell copies of the Software, and to permit persons to whom the Software is
  17            // furnished to do so, subject to the following conditions:
  18 david.dillard 1.32 //
  19 kumpf         1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
  20 mike          1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
  21                    // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
  22 kumpf         1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
  23                    // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  24                    // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  25 mike          1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  26                    // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27                    //
  28                    //==============================================================================
  29                    //
  30                    // Author: Mike Brasher (mbrasher@bmc.com)
  31                    //
  32 david.dillard 1.30 // Modified By: David Dillard, VERITAS Software Corp.
  33                    //                  (david.dillard@veritas.com)
  34 mike          1.13 //
  35                    //%/////////////////////////////////////////////////////////////////////////////
  36                    
  37                    ////////////////////////////////////////////////////////////////////////////////
  38                    //
  39                    // XmlParser
  40                    //
  41 david.dillard 1.32 //      This file contains a simple non-validating XML parser. Here are
   42                    //      several rules for well-formed XML:
  43 mike          1.13 //
  44 david.dillard 1.32 //          1.  Documents must begin with an XML declaration:
  45 mike          1.13 //
  46 david.dillard 1.32 //              <?xml version="1.0" standalone="yes"?>
  47 mike          1.13 //
  48 david.dillard 1.32 //          2.  Comments have the form:
  49 mike          1.13 //
  50 david.dillard 1.32 //              <!-- blah blah blah -->
  51 mike          1.13 //
  52 david.dillard 1.32 //          3. The following entity references are supported:
  53 mike          1.13 //
   54 david.dillard 1.32 //              &amp; - ampersand
   55                    //              &lt; - less-than
   56                    //              &gt; - greater-than
   57                    //              &quot; - full quote
   58                    //              &apos; - apostrophe
  59 mike          1.13 //
  60 kumpf         1.18 //             as well as character (numeric) references:
  61                    
  62                    //              &#49; - decimal reference for character '1'
  63                    //              &#x31; - hexadecimal reference for character '1'
  64                    //
  65 david.dillard 1.32 //          4. Element names and attribute names take the following form:
  66 mike          1.13 //
   67 david.dillard 1.32 //              [A-Za-z_][A-Za-z_0-9-.:]*
  68 mike          1.13 //
  69 david.dillard 1.32 //          5.  Arbitrary data (CDATA) can be enclosed like this:
  70 mike          1.13 //
  71 david.dillard 1.32 //                  <![CDATA[
  72                    //                  ...
  73                    //                  ]]>
  74 mike          1.13 //
  75 david.dillard 1.32 //          6.  Element names and attributes names are case-sensitive.
  76 mike          1.13 //
  77 david.dillard 1.32 //          7.  XmlAttribute values must be delimited by full or half quotes.
  78                    //              XmlAttribute values must be delimited.
  79 mike          1.13 //
  80 david.dillard 1.32 //          8.  <!DOCTYPE...>
  81 mike          1.13 //
  82                    // TODO:
  83                    //
  84 karl          1.15 //      ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is work.
  85 david.dillard 1.32 //      Handle <!DOCTYPE...> sections which are complicated (containing
  86 mike          1.13 //        rules rather than references to files).
  87                    //
  88 david.dillard 1.32 //      Remove newlines from string literals:
  89 mike          1.13 //
  90                    //          Example: <xyz x="hello
  91 david.dillard 1.32 //              world">
  92 mike          1.13 //
  93                    ////////////////////////////////////////////////////////////////////////////////
  94                    
  95 sage          1.14 #include <Pegasus/Common/Config.h>
  96 mike          1.13 #include <cctype>
  97                    #include <cstdio>
  98                    #include <cstdlib>
  99                    #include <cstring>
 100                    #include "XmlParser.h"
 101                    #include "Logger.h"
 102 chuck         1.19 #include "ExceptionRep.h"
 103 mike          1.13 
 104                    PEGASUS_NAMESPACE_BEGIN
 105                    
 106                    #define PEGASUS_ARRAY_T XmlEntry
 107                    # include "ArrayImpl.h"
 108                    #undef PEGASUS_ARRAY_T
 109                    
 110                    
 111                    ////////////////////////////////////////////////////////////////////////////////
 112                    //
 113                    // Static helper functions
 114                    //
 115                    ////////////////////////////////////////////////////////////////////////////////
 116                    
 117                    static void _printValue(const char* p)
 118                    {
 119                        for (; *p; p++)
 120                        {
 121 david.dillard 1.32         if (*p == '\n')
 122                                PEGASUS_STD(cout) << "\\n";
 123                            else if (*p == '\r')
 124                                PEGASUS_STD(cout) << "\\r";
 125                            else if (*p == '\t')
 126                                PEGASUS_STD(cout) << "\\t";
 127                            else
 128                                PEGASUS_STD(cout) << *p;
 129 mike          1.13     }
 130                    }
 131                    
                         // One row of the entity-reference table: the text of a reference and
                         // the single character it stands for.
  132                    struct EntityReference
  133                    {
                             // Reference text, including the leading '&' and trailing ';'.
  134                        const char* match;
                             // Number of characters in `match` (see the _references table).
  135                        Uint32 length;
                             // Character that the reference represents.
  136                        char replacement;
  137                    };
 138                    
 139 kumpf         1.18 // ATTN: Add support for more entity references
                         // The five named entity references this parser knows about. Each row
                         // gives the reference text, its length in characters, and the character
                         // it stands for.
  140 mike          1.13 static EntityReference _references[] =
  141                    {
  142                        { "&amp;", 5, '&' },
  143                        { "&lt;", 4, '<' },
  144                        { "&gt;", 4, '>' },
  145                        { "&quot;", 6, '"' },
  146                        { "&apos;", 6, '\'' }
  147                    };
 148                    
 149 chuck         1.26 
 150                    // Implements a check for a whitespace character, without calling
 151                    // isspace( ).  The isspace( ) function is locale-sensitive,
 152                    // and incorrectly flags some chars above 0x7f as whitespace.  This
 153                    // causes the XmlParser to incorrectly parse UTF-8 data.
 154                    //
 155                    // Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml)
 156                    // defines white space as:
 157 david.dillard 1.32 // S    ::=    (#x20 | #x9 | #xD | #xA)+
 158 chuck         1.26 static int _isspace(char c)
 159                    {
 160 david.dillard 1.32         if (c == ' ' || c == '\r' || c == '\t' || c == '\n')
 161                                    return 1;
 162                            return 0;
 163 chuck         1.26 }
 164                    
 165                    
                         // Number of rows in the _references table, computed from the array size.
  166 mike          1.13 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
 167                    
 168                    // Remove all redundant spaces from the given string:
 169                    
 170                    static void _normalize(char* text)
 171                    {
 172                        char* p = text;
 173 david.dillard 1.32     char* end = p + strlen(text);
 174 mike          1.13 
 175                        // Remove leading spaces:
 176                    
 177 chuck         1.26     while (_isspace(*p))
 178 david.dillard 1.32                 p++;
 179 mike          1.13 
 180                        if (p != text)
 181 david.dillard 1.32         memmove(text, p, end - p + 1);
 182 mike          1.13 
 183                        p = text;
 184                    
 185                        // Look for sequences of more than one space and remove all but one.
 186                    
 187                        for (;;)
 188                        {
 189 david.dillard 1.32         // Advance to the next space:
 190 mike          1.13 
 191 david.dillard 1.32         while (*p && !_isspace(*p))
 192                                p++;
 193 mike          1.13 
 194 david.dillard 1.32         if (!*p)
 195                                break;
 196 mike          1.13 
 197 david.dillard 1.32         // Advance to the next non-space:
 198 mike          1.13 
 199 david.dillard 1.32         char* q = p++;
 200 mike          1.13 
 201 david.dillard 1.32         while (_isspace(*p))
 202                                p++;
 203 mike          1.13 
 204 david.dillard 1.32         // Discard trailing spaces (if we are at the end):
 205 mike          1.13 
 206 david.dillard 1.32         if (!*p)
 207                            {
 208                                *q = '\0';
 209                                break;
 210                            }
 211 mike          1.13 
 212 david.dillard 1.32         // Remove the redundant spaces:
 213 mike          1.13 
 214 david.dillard 1.32         const size_t n = p - q;
 215 mike          1.13 
 216 david.dillard 1.32         if (n > 1)
 217                            {
 218                                *q++ = ' ';
 219                                memmove(q, p, end - p + 1);
 220                                p = q;
 221                            }
 222 mike          1.13     }
 223                    }
 224                    
 225                    ////////////////////////////////////////////////////////////////////////////////
 226                    //
 227                    // XmlException
 228                    //
 229                    ////////////////////////////////////////////////////////////////////////////////
 230                    
 231                    static const char* _xmlMessages[] =
 232                    {
 233                        "Bad opening element",
 234                        "Bad closing element",
 235                        "Bad attribute name",
 236                        "Exepected equal sign",
 237                        "Bad attribute value",
 238                        "A \"--\" sequence found within comment",
 239                        "Unterminated comment",
 240                        "Unterminated CDATA block",
 241                        "Unterminated DOCTYPE",
 242                        "Too many attributes: parser only handles 10",
 243 mike          1.13     "Malformed reference",
 244                        "Expected a comment or CDATA following \"<!\" sequence",
 245                        "Closing element does not match opening element",
 246                        "One or more tags are still open",
 247                        "More than one root element was encountered",
 248                        "Validation error",
 249                        "Semantic error"
 250                    };
 251                    
                         // Message key for each XmlException code, passed to MessageLoaderParms
                         // together with the default text from _xmlMessages. Both tables are
                         // indexed by (code - 1), so the entries must stay in the same order.
  252 david.dillard 1.32 static const char* _xmlKeys[] =
  253 chuck         1.19 {
  254 humberto      1.20     "Common.XmlParser.BAD_START_TAG",
  255 chuck         1.19     "Common.XmlParser.BAD_END_TAG",
  256                        "Common.XmlParser.BAD_ATTRIBUTE_NAME",
  257                        "Common.XmlParser.EXPECTED_EQUAL_SIGN",
  258                        "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
  259                        "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
  260                        "Common.XmlParser.UNTERMINATED_COMMENT",
  261                        "Common.XmlParser.UNTERMINATED_CDATA",
  262                        "Common.XmlParser.UNTERMINATED_DOCTYPE",
  263                        "Common.XmlParser.TOO_MANY_ATTRIBUTES",
  264                        "Common.XmlParser.MALFORMED_REFERENCE",
  265                        "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
  266                        "Common.XmlParser.START_END_MISMATCH",
  267 david.dillard 1.32     "Common.XmlParser.UNCLOSED_TAGS",
  268 chuck         1.19     "Common.XmlParser.MULTIPLE_ROOTS",
  269                        "Common.XmlParser.VALIDATION_ERROR",
  270                        "Common.XmlParser.SEMANTIC_ERROR"
  271                    };
 272                    
 273 chuck         1.23 // l10n replace _formMessage (comment out the old one)
 274 chuck         1.19 /*
 275 mike          1.13 static String _formMessage(Uint32 code, Uint32 line, const String& message)
 276                    {
 277                        String result = _xmlMessages[Uint32(code) - 1];
 278                    
 279                        char buffer[32];
 280                        sprintf(buffer, "%d", line);
 281                        result.append(": on line ");
 282                        result.append(buffer);
 283                    
 284                        if (message.size())
 285                        {
 286 david.dillard 1.32         result.append(": ");
 287                            result.append(message);
 288 mike          1.13     }
 289                    
 290                        return result;
 291                    }
 292 chuck         1.19 */
 293                    
 294                    static MessageLoaderParms _formMessage(Uint32 code, Uint32 line, const String& message)
 295                    {
 296                        String dftMsg = _xmlMessages[Uint32(code) - 1];
 297                        String key = _xmlKeys[Uint32(code) - 1];
 298 david.dillard 1.32         String msg = message;
 299 chuck         1.19 
 300                        dftMsg.append(": on line $0");
 301                        if (message.size())
 302                        {
 303 david.dillard 1.32         msg = ": " + msg;
 304                            dftMsg.append("$1");
 305                        }
 306 chuck         1.19 
 307                        return MessageLoaderParms(key, dftMsg, line ,msg);
 308                    }
 309                    
 310                    static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
 311                    {
 312                        String dftMsg = _xmlMessages[Uint32(code) - 1];
 313                        String key = _xmlKeys[Uint32(code) - 1];
 314                    
 315                        dftMsg.append(": on line $0");
 316 david.dillard 1.32 
 317 chuck         1.19     return MessageLoaderParms(key, dftMsg, line);
 318                    }
 319                    
 320 mike          1.13 
                         // Constructs an exception whose message is produced by _formMessage()
                         // from the error code, the line number and the (possibly empty) detail
                         // text.
  321                    XmlException::XmlException(
  322 david.dillard 1.32     XmlException::Code code,
  323 mike          1.13     Uint32 lineNumber,
  324 david.dillard 1.32     const String& message)
  325 mike          1.13     : Exception(_formMessage(code, lineNumber, message))
  326                    {
  327                    
  328                    }
 329                    
 330 chuck         1.19 
 331                    XmlException::XmlException(
 332 david.dillard 1.32     XmlException::Code code,
 333 chuck         1.19     Uint32 lineNumber,
 334 david.dillard 1.32     MessageLoaderParms& msgParms)
 335 chuck         1.19     : Exception(_formPartialMessage(code, lineNumber))
 336                    {
 337 david.dillard 1.32         if (msgParms.default_msg.size())
 338 humberto      1.21     {
 339 david.dillard 1.32         msgParms.default_msg = ": " + msgParms.default_msg;
 340                        }
 341                            _rep->message.append(MessageLoader::getMessage(msgParms));
 342 chuck         1.19 }
 343                    
 344                    
 345 mike          1.13 ////////////////////////////////////////////////////////////////////////////////
 346                    //
 347                    // XmlValidationError
 348                    //
 349                    ////////////////////////////////////////////////////////////////////////////////
 350                    
                         // Constructs an XmlException with code VALIDATION_ERROR for the given
                         // line number and detail text.
  351                    XmlValidationError::XmlValidationError(
  352                        Uint32 lineNumber,
  353                        const String& message)
  354                        : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
  355                    {
  356                    
  357                    }
 358                    
 359 chuck         1.19 
                         // Constructs an XmlException with code VALIDATION_ERROR for the given
                         // line number, taking the detail text from message-loader parameters.
                         // msgParms is forwarded to the XmlException constructor by non-const
                         // reference.
  360                    XmlValidationError::XmlValidationError(
  361                        Uint32 lineNumber,
  362                        MessageLoaderParms& msgParms)
  363                        : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
  364                    {
  365                    
  366                    }
 367                    
 368                    
 369 mike          1.13 ////////////////////////////////////////////////////////////////////////////////
 370                    //
 371                    // XmlSemanticError
 372                    //
 373                    ////////////////////////////////////////////////////////////////////////////////
 374                    
                         // Constructs an XmlException with code SEMANTIC_ERROR for the given
                         // line number and detail text.
  375                    XmlSemanticError::XmlSemanticError(
  376                        Uint32 lineNumber,
  377                        const String& message)
  378                        : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
  379                    {
  380                    
  381                    }
 382 chuck         1.19 
 383                    
                         // Constructs an XmlException with code SEMANTIC_ERROR for the given
                         // line number, taking the detail text from message-loader parameters.
                         // msgParms is forwarded to the XmlException constructor by non-const
                         // reference.
  384                    XmlSemanticError::XmlSemanticError(
  385                        Uint32 lineNumber,
  386                        MessageLoaderParms& msgParms)
  387                        : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
  388                    {
  389                    
  390                    }
 391                    
 392 mike          1.13 
 393                    ////////////////////////////////////////////////////////////////////////////////
 394                    //
 395                    // XmlParser
 396                    //
 397                    ////////////////////////////////////////////////////////////////////////////////
 398                    
                         // Creates a parser positioned at the start of `text`, with the line
                         // counter at 1, no saved character to restore and no root element seen
                         // yet. Only the pointer is stored; next() writes NUL terminators into
                         // this buffer as it parses.
  399 david.dillard 1.32 XmlParser::XmlParser(char* text) : _line(1), _text(text), _current(text),
  400 mike          1.13     _restoreChar('\0'), _foundRoot(false)
  401                    {
  402                    
  403                    }
 404                    
                         // Fetches the next entry into `entry`.
                         //
                         // An entry previously handed to putBack() is returned first. Otherwise
                         // whitespace is skipped and either an element (text starting with '<')
                         // or a run of content is read from the text buffer.
                         //
                         // Returns true when `entry` was filled in, and false when the end of the
                         // text was reached with no elements left open.
                         //
                         // Throws XmlException with code:
                         //   UNCLOSED_TAGS      - end of text reached while _stack is not empty
                         //   MULTIPLE_ROOTS     - a start tag appears with no element open after
                         //                        a root element has already been seen
                         //   START_END_MISMATCH - an end tag appears with no element open, or its
                         //                        name differs from the innermost open element
  405                    Boolean XmlParser::next(XmlEntry& entry)
  406                    {
  407                        if (!_putBackStack.isEmpty())
  408                        {
  409 david.dillard 1.32         entry = _putBackStack.top();
  410                            _putBackStack.pop();
  411                            return true;
  412 mike          1.13     }
  413                    
  414                        // If a character was overwritten with a null-terminator the last
  415                        // time this routine was called, then put back that character. Before
  416                        // exiting of course, restore the null-terminator.
  417                    
  418                        char* nullTerminator = 0;
  419                    
  420                        if (_restoreChar && !*_current)
  421                        {
  422 david.dillard 1.32         nullTerminator = _current;
  423                            *_current = _restoreChar;
  424                            _restoreChar = '\0';
  425 mike          1.13     }
  426                    
  427                        // Skip over any whitespace:
  428                    
  429                        _skipWhitespace(_current);
  430                    
  431                        if (!*_current)
  432                        {
  433 david.dillard 1.32         if (nullTerminator)
  434                                *nullTerminator = '\0';
  435 mike          1.13 
  436 david.dillard 1.32         if (!_stack.isEmpty())
  437                                throw XmlException(XmlException::UNCLOSED_TAGS, _line);
  438 mike          1.13 
  439 david.dillard 1.32         return false;
  440 mike          1.13     }
  441                    
  442                        // Either a "<...>" or content begins next:
  443                    
  444                        if (*_current == '<')
  445                        {
  446 david.dillard 1.32         _current++;
  447                            _getElement(_current, entry);
  448 mike          1.13 
  449 david.dillard 1.32         if (nullTerminator)
  450                                *nullTerminator = '\0';
  451 mike          1.13 
  452 david.dillard 1.32         if (entry.type == XmlEntry::START_TAG)
  453                            {
  454                                if (_stack.isEmpty() && _foundRoot)
  455                                    throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
  456                    
  457                                _foundRoot = true;
                                     // Remember the name of the element just opened so that the
                                     // matching end tag can be checked against it.
  458                                _stack.push((char*)entry.text);
  459                            }
  460                            else if (entry.type == XmlEntry::END_TAG)
  461                            {
  462                                if (_stack.isEmpty())
  463                                    throw XmlException(XmlException::START_END_MISMATCH, _line);
  464 mike          1.13 
  465 david.dillard 1.32             if (strcmp(_stack.top(), entry.text) != 0)
  466                                    throw XmlException(XmlException::START_END_MISMATCH, _line);
  467 mike          1.13 
  468 david.dillard 1.32             _stack.pop();
  469                            }
  470 mike          1.13 
  471 david.dillard 1.32         return true;
  472 mike          1.13     }
  473                        else
  474                        {
  475 david.dillard 1.32         entry.type = XmlEntry::CONTENT;
  476                            entry.text = _current;
  477                            _getContent(_current);
                                 // Terminate the content where _getContent() stopped, saving the
                                 // character that is overwritten so that the next call can put it
                                 // back before parsing resumes.
  478                            _restoreChar = *_current;
  479                            *_current = '\0';
  480 mike          1.13 
  481 david.dillard 1.32         if (nullTerminator)
  482                                *nullTerminator = '\0';
  483 mike          1.13 
                                 // The content text is rewritten in place: references are handled
                                 // by _substituteReferences() (not shown here; presumably it
                                 // expands them), then whitespace is collapsed by _normalize().
  484 david.dillard 1.32         _substituteReferences((char*)entry.text);
  485                            _normalize((char*)entry.text);
  486 mike          1.13 
  487 david.dillard 1.32         return true;
  488 mike          1.13     }
  489                    }
 490                    
                         // Pushes `entry` onto the put-back stack. next() returns the top of that
                         // stack, when it is not empty, before reading any further input.
  491                    void XmlParser::putBack(XmlEntry& entry)
  492                    {
  493                        _putBackStack.push(entry);
  494                    }
 495                    
                         // Destructor. The body is empty: no cleanup is performed here.
  496                    XmlParser::~XmlParser()
  497                    {
  498                        // Nothing to do!
  499                    }
 500                    
 501                    void XmlParser::_skipWhitespace(char*& p)
 502                    {
 503 chuck         1.26     while (*p && _isspace(*p))
 504 mike          1.13     {
 505 david.dillard 1.32         if (*p == '\n')
 506                                _line++;
 507 mike          1.13 
 508 david.dillard 1.32         p++;
 509 mike          1.13     }
 510                    }
 511                    
 512                    Boolean XmlParser::_getElementName(char*& p)
 513                    {
 514 david         1.25     if (!(((*p >= 'A') && (*p <= 'Z')) ||
 515 kumpf         1.24           ((*p >= 'a') && (*p <= 'z')) ||
 516                              (*p == '_')))
 517 david.dillard 1.32         throw XmlException(XmlException::BAD_START_TAG, _line);
 518 kumpf         1.24     p++;
 519 mike          1.13 
 520 david         1.22     while ((*p) &&
 521 david.dillard 1.32            (((*p >= 'A') && (*p <= 'Z')) ||
 522                                ((*p >= 'a') && (*p <= 'z')) ||
 523                                ((*p >= '0') && (*p <= '9')) ||
 524                                *p == '_' || *p == '-' || *p == ':' || *p == '.'))
 525                            p++;
 526 mike          1.13 
 527                        // The next character must be a space:
 528                    
 529 chuck         1.26     if (_isspace(*p))
 530 mike          1.13     {
 531 david.dillard 1.32         *p++ = '\0';
 532                            _skipWhitespace(p);
 533 mike          1.13     }
 534                    
 535                        if (*p == '>')
 536                        {
 537 david.dillard 1.32         *p++ = '\0';
 538                            return true;
 539 mike          1.13     }
 540                    
 541                        return false;
 542                    }
 543                    
 544                    Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement)
 545                    {
 546                        openCloseElement = false;
 547                    
 548 david         1.25     if (!(((*p >= 'A') && (*p <= 'Z')) ||
 549 kumpf         1.24           ((*p >= 'a') && (*p <= 'z')) ||
 550                              (*p == '_')))
 551 david.dillard 1.32         throw XmlException(XmlException::BAD_START_TAG, _line);
 552 kumpf         1.24     p++;
 553 mike          1.13 
 554 david         1.22     while ((*p) &&
 555 david.dillard 1.32            (((*p >= 'A') && (*p <= 'Z')) ||
 556                                ((*p >= 'a') && (*p <= 'z')) ||
 557                                ((*p >= '0') && (*p <= '9')) ||
 558                                *p == '_' || *p == '-' || *p == ':' || *p == '.'))
 559                            p++;
 560 mike          1.13 
 561                        // The next character must be a space:
 562                    
 563 chuck         1.26     if (_isspace(*p))
 564 mike          1.13     {
 565 david.dillard 1.32         *p++ = '\0';
 566                            _skipWhitespace(p);
 567 mike          1.13     }
 568                    
 569                        if (*p == '>')
 570                        {
 571 david.dillard 1.32         *p++ = '\0';
 572                            return true;
 573 mike          1.13     }
 574                    
 575                        if (p[0] == '/' && p[1] == '>')
 576                        {
 577 david.dillard 1.32         openCloseElement = true;
 578                            *p = '\0';
 579                            p += 2;
 580                            return true;
 581 mike          1.13     }
 582                    
 583                        return false;
 584                    }
 585                    
 586                    void XmlParser::_getAttributeNameAndEqual(char*& p)
 587                    {
 588 david         1.25     if (!(((*p >= 'A') && (*p <= 'Z')) ||
 589 kumpf         1.24           ((*p >= 'a') && (*p <= 'z')) ||
 590                              (*p == '_')))
 591 david.dillard 1.32         throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
 592 kumpf         1.24     p++;
 593 mike          1.13 
 594 david         1.22     while ((*p) &&
 595 david.dillard 1.32            (((*p >= 'A') && (*p <= 'Z')) ||
 596                                ((*p >= 'a') && (*p <= 'z')) ||
 597                                ((*p >= '0') && (*p <= '9')) ||
 598                                *p == '_' || *p == '-' || *p == ':' || *p == '.'))
 599                            p++;
 600 mike          1.13 
 601                        char* term = p;
 602                    
 603                        _skipWhitespace(p);
 604                    
 605                        if (*p != '=')
 606 david.dillard 1.32         throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
 607 mike          1.13 
 608                        p++;
 609                    
 610                        _skipWhitespace(p);
 611                    
 612                        *term = '\0';
 613                    }
 614                    
 615                    void XmlParser::_getAttributeValue(char*& p)
 616                    {
 617                        // ATTN-B: handle values contained in semiquotes:
 618                    
 619                        if (*p != '"' && *p != '\'')
 620 david.dillard 1.32         throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 621 mike          1.13 
 622                        char startChar = *p++;
 623                    
 624                        while (*p && *p != startChar)
 625 david.dillard 1.32         p++;
 626 mike          1.13 
 627                        if (*p != startChar)
 628 david.dillard 1.32         throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 629 mike          1.13 
 630                        *p++ = '\0';
 631                    }
 632                    
 633                    void XmlParser::_getComment(char*& p)
 634                    {
 635                        // Now p points to first non-whitespace character beyond "<--" sequence:
 636                    
 637                        for (; *p; p++)
 638                        {
 639 david.dillard 1.32         if (p[0] == '-' && p[1] == '-')
 640                            {
 641                                if (p[2] != '>')
 642                                {
 643                                    throw XmlException(
 644                                        XmlException::MINUS_MINUS_IN_COMMENT, _line);
 645                                }
 646                    
 647                                // Find end of comment (excluding whitespace):
 648                    
 649                                *p = '\0';
 650                                p += 3;
 651                                return;
 652                            }
 653 mike          1.13     }
 654                    
 655                        // If it got this far, then the comment is unterminated:
 656                    
 657                        throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
 658                    }
 659                    
 660                    void XmlParser::_getCData(char*& p)
 661                    {
 662                        // At this point p points one past "<![CDATA[" sequence:
 663                    
 664                        for (; *p; p++)
 665                        {
 666 david.dillard 1.32         if (p[0] == ']' && p[1] == ']' && p[2] == '>')
 667                            {
 668                                *p = '\0';
 669                                p += 3;
 670                                return;
 671                            }
 672                            else if (*p == '\n')
 673                                _line++;
 674 mike          1.13     }
 675                    
 676                        // If it got this far, then the comment is unterminated:
 677                    
 678                        throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
 679                    }
 680                    
 681                    void XmlParser::_getDocType(char*& p)
 682                    {
 683                        // Just ignore the DOCTYPE command for now:
 684                    
 685                        for (; *p && *p != '>'; p++)
 686                        {
 687 david.dillard 1.32         if (*p == '\n')
 688                                _line++;
 689 mike          1.13     }
 690                    
 691                        if (*p != '>')
 692 david.dillard 1.32         throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
 693 mike          1.13 
 694                        p++;
 695                    }
 696                    
 697                    void XmlParser::_getContent(char*& p)
 698                    {
 699                        while (*p && *p != '<')
 700                        {
 701 david.dillard 1.32         if (*p == '\n')
 702                                _line++;
 703 mike          1.13 
 704 david.dillard 1.32         p++;
 705 mike          1.13     }
 706                    }
 707                    
 708                    void XmlParser::_substituteReferences(char* text)
 709                    {
 710 david.dillard 1.32     size_t rem = strlen(text);
 711 mike          1.13 
 712                        for (char* p = text; *p; p++, rem--)
 713                        {
 714 david.dillard 1.32         if (*p == '&')
 715                            {
 716 kumpf         1.18             // Process character or entity reference
 717 mike          1.13 
 718 kumpf         1.18             Uint16 referenceChar = 0;
 719                                Uint32 referenceLength = 0;
 720                                XmlException::Code code = XmlException::MALFORMED_REFERENCE;
 721                    
 722                                if (*(p+1) == '#')
 723                                {
 724                                    // Found a character (numeric) reference
 725                                    // Determine whether it is decimal or hex
 726                                    if (*(p+2) == 'x')
 727                                    {
 728                                        // Decode a hexadecimal character reference
 729                                        char* q = p+3;
 730                    
 731                                        // At most four digits are allowed, plus trailing ';'
 732                                        Uint32 numDigits;
 733                                        for (numDigits = 0; numDigits < 5; numDigits++, q++)
 734                                        {
 735                                            if (isdigit(*q))
 736                                            {
 737                                                referenceChar = (referenceChar << 4);
 738                                                referenceChar += (*q - '0');
 739 kumpf         1.18                         }
 740                                            else if ((*q >= 'A') && (*q <= 'F'))
 741                                            {
 742                                                referenceChar = (referenceChar << 4);
 743                                                referenceChar += (*q - 'A' + 10);
 744                                            }
 745                                            else if ((*q >= 'a') && (*q <= 'f'))
 746                                            {
 747                                                referenceChar = (referenceChar << 4);
 748                                                referenceChar += (*q - 'a' + 10);
 749                                            }
 750                                            else if (*q == ';')
 751                                            {
 752                                                break;
 753                                            }
 754                                            else
 755                                            {
 756                                                throw XmlException(code, _line);
 757                                            }
 758                                        }
 759                    
 760 kumpf         1.18                     // Hex number must be 1 - 4 digits
 761                                        if ((numDigits == 0) || (numDigits > 4))
 762                                        {
 763                                            throw XmlException(code, _line);
 764                                        }
 765                    
 766                                        // ATTN: Currently do not support 16-bit characters
 767                                        if (referenceChar > 0xff)
 768                                        {
 769                                            // ATTN: Is there a good way to say "unsupported"?
 770                                            throw XmlException(code, _line);
 771                                        }
 772                    
 773                                        referenceLength = numDigits + 4;
 774                                    }
 775                                    else
 776                                    {
 777                                        // Decode a decimal character reference
 778                                        Uint32 newChar = 0;
 779                                        char* q = p+2;
 780                    
 781 kumpf         1.18                     // At most five digits are allowed, plus trailing ';'
 782                                        Uint32 numDigits;
 783                                        for (numDigits = 0; numDigits < 6; numDigits++, q++)
 784                                        {
 785                                            if (isdigit(*q))
 786                                            {
 787                                                newChar = (newChar * 10);
 788                                                newChar += (*q - '0');
 789                                            }
 790                                            else if (*q == ';')
 791                                            {
 792                                                break;
 793                                            }
 794                                            else
 795                                            {
 796                                                throw XmlException(code, _line);
 797                                            }
 798                                        }
 799                    
 800                                        // Decimal number must be 1 - 5 digits and fit in 16 bits
 801                                        if ((numDigits == 0) || (numDigits > 5) ||
 802 kumpf         1.18                         (newChar > 0xffff))
 803                                        {
 804                                            throw XmlException(code, _line);
 805                                        }
 806                    
 807                                        // ATTN: Currently do not support 16-bit characters
 808                                        if (newChar > 0xff)
 809                                        {
 810                                            // ATTN: Is there a good way to say "unsupported"?
 811                                            throw XmlException(code, _line);
 812                                        }
 813                    
 814                                        referenceChar = Uint16(newChar);
 815                                        referenceLength = numDigits + 3;
 816                                    }
 817                                }
 818                                else
 819                                {
 820                                    // Check for entity reference
 821                                    // ATTN: Inefficient if many entity references are supported
 822                                    Uint32 i;
 823 kumpf         1.18                 for (i = 0; i < _REFERENCES_SIZE; i++)
 824                                    {
 825                                        Uint32 length = _references[i].length;
 826                                        const char* match = _references[i].match;
 827                    
 828                                        if (strncmp(p, _references[i].match, length) == 0)
 829                                        {
 830                                            referenceChar = _references[i].replacement;
 831                                            referenceLength = length;
 832                                            break;
 833                                        }
 834                                    }
 835                    
 836                                    if (i == _REFERENCES_SIZE)
 837                                    {
 838                                        // Didn't recognize the entity reference
 839                                        // ATTN: Is there a good way to say "unsupported"?
 840                                        throw XmlException(code, _line);
 841                                    }
 842                                }
 843                    
 844 kumpf         1.18             // Replace the reference with the correct character
 845                                *p = (char)referenceChar;
 846                                char* q = p + referenceLength;
 847                                rem = rem - referenceLength + 1;
 848                                memmove(p + 1, q, rem);
 849 david.dillard 1.32         }
 850 mike          1.13     }
 851                    }
 852                    
 853                    static const char _EMPTY_STRING[] = "";
 854                    
 855                    void XmlParser::_getElement(char*& p, XmlEntry& entry)
 856                    {
 857                        entry.attributeCount = 0;
 858                    
 859                        //--------------------------------------------------------------------------
 860                        // Get the element name (expect one of these: '?', '!', [A-Za-z_])
 861                        //--------------------------------------------------------------------------
 862                    
 863                        if (*p == '?')
 864                        {
 865 david.dillard 1.32         entry.type = XmlEntry::XML_DECLARATION;
 866                            entry.text = ++p;
 867 mike          1.13 
 868 david.dillard 1.32         Boolean openCloseElement = false;
 869 mike          1.13 
 870 david.dillard 1.32         if (_getElementName(p))
 871                                return;
 872 mike          1.13     }
 873                        else if (*p == '!')
 874                        {
 875 david.dillard 1.32         p++;
 876 mike          1.13 
 877 david.dillard 1.32         // Expect a comment or CDATA:
 878 mike          1.13 
 879 david.dillard 1.32         if (p[0] == '-' && p[1] == '-')
 880                            {
 881                                p += 2;
 882                                entry.type = XmlEntry::COMMENT;
 883                                entry.text = p;
 884                                _getComment(p);
 885                                return;
 886                            }
 887                            else if (memcmp(p, "[CDATA[", 7) == 0)
 888                            {
 889                                p += 7;
 890                                entry.type = XmlEntry::CDATA;
 891                                entry.text = p;
 892                                _getCData(p);
 893                                return;
 894                            }
 895                            else if (memcmp(p, "DOCTYPE", 7) == 0)
 896                            {
 897                                entry.type = XmlEntry::DOCTYPE;
 898                                entry.text = _EMPTY_STRING;
 899                                _getDocType(p);
 900 david.dillard 1.32             return;
 901                            }
 902                            throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
 903 mike          1.13     }
 904                        else if (*p == '/')
 905                        {
 906 david.dillard 1.32         entry.type = XmlEntry::END_TAG;
 907                            entry.text = ++p;
 908 mike          1.13 
 909 david.dillard 1.32         if (!_getElementName(p))
 910                                throw(XmlException(XmlException::BAD_END_TAG, _line));
 911 mike          1.13 
 912 david.dillard 1.32         return;
 913 mike          1.13     }
 914 david         1.25     else if ((((*p >= 'A') && (*p <= 'Z')) ||
 915 kumpf         1.24               ((*p >= 'a') && (*p <= 'z')) ||
 916                                  (*p == '_')))
 917 mike          1.13     {
 918 david.dillard 1.32         entry.type = XmlEntry::START_TAG;
 919                            entry.text = p;
 920 mike          1.13 
 921 david.dillard 1.32         Boolean openCloseElement = false;
 922 mike          1.13 
 923 david.dillard 1.32         if (_getOpenElementName(p, openCloseElement))
 924                            {
 925                                if (openCloseElement)
 926                                    entry.type = XmlEntry::EMPTY_TAG;
 927                                return;
 928                            }
 929 mike          1.13     }
 930                        else
 931 david.dillard 1.32         throw XmlException(XmlException::BAD_START_TAG, _line);
 932 mike          1.13 
 933                        //--------------------------------------------------------------------------
 934                        // Grab all the attributes:
 935                        //--------------------------------------------------------------------------
 936                    
 937                        for (;;)
 938                        {
 939 david.dillard 1.32         if (entry.type == XmlEntry::XML_DECLARATION)
 940                            {
 941                                if (p[0] == '?' && p[1] == '>')
 942                                {
 943                                    p += 2;
 944                                    return;
 945                                }
 946                            }
 947                            else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
 948                            {
 949                                entry.type = XmlEntry::EMPTY_TAG;
 950                                p += 2;
 951                                return;
 952                            }
 953                            else if (*p == '>')
 954                            {
 955                                p++;
 956                                return;
 957                            }
 958                    
 959                            XmlAttribute attr;
 960 david.dillard 1.32         attr.name = p;
 961                            _getAttributeNameAndEqual(p);
 962                    
 963                            if (*p != '"' && *p != '\'')
 964                                throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 965                    
 966                            attr.value = p + 1;
 967                            _getAttributeValue(p);
 968                    
 969                            if (entry.type == XmlEntry::XML_DECLARATION)
 970                            {
 971                                // The next thing must a space or a "?>":
 972 mike          1.13 
 973 david.dillard 1.32             if (!(p[0] == '?' && p[1] == '>') && !_isspace(*p))
 974                                {
 975                                    throw XmlException(
 976                                        XmlException::BAD_ATTRIBUTE_VALUE, _line);
 977                                }
 978                            }
 979                            else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || _isspace(*p)))
 980                            {
 981                                // The next thing must be a space or a '>':
 982 mike          1.13 
 983 david.dillard 1.32             throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 984                            }
 985 mike          1.13 
 986 david.dillard 1.32         _skipWhitespace(p);
 987                    
 988                            if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES)
 989                                throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line);
 990                    
 991                            _substituteReferences((char*)attr.value);
 992                            entry.attributes[entry.attributeCount++] = attr;
 993 mike          1.13     }
 994                    }
 995                    
 996                    static const char* _typeStrings[] =
 997                    {
 998 david.dillard 1.32     "XML_DECLARATION",
 999                        "START_TAG",
1000                        "EMPTY_TAG",
1001                        "END_TAG",
1002 mike          1.13     "COMMENT",
1003                        "CDATA",
1004                        "DOCTYPE",
1005 david.dillard 1.32     "CONTENT"
1006 mike          1.13 };
1007                    
1008                    void XmlEntry::print() const
1009                    {
1010                        PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
1011                    
1012                        Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
1013                    
1014                        if (needQuotes)
1015 david.dillard 1.32         PEGASUS_STD(cout) << "\"";
1016                    
1017 mike          1.13     _printValue(text);
1018                    
1019                        if (needQuotes)
1020 david.dillard 1.32         PEGASUS_STD(cout) << "\"";
1021 mike          1.13 
1022                        PEGASUS_STD(cout) << '\n';
1023                    
1024                        for (Uint32 i = 0; i < attributeCount; i++)
1025                        {
1026 david.dillard 1.32         PEGASUS_STD(cout) << "    " << attributes[i].name << "=\"";
1027                            _printValue(attributes[i].value);
1028                            PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
1029 mike          1.13     }
1030                    }
1031                    
1032                    const XmlAttribute* XmlEntry::findAttribute(
1033                        const char* name) const
1034                    {
1035                        for (Uint32 i = 0; i < attributeCount; i++)
1036                        {
1037 david.dillard 1.32         if (strcmp(attributes[i].name, name) == 0)
1038                                return &attributes[i];
1039 mike          1.13     }
1040                    
1041                        return 0;
1042                    }
1043                    
1044                    // Find first non-whitespace character (set first) and last non-whitespace
1045                    // character (set last one past this). For example, consider this string:
1046                    //
1047 david.dillard 1.32 //      "   87     "
1048 mike          1.13 //
1049                    // The first pointer would point to '8' and the last pointer woudl point one
1050                    // beyond '7'.
1051                    
1052                    static void _findEnds(
1053 david.dillard 1.32     const char* str,
1054                        const char*& first,
1055 mike          1.13     const char*& last)
1056                    {
1057                        first = str;
1058                    
1059 chuck         1.26     while (_isspace(*first))
1060 david.dillard 1.32         first++;
1061 mike          1.13 
1062                        if (!*first)
1063                        {
1064 david.dillard 1.32         last = first;
1065                            return;
1066 mike          1.13     }
1067                    
1068                        last = first + strlen(first);
1069                    
1070 chuck         1.26     while (last != first && _isspace(last[-1]))
1071 david.dillard 1.32         last--;
1072 mike          1.13 }
1073                    
1074                    Boolean XmlEntry::getAttributeValue(
1075 david.dillard 1.32     const char* name,
1076 mike          1.13     Uint32& value) const
1077                    {
1078                        const XmlAttribute* attr = findAttribute(name);
1079                    
1080                        if (!attr)
1081 david.dillard 1.32         return false;
1082 mike          1.13 
1083                        const char* first;
1084                        const char* last;
1085                        _findEnds(attr->value, first, last);
1086                    
1087                        char* end = 0;
1088                        long tmp = strtol(first, &end, 10);
1089                    
1090                        if (!end || end != last)
1091 david.dillard 1.32         return false;
1092 mike          1.13 
1093                        value = Uint32(tmp);
1094                        return true;
1095                    }
1096                    
1097                    Boolean XmlEntry::getAttributeValue(
1098 david.dillard 1.32     const char* name,
1099 mike          1.13     Real32& value) const
1100                    {
1101                        const XmlAttribute* attr = findAttribute(name);
1102                    
1103                        if (!attr)
1104 david.dillard 1.32         return false;
1105 mike          1.13 
1106                        const char* first;
1107                        const char* last;
1108                        _findEnds(attr->value, first, last);
1109                    
1110                        char* end = 0;
1111                        double tmp = strtod(first, &end);
1112                    
1113                        if (!end || end != last)
1114 david.dillard 1.32         return false;
1115 mike          1.13 
1116 david.dillard 1.32     value = static_cast<Real32>(tmp);
1117 mike          1.13     return true;
1118                    }
1119                    
1120                    Boolean XmlEntry::getAttributeValue(
1121 david.dillard 1.32     const char* name,
1122 mike          1.13     const char*& value) const
1123                    {
1124                        const XmlAttribute* attr = findAttribute(name);
1125                    
1126                        if (!attr)
1127 david.dillard 1.32         return false;
1128 mike          1.13 
1129                        value = attr->value;
1130                        return true;
1131                    }
1132                    
1133                    Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1134                    {
1135                        const char* tmp;
1136                    
1137                        if (!getAttributeValue(name, tmp))
1138 david.dillard 1.32         return false;
1139 mike          1.13 
1140 chuck         1.28     value = String(tmp);
1141 mike          1.13     return true;
1142                    }
1143                    
1144 david.dillard 1.30 void XmlAppendCString(Array<char>& out, const char* str)
1145 mike          1.13 {
1146 david.dillard 1.32     out.append(str, static_cast<Uint32>(strlen(str)));
1147 mike          1.13 }
1148                    
1149                    PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2