(file) Return to XmlParser.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 karl  1.31 //%2005////////////////////////////////////////////////////////////////////////
   2 mike  1.13 //
   3 karl  1.29 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
   4            // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
   5            // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
   6 karl  1.27 // IBM Corp.; EMC Corporation, The Open Group.
   7 karl  1.29 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
   8            // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
   9 karl  1.31 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
  10            // EMC Corporation; VERITAS Software Corporation; The Open Group.
  11 mike  1.13 //
  12            // Permission is hereby granted, free of charge, to any person obtaining a copy
  13 kumpf 1.16 // of this software and associated documentation files (the "Software"), to
  14            // deal in the Software without restriction, including without limitation the
  15            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  16 mike  1.13 // sell copies of the Software, and to permit persons to whom the Software is
  17            // furnished to do so, subject to the following conditions:
  18 david.dillard 1.32 //
  19 kumpf         1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
  20 mike          1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
  21                    // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
  22 kumpf         1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
  23                    // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  24                    // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  25 mike          1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  26                    // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27                    //
  28                    //==============================================================================
  29                    //
  30                    // Author: Mike Brasher (mbrasher@bmc.com)
  31                    //
  32 david.dillard 1.30 // Modified By: David Dillard, VERITAS Software Corp.
  33                    //                  (david.dillard@veritas.com)
  34 mike          1.13 //
  35                    //%/////////////////////////////////////////////////////////////////////////////
  36                    
  37                    ////////////////////////////////////////////////////////////////////////////////
  38                    //
  39                    // XmlParser
  40                    //
  //      This file contains a simple non-validating XML parser. Here are
  //      several rules for well-formed XML:
  43 mike          1.13 //
  44 david.dillard 1.32 //          1.  Documents must begin with an XML declaration:
  45 mike          1.13 //
  46 david.dillard 1.32 //              <?xml version="1.0" standalone="yes"?>
  47 mike          1.13 //
  48 david.dillard 1.32 //          2.  Comments have the form:
  49 mike          1.13 //
  50 david.dillard 1.32 //              <!-- blah blah blah -->
  51 mike          1.13 //
  //          3. The following entity references are supported:
  //
  //              &amp; - ampersand
  //              &lt; - less-than
  //              &gt; - greater-than
  //              &quot; - full quote
  //              &apos; - apostrophe
  //
  //             as well as character (numeric) references:
  //
  //              &#49; - decimal reference for character '1'
  //              &#x31; - hexadecimal reference for character '1'
  64                    //
  //          4. Element names and attribute names take the following form:
  //
  //              [A-Za-z_][A-Za-z_0-9-.:]*
  68 mike          1.13 //
  69 david.dillard 1.32 //          5.  Arbitrary data (CDATA) can be enclosed like this:
  70 mike          1.13 //
  71 david.dillard 1.32 //                  <![CDATA[
  72                    //                  ...
  73                    //                  ]]>
  74 mike          1.13 //
  75 david.dillard 1.32 //          6.  Element names and attributes names are case-sensitive.
  76 mike          1.13 //
  //          7.  Attribute values must be delimited by full (double) or half
  //              (single) quotes; an undelimited value is rejected.
  79 mike          1.13 //
  80 david.dillard 1.32 //          8.  <!DOCTYPE...>
  81 mike          1.13 //
  82                    // TODO:
  83                    //
  84 karl          1.15 //      ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is work.
  85 david.dillard 1.32 //      Handle <!DOCTYPE...> sections which are complicated (containing
  86 mike          1.13 //        rules rather than references to files).
  87                    //
  88 david.dillard 1.32 //      Remove newlines from string literals:
  89 mike          1.13 //
  90                    //          Example: <xyz x="hello
  91 david.dillard 1.32 //              world">
  92 mike          1.13 //
  93                    ////////////////////////////////////////////////////////////////////////////////
  94                    
  95 sage          1.14 #include <Pegasus/Common/Config.h>
  96 mike          1.13 #include <cctype>
  97                    #include <cstdio>
  98                    #include <cstdlib>
  99                    #include <cstring>
 100                    #include "XmlParser.h"
 101                    #include "Logger.h"
 102 chuck         1.19 #include "ExceptionRep.h"
 103 mike          1.32.2.1 #include "CharSet.h"
 104 mike          1.13     
 105                        PEGASUS_NAMESPACE_BEGIN
 106                        
 107                        #define PEGASUS_ARRAY_T XmlEntry
 108                        # include "ArrayImpl.h"
 109                        #undef PEGASUS_ARRAY_T
 110                        
 111                        
 112                        ////////////////////////////////////////////////////////////////////////////////
 113                        //
 114                        // Static helper functions
 115                        //
 116                        ////////////////////////////////////////////////////////////////////////////////
 117                        
 118                        static void _printValue(const char* p)
 119                        {
 120                            for (; *p; p++)
 121                            {
 122 david.dillard 1.32             if (*p == '\n')
 123                                    PEGASUS_STD(cout) << "\\n";
 124                                else if (*p == '\r')
 125                                    PEGASUS_STD(cout) << "\\r";
 126                                else if (*p == '\t')
 127                                    PEGASUS_STD(cout) << "\\t";
 128                                else
 129                                    PEGASUS_STD(cout) << *p;
 130 mike          1.13         }
 131                        }
 132                        
 133                        struct EntityReference
 134                        {
 135                            const char* match;
 136                            Uint32 length;
 137                            char replacement;
 138                        };
 139                        
 140 kumpf         1.18     // ATTN: Add support for more entity references
 141 mike          1.13     static EntityReference _references[] =
 142                        {
 143                            { "&amp;", 5, '&' },
 144                            { "&lt;", 4, '<' },
 145                            { "&gt;", 4, '>' },
 146                            { "&quot;", 6, '"' },
 147                            { "&apos;", 6, '\'' }
 148                        };
 149                        
 150 chuck         1.26     
 151                        // Implements a check for a whitespace character, without calling
 152                        // isspace( ).  The isspace( ) function is locale-sensitive,
 153                        // and incorrectly flags some chars above 0x7f as whitespace.  This
 154                        // causes the XmlParser to incorrectly parse UTF-8 data.
 155                        //
 156                        // Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml)
 157                        // defines white space as:
 158 david.dillard 1.32     // S    ::=    (#x20 | #x9 | #xD | #xA)+
 159 mike          1.32.2.1 static inline int _isspace(char c)
 160 chuck         1.26     {
 161 mike          1.32.2.1     return CharSet::is_space(c);
 162 chuck         1.26     }
 163                        
 164 mike          1.13     static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
 165                        
 166                        // Remove all redundant spaces from the given string:
 167                        
 168                        static void _normalize(char* text)
 169                        {
 170                            char* p = text;
 171 david.dillard 1.32         char* end = p + strlen(text);
 172 mike          1.13     
 173                            // Remove leading spaces:
 174                        
 175 chuck         1.26         while (_isspace(*p))
 176 david.dillard 1.32                     p++;
 177 mike          1.13     
 178                            if (p != text)
 179 david.dillard 1.32             memmove(text, p, end - p + 1);
 180 mike          1.13     
 181                            p = text;
 182                        
 183                            // Look for sequences of more than one space and remove all but one.
 184                        
 185                            for (;;)
 186                            {
 187 david.dillard 1.32             // Advance to the next space:
 188 mike          1.13     
 189 david.dillard 1.32             while (*p && !_isspace(*p))
 190                                    p++;
 191 mike          1.13     
 192 david.dillard 1.32             if (!*p)
 193                                    break;
 194 mike          1.13     
 195 david.dillard 1.32             // Advance to the next non-space:
 196 mike          1.13     
 197 david.dillard 1.32             char* q = p++;
 198 mike          1.13     
 199 david.dillard 1.32             while (_isspace(*p))
 200                                    p++;
 201 mike          1.13     
 202 david.dillard 1.32             // Discard trailing spaces (if we are at the end):
 203 mike          1.13     
 204 david.dillard 1.32             if (!*p)
 205                                {
 206                                    *q = '\0';
 207                                    break;
 208                                }
 209 mike          1.13     
 210 david.dillard 1.32             // Remove the redundant spaces:
 211 mike          1.13     
 212 david.dillard 1.32             const size_t n = p - q;
 213 mike          1.13     
 214 david.dillard 1.32             if (n > 1)
 215                                {
 216                                    *q++ = ' ';
 217                                    memmove(q, p, end - p + 1);
 218                                    p = q;
 219                                }
 220 mike          1.13         }
 221                        }
 222                        
 223                        ////////////////////////////////////////////////////////////////////////////////
 224                        //
 225                        // XmlException
 226                        //
 227                        ////////////////////////////////////////////////////////////////////////////////
 228                        
 229                        static const char* _xmlMessages[] =
 230                        {
 231                            "Bad opening element",
 232                            "Bad closing element",
 233                            "Bad attribute name",
 234                            "Exepected equal sign",
 235                            "Bad attribute value",
 236                            "A \"--\" sequence found within comment",
 237                            "Unterminated comment",
 238                            "Unterminated CDATA block",
 239                            "Unterminated DOCTYPE",
 240                            "Too many attributes: parser only handles 10",
 241 mike          1.13         "Malformed reference",
 242                            "Expected a comment or CDATA following \"<!\" sequence",
 243                            "Closing element does not match opening element",
 244                            "One or more tags are still open",
 245                            "More than one root element was encountered",
 246                            "Validation error",
 247                            "Semantic error"
 248                        };
 249                        
 250 david.dillard 1.32     static const char* _xmlKeys[] =
 251 chuck         1.19     {
 252 humberto      1.20         "Common.XmlParser.BAD_START_TAG",
 253 chuck         1.19         "Common.XmlParser.BAD_END_TAG",
 254                            "Common.XmlParser.BAD_ATTRIBUTE_NAME",
 255                            "Common.XmlParser.EXPECTED_EQUAL_SIGN",
 256                            "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
 257                            "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
 258                            "Common.XmlParser.UNTERMINATED_COMMENT",
 259                            "Common.XmlParser.UNTERMINATED_CDATA",
 260                            "Common.XmlParser.UNTERMINATED_DOCTYPE",
 261                            "Common.XmlParser.TOO_MANY_ATTRIBUTES",
 262                            "Common.XmlParser.MALFORMED_REFERENCE",
 263                            "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
 264                            "Common.XmlParser.START_END_MISMATCH",
 265 david.dillard 1.32         "Common.XmlParser.UNCLOSED_TAGS",
 266 chuck         1.19         "Common.XmlParser.MULTIPLE_ROOTS",
 267                            "Common.XmlParser.VALIDATION_ERROR",
 268                            "Common.XmlParser.SEMANTIC_ERROR"
 269                        };
 270                        
 271 chuck         1.23     // l10n replace _formMessage (comment out the old one)
 272 chuck         1.19     /*
 273 mike          1.13     static String _formMessage(Uint32 code, Uint32 line, const String& message)
 274                        {
 275                            String result = _xmlMessages[Uint32(code) - 1];
 276                        
 277                            char buffer[32];
 278                            sprintf(buffer, "%d", line);
 279                            result.append(": on line ");
 280                            result.append(buffer);
 281                        
 282                            if (message.size())
 283                            {
 284 david.dillard 1.32             result.append(": ");
 285                                result.append(message);
 286 mike          1.13         }
 287                        
 288                            return result;
 289                        }
 290 chuck         1.19     */
 291                        
 292                        static MessageLoaderParms _formMessage(Uint32 code, Uint32 line, const String& message)
 293                        {
 294                            String dftMsg = _xmlMessages[Uint32(code) - 1];
 295                            String key = _xmlKeys[Uint32(code) - 1];
 296 david.dillard 1.32             String msg = message;
 297 chuck         1.19     
 298                            dftMsg.append(": on line $0");
 299                            if (message.size())
 300                            {
 301 david.dillard 1.32             msg = ": " + msg;
 302                                dftMsg.append("$1");
 303                            }
 304 chuck         1.19     
 305                            return MessageLoaderParms(key, dftMsg, line ,msg);
 306                        }
 307                        
 308                        static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
 309                        {
 310                            String dftMsg = _xmlMessages[Uint32(code) - 1];
 311                            String key = _xmlKeys[Uint32(code) - 1];
 312                        
 313                            dftMsg.append(": on line $0");
 314 david.dillard 1.32     
 315 chuck         1.19         return MessageLoaderParms(key, dftMsg, line);
 316                        }
 317                        
 318 mike          1.13     
 319                        XmlException::XmlException(
 320 david.dillard 1.32         XmlException::Code code,
 321 mike          1.13         Uint32 lineNumber,
 322 david.dillard 1.32         const String& message)
 323 mike          1.13         : Exception(_formMessage(code, lineNumber, message))
 324                        {
 325                        
 326                        }
 327                        
 328 chuck         1.19     
 329                        XmlException::XmlException(
 330 david.dillard 1.32         XmlException::Code code,
 331 chuck         1.19         Uint32 lineNumber,
 332 david.dillard 1.32         MessageLoaderParms& msgParms)
 333 chuck         1.19         : Exception(_formPartialMessage(code, lineNumber))
 334                        {
 335 david.dillard 1.32             if (msgParms.default_msg.size())
 336 humberto      1.21         {
 337 david.dillard 1.32             msgParms.default_msg = ": " + msgParms.default_msg;
 338                            }
 339                                _rep->message.append(MessageLoader::getMessage(msgParms));
 340 chuck         1.19     }
 341                        
 342                        
 343 mike          1.13     ////////////////////////////////////////////////////////////////////////////////
 344                        //
 345                        // XmlValidationError
 346                        //
 347                        ////////////////////////////////////////////////////////////////////////////////
 348                        
 349                        XmlValidationError::XmlValidationError(
 350                            Uint32 lineNumber,
 351                            const String& message)
 352                            : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
 353                        {
 354                        
 355                        }
 356                        
 357 chuck         1.19     
 358                        XmlValidationError::XmlValidationError(
 359                            Uint32 lineNumber,
 360                            MessageLoaderParms& msgParms)
 361                            : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
 362                        {
 363                        
 364                        }
 365                        
 366                        
 367 mike          1.13     ////////////////////////////////////////////////////////////////////////////////
 368                        //
 369                        // XmlSemanticError
 370                        //
 371                        ////////////////////////////////////////////////////////////////////////////////
 372                        
 373                        XmlSemanticError::XmlSemanticError(
 374                            Uint32 lineNumber,
 375                            const String& message)
 376                            : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
 377                        {
 378                        
 379                        }
 380 chuck         1.19     
 381                        
 382                        XmlSemanticError::XmlSemanticError(
 383                            Uint32 lineNumber,
 384                            MessageLoaderParms& msgParms)
 385                            : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
 386                        {
 387                        
 388                        }
 389                        
 390 mike          1.13     
 391                        ////////////////////////////////////////////////////////////////////////////////
 392                        //
 393                        // XmlParser
 394                        //
 395                        ////////////////////////////////////////////////////////////////////////////////
 396                        
 397 david.dillard 1.32     XmlParser::XmlParser(char* text) : _line(1), _text(text), _current(text),
 398 mike          1.13         _restoreChar('\0'), _foundRoot(false)
 399                        {
 400                        
 401                        }
 402                        
 403                        Boolean XmlParser::next(XmlEntry& entry)
 404                        {
 405                            if (!_putBackStack.isEmpty())
 406                            {
 407 david.dillard 1.32             entry = _putBackStack.top();
 408                                _putBackStack.pop();
 409                                return true;
 410 mike          1.13         }
 411                        
 412                            // If a character was overwritten with a null-terminator the last
 413                            // time this routine was called, then put back that character. Before
 414                            // exiting of course, restore the null-terminator.
 415                        
 416                            char* nullTerminator = 0;
 417                        
 418                            if (_restoreChar && !*_current)
 419                            {
 420 david.dillard 1.32             nullTerminator = _current;
 421                                *_current = _restoreChar;
 422                                _restoreChar = '\0';
 423 mike          1.13         }
 424                        
 425                            // Skip over any whitespace:
 426                        
 427                            _skipWhitespace(_current);
 428                        
 429                            if (!*_current)
 430                            {
 431 david.dillard 1.32             if (nullTerminator)
 432                                    *nullTerminator = '\0';
 433 mike          1.13     
 434 david.dillard 1.32             if (!_stack.isEmpty())
 435                                    throw XmlException(XmlException::UNCLOSED_TAGS, _line);
 436 mike          1.13     
 437 david.dillard 1.32             return false;
 438 mike          1.13         }
 439                        
 440                            // Either a "<...>" or content begins next:
 441                        
 442                            if (*_current == '<')
 443                            {
 444 david.dillard 1.32             _current++;
 445                                _getElement(_current, entry);
 446 mike          1.13     
 447 david.dillard 1.32             if (nullTerminator)
 448                                    *nullTerminator = '\0';
 449 mike          1.13     
 450 david.dillard 1.32             if (entry.type == XmlEntry::START_TAG)
 451                                {
 452                                    if (_stack.isEmpty() && _foundRoot)
 453                                        throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
 454                        
 455                                    _foundRoot = true;
 456                                    _stack.push((char*)entry.text);
 457                                }
 458                                else if (entry.type == XmlEntry::END_TAG)
 459                                {
 460                                    if (_stack.isEmpty())
 461                                        throw XmlException(XmlException::START_END_MISMATCH, _line);
 462 mike          1.13     
 463 david.dillard 1.32                 if (strcmp(_stack.top(), entry.text) != 0)
 464                                        throw XmlException(XmlException::START_END_MISMATCH, _line);
 465 mike          1.13     
 466 david.dillard 1.32                 _stack.pop();
 467                                }
 468 mike          1.13     
 469 david.dillard 1.32             return true;
 470 mike          1.13         }
 471                            else
 472                            {
 473 david.dillard 1.32             entry.type = XmlEntry::CONTENT;
 474                                entry.text = _current;
 475                                _getContent(_current);
 476                                _restoreChar = *_current;
 477                                *_current = '\0';
 478 mike          1.13     
 479 david.dillard 1.32             if (nullTerminator)
 480                                    *nullTerminator = '\0';
 481 mike          1.13     
 482 david.dillard 1.32             _substituteReferences((char*)entry.text);
 483                                _normalize((char*)entry.text);
 484 mike          1.13     
 485 david.dillard 1.32             return true;
 486 mike          1.13         }
 487                        }
 488                        
 489                        void XmlParser::putBack(XmlEntry& entry)
 490                        {
 491                            _putBackStack.push(entry);
 492                        }
 493                        
 494                        XmlParser::~XmlParser()
 495                        {
 496                            // Nothing to do!
 497                        }
 498                        
 499                        void XmlParser::_skipWhitespace(char*& p)
 500                        {
 501 chuck         1.26         while (*p && _isspace(*p))
 502 mike          1.13         {
 503 david.dillard 1.32             if (*p == '\n')
 504                                    _line++;
 505 mike          1.13     
 506 david.dillard 1.32             p++;
 507 mike          1.13         }
 508                        }
 509                        
 510                        Boolean XmlParser::_getElementName(char*& p)
 511                        {
 512 david         1.25         if (!(((*p >= 'A') && (*p <= 'Z')) ||
 513 kumpf         1.24               ((*p >= 'a') && (*p <= 'z')) ||
 514                                  (*p == '_')))
 515 david.dillard 1.32             throw XmlException(XmlException::BAD_START_TAG, _line);
 516 kumpf         1.24         p++;
 517 mike          1.13     
 518 david         1.22         while ((*p) &&
 519 david.dillard 1.32                (((*p >= 'A') && (*p <= 'Z')) ||
 520                                    ((*p >= 'a') && (*p <= 'z')) ||
 521                                    ((*p >= '0') && (*p <= '9')) ||
 522                                    *p == '_' || *p == '-' || *p == ':' || *p == '.'))
 523                                p++;
 524 mike          1.13     
 525                            // The next character must be a space:
 526                        
 527 chuck         1.26         if (_isspace(*p))
 528 mike          1.13         {
 529 david.dillard 1.32             *p++ = '\0';
 530                                _skipWhitespace(p);
 531 mike          1.13         }
 532                        
 533                            if (*p == '>')
 534                            {
 535 david.dillard 1.32             *p++ = '\0';
 536                                return true;
 537 mike          1.13         }
 538                        
 539                            return false;
 540                        }
 541                        
 542                        Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement)
 543                        {
 544                            openCloseElement = false;
 545                        
 546 david         1.25         if (!(((*p >= 'A') && (*p <= 'Z')) ||
 547 kumpf         1.24               ((*p >= 'a') && (*p <= 'z')) ||
 548                                  (*p == '_')))
 549 david.dillard 1.32             throw XmlException(XmlException::BAD_START_TAG, _line);
 550 kumpf         1.24         p++;
 551 mike          1.13     
 552 david         1.22         while ((*p) &&
 553 david.dillard 1.32                (((*p >= 'A') && (*p <= 'Z')) ||
 554                                    ((*p >= 'a') && (*p <= 'z')) ||
 555                                    ((*p >= '0') && (*p <= '9')) ||
 556                                    *p == '_' || *p == '-' || *p == ':' || *p == '.'))
 557                                p++;
 558 mike          1.13     
 559                            // The next character must be a space:
 560                        
 561 chuck         1.26         if (_isspace(*p))
 562 mike          1.13         {
 563 david.dillard 1.32             *p++ = '\0';
 564                                _skipWhitespace(p);
 565 mike          1.13         }
 566                        
 567                            if (*p == '>')
 568                            {
 569 david.dillard 1.32             *p++ = '\0';
 570                                return true;
 571 mike          1.13         }
 572                        
 573                            if (p[0] == '/' && p[1] == '>')
 574                            {
 575 david.dillard 1.32             openCloseElement = true;
 576                                *p = '\0';
 577                                p += 2;
 578                                return true;
 579 mike          1.13         }
 580                        
 581                            return false;
 582                        }
 583                        
 584                        void XmlParser::_getAttributeNameAndEqual(char*& p)
 585                        {
 586 david         1.25         if (!(((*p >= 'A') && (*p <= 'Z')) ||
 587 kumpf         1.24               ((*p >= 'a') && (*p <= 'z')) ||
 588                                  (*p == '_')))
 589 david.dillard 1.32             throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
 590 kumpf         1.24         p++;
 591 mike          1.13     
 592 david         1.22         while ((*p) &&
 593 david.dillard 1.32                (((*p >= 'A') && (*p <= 'Z')) ||
 594                                    ((*p >= 'a') && (*p <= 'z')) ||
 595                                    ((*p >= '0') && (*p <= '9')) ||
 596                                    *p == '_' || *p == '-' || *p == ':' || *p == '.'))
 597                                p++;
 598 mike          1.13     
 599                            char* term = p;
 600                        
 601                            _skipWhitespace(p);
 602                        
 603                            if (*p != '=')
 604 david.dillard 1.32             throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
 605 mike          1.13     
 606                            p++;
 607                        
 608                            _skipWhitespace(p);
 609                        
 610                            *term = '\0';
 611                        }
 612                        
 613                        void XmlParser::_getAttributeValue(char*& p)
 614                        {
 615                            // ATTN-B: handle values contained in semiquotes:
 616                        
 617                            if (*p != '"' && *p != '\'')
 618 david.dillard 1.32             throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 619 mike          1.13     
 620                            char startChar = *p++;
 621                        
 622                            while (*p && *p != startChar)
 623 david.dillard 1.32             p++;
 624 mike          1.13     
 625                            if (*p != startChar)
 626 david.dillard 1.32             throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 627 mike          1.13     
 628                            *p++ = '\0';
 629                        }
 630                        
 631                        void XmlParser::_getComment(char*& p)
 632                        {
 633                            // Now p points to first non-whitespace character beyond "<--" sequence:
 634                        
 635                            for (; *p; p++)
 636                            {
 637 david.dillard 1.32             if (p[0] == '-' && p[1] == '-')
 638                                {
 639                                    if (p[2] != '>')
 640                                    {
 641                                        throw XmlException(
 642                                            XmlException::MINUS_MINUS_IN_COMMENT, _line);
 643                                    }
 644                        
 645                                    // Find end of comment (excluding whitespace):
 646                        
 647                                    *p = '\0';
 648                                    p += 3;
 649                                    return;
 650                                }
 651 mike          1.13         }
 652                        
 653                            // If it got this far, then the comment is unterminated:
 654                        
 655                            throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
 656                        }
 657                        
 658                        void XmlParser::_getCData(char*& p)
 659                        {
 660                            // At this point p points one past "<![CDATA[" sequence:
 661                        
 662                            for (; *p; p++)
 663                            {
 664 david.dillard 1.32             if (p[0] == ']' && p[1] == ']' && p[2] == '>')
 665                                {
 666                                    *p = '\0';
 667                                    p += 3;
 668                                    return;
 669                                }
 670                                else if (*p == '\n')
 671                                    _line++;
 672 mike          1.13         }
 673                        
 674                            // If it got this far, then the comment is unterminated:
 675                        
 676                            throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
 677                        }
 678                        
 679                        void XmlParser::_getDocType(char*& p)
 680                        {
 681                            // Just ignore the DOCTYPE command for now:
 682                        
 683                            for (; *p && *p != '>'; p++)
 684                            {
 685 david.dillard 1.32             if (*p == '\n')
 686                                    _line++;
 687 mike          1.13         }
 688                        
 689                            if (*p != '>')
 690 david.dillard 1.32             throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
 691 mike          1.13     
 692                            p++;
 693                        }
 694                        
 695                        void XmlParser::_getContent(char*& p)
 696                        {
 697                            while (*p && *p != '<')
 698                            {
 699 david.dillard 1.32             if (*p == '\n')
 700                                    _line++;
 701 mike          1.13     
 702 david.dillard 1.32             p++;
 703 mike          1.13         }
 704                        }
 705                        
 706                        void XmlParser::_substituteReferences(char* text)
 707                        {
 708 david.dillard 1.32         size_t rem = strlen(text);
 709 mike          1.13     
 710                            for (char* p = text; *p; p++, rem--)
 711                            {
 712 david.dillard 1.32             if (*p == '&')
 713                                {
 714 kumpf         1.18                 // Process character or entity reference
 715 mike          1.13     
 716 kumpf         1.18                 Uint16 referenceChar = 0;
 717                                    Uint32 referenceLength = 0;
 718                                    XmlException::Code code = XmlException::MALFORMED_REFERENCE;
 719                        
 720                                    if (*(p+1) == '#')
 721                                    {
 722                                        // Found a character (numeric) reference
 723                                        // Determine whether it is decimal or hex
 724                                        if (*(p+2) == 'x')
 725                                        {
 726                                            // Decode a hexadecimal character reference
 727                                            char* q = p+3;
 728                        
 729                                            // At most four digits are allowed, plus trailing ';'
 730                                            Uint32 numDigits;
 731                                            for (numDigits = 0; numDigits < 5; numDigits++, q++)
 732                                            {
 733                                                if (isdigit(*q))
 734                                                {
 735                                                    referenceChar = (referenceChar << 4);
 736                                                    referenceChar += (*q - '0');
 737 kumpf         1.18                             }
 738                                                else if ((*q >= 'A') && (*q <= 'F'))
 739                                                {
 740                                                    referenceChar = (referenceChar << 4);
 741                                                    referenceChar += (*q - 'A' + 10);
 742                                                }
 743                                                else if ((*q >= 'a') && (*q <= 'f'))
 744                                                {
 745                                                    referenceChar = (referenceChar << 4);
 746                                                    referenceChar += (*q - 'a' + 10);
 747                                                }
 748                                                else if (*q == ';')
 749                                                {
 750                                                    break;
 751                                                }
 752                                                else
 753                                                {
 754                                                    throw XmlException(code, _line);
 755                                                }
 756                                            }
 757                        
 758 kumpf         1.18                         // Hex number must be 1 - 4 digits
 759                                            if ((numDigits == 0) || (numDigits > 4))
 760                                            {
 761                                                throw XmlException(code, _line);
 762                                            }
 763                        
 764                                            // ATTN: Currently do not support 16-bit characters
 765                                            if (referenceChar > 0xff)
 766                                            {
 767                                                // ATTN: Is there a good way to say "unsupported"?
 768                                                throw XmlException(code, _line);
 769                                            }
 770                        
 771                                            referenceLength = numDigits + 4;
 772                                        }
 773                                        else
 774                                        {
 775                                            // Decode a decimal character reference
 776                                            Uint32 newChar = 0;
 777                                            char* q = p+2;
 778                        
 779 kumpf         1.18                         // At most five digits are allowed, plus trailing ';'
 780                                            Uint32 numDigits;
 781                                            for (numDigits = 0; numDigits < 6; numDigits++, q++)
 782                                            {
 783                                                if (isdigit(*q))
 784                                                {
 785                                                    newChar = (newChar * 10);
 786                                                    newChar += (*q - '0');
 787                                                }
 788                                                else if (*q == ';')
 789                                                {
 790                                                    break;
 791                                                }
 792                                                else
 793                                                {
 794                                                    throw XmlException(code, _line);
 795                                                }
 796                                            }
 797                        
 798                                            // Decimal number must be 1 - 5 digits and fit in 16 bits
 799                                            if ((numDigits == 0) || (numDigits > 5) ||
 800 kumpf         1.18                             (newChar > 0xffff))
 801                                            {
 802                                                throw XmlException(code, _line);
 803                                            }
 804                        
 805                                            // ATTN: Currently do not support 16-bit characters
 806                                            if (newChar > 0xff)
 807                                            {
 808                                                // ATTN: Is there a good way to say "unsupported"?
 809                                                throw XmlException(code, _line);
 810                                            }
 811                        
 812                                            referenceChar = Uint16(newChar);
 813                                            referenceLength = numDigits + 3;
 814                                        }
 815                                    }
 816                                    else
 817                                    {
 818                                        // Check for entity reference
 819                                        // ATTN: Inefficient if many entity references are supported
 820                                        Uint32 i;
 821 kumpf         1.18                     for (i = 0; i < _REFERENCES_SIZE; i++)
 822                                        {
 823                                            Uint32 length = _references[i].length;
 824                                            const char* match = _references[i].match;
 825                        
 826                                            if (strncmp(p, _references[i].match, length) == 0)
 827                                            {
 828                                                referenceChar = _references[i].replacement;
 829                                                referenceLength = length;
 830                                                break;
 831                                            }
 832                                        }
 833                        
 834                                        if (i == _REFERENCES_SIZE)
 835                                        {
 836                                            // Didn't recognize the entity reference
 837                                            // ATTN: Is there a good way to say "unsupported"?
 838                                            throw XmlException(code, _line);
 839                                        }
 840                                    }
 841                        
 842 kumpf         1.18                 // Replace the reference with the correct character
 843                                    *p = (char)referenceChar;
 844                                    char* q = p + referenceLength;
 845                                    rem = rem - referenceLength + 1;
 846                                    memmove(p + 1, q, rem);
 847 david.dillard 1.32             }
 848 mike          1.13         }
 849                        }
 850                        
 851                        static const char _EMPTY_STRING[] = "";
 852                        
 853                        void XmlParser::_getElement(char*& p, XmlEntry& entry)
 854                        {
 855                            entry.attributeCount = 0;
 856                        
 857                            //--------------------------------------------------------------------------
 858                            // Get the element name (expect one of these: '?', '!', [A-Za-z_])
 859                            //--------------------------------------------------------------------------
 860                        
 861                            if (*p == '?')
 862                            {
 863 david.dillard 1.32             entry.type = XmlEntry::XML_DECLARATION;
 864                                entry.text = ++p;
 865 mike          1.13     
 866 david.dillard 1.32             Boolean openCloseElement = false;
 867 mike          1.13     
 868 david.dillard 1.32             if (_getElementName(p))
 869                                    return;
 870 mike          1.13         }
 871                            else if (*p == '!')
 872                            {
 873 david.dillard 1.32             p++;
 874 mike          1.13     
 875 david.dillard 1.32             // Expect a comment or CDATA:
 876 mike          1.13     
 877 david.dillard 1.32             if (p[0] == '-' && p[1] == '-')
 878                                {
 879                                    p += 2;
 880                                    entry.type = XmlEntry::COMMENT;
 881                                    entry.text = p;
 882                                    _getComment(p);
 883                                    return;
 884                                }
 885                                else if (memcmp(p, "[CDATA[", 7) == 0)
 886                                {
 887                                    p += 7;
 888                                    entry.type = XmlEntry::CDATA;
 889                                    entry.text = p;
 890                                    _getCData(p);
 891                                    return;
 892                                }
 893                                else if (memcmp(p, "DOCTYPE", 7) == 0)
 894                                {
 895                                    entry.type = XmlEntry::DOCTYPE;
 896                                    entry.text = _EMPTY_STRING;
 897                                    _getDocType(p);
 898 david.dillard 1.32                 return;
 899                                }
 900                                throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
 901 mike          1.13         }
 902                            else if (*p == '/')
 903                            {
 904 david.dillard 1.32             entry.type = XmlEntry::END_TAG;
 905                                entry.text = ++p;
 906 mike          1.13     
 907 david.dillard 1.32             if (!_getElementName(p))
 908                                    throw(XmlException(XmlException::BAD_END_TAG, _line));
 909 mike          1.13     
 910 david.dillard 1.32             return;
 911 mike          1.13         }
 912 david         1.25         else if ((((*p >= 'A') && (*p <= 'Z')) ||
 913 kumpf         1.24                   ((*p >= 'a') && (*p <= 'z')) ||
 914                                      (*p == '_')))
 915 mike          1.13         {
 916 david.dillard 1.32             entry.type = XmlEntry::START_TAG;
 917                                entry.text = p;
 918 mike          1.13     
 919 david.dillard 1.32             Boolean openCloseElement = false;
 920 mike          1.13     
 921 david.dillard 1.32             if (_getOpenElementName(p, openCloseElement))
 922                                {
 923                                    if (openCloseElement)
 924                                        entry.type = XmlEntry::EMPTY_TAG;
 925                                    return;
 926                                }
 927 mike          1.13         }
 928                            else
 929 david.dillard 1.32             throw XmlException(XmlException::BAD_START_TAG, _line);
 930 mike          1.13     
 931                            //--------------------------------------------------------------------------
 932                            // Grab all the attributes:
 933                            //--------------------------------------------------------------------------
 934                        
 935                            for (;;)
 936                            {
 937 david.dillard 1.32             if (entry.type == XmlEntry::XML_DECLARATION)
 938                                {
 939                                    if (p[0] == '?' && p[1] == '>')
 940                                    {
 941                                        p += 2;
 942                                        return;
 943                                    }
 944                                }
 945                                else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
 946                                {
 947                                    entry.type = XmlEntry::EMPTY_TAG;
 948                                    p += 2;
 949                                    return;
 950                                }
 951                                else if (*p == '>')
 952                                {
 953                                    p++;
 954                                    return;
 955                                }
 956                        
 957                                XmlAttribute attr;
 958 david.dillard 1.32             attr.name = p;
 959                                _getAttributeNameAndEqual(p);
 960                        
 961                                if (*p != '"' && *p != '\'')
 962                                    throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 963                        
 964                                attr.value = p + 1;
 965                                _getAttributeValue(p);
 966                        
 967                                if (entry.type == XmlEntry::XML_DECLARATION)
 968                                {
 969                                    // The next thing must a space or a "?>":
 970 mike          1.13     
 971 david.dillard 1.32                 if (!(p[0] == '?' && p[1] == '>') && !_isspace(*p))
 972                                    {
 973                                        throw XmlException(
 974                                            XmlException::BAD_ATTRIBUTE_VALUE, _line);
 975                                    }
 976                                }
 977                                else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || _isspace(*p)))
 978                                {
 979                                    // The next thing must be a space or a '>':
 980 mike          1.13     
 981 david.dillard 1.32                 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
 982                                }
 983 mike          1.13     
 984 david.dillard 1.32             _skipWhitespace(p);
 985                        
 986                                if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES)
 987                                    throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line);
 988                        
 989                                _substituteReferences((char*)attr.value);
 990                                entry.attributes[entry.attributeCount++] = attr;
 991 mike          1.13         }
 992                        }
 993                        
 994                        static const char* _typeStrings[] =
 995                        {
 996 david.dillard 1.32         "XML_DECLARATION",
 997                            "START_TAG",
 998                            "EMPTY_TAG",
 999                            "END_TAG",
1000 mike          1.13         "COMMENT",
1001                            "CDATA",
1002                            "DOCTYPE",
1003 david.dillard 1.32         "CONTENT"
1004 mike          1.13     };
1005                        
1006                        void XmlEntry::print() const
1007                        {
1008                            PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
1009                        
1010                            Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
1011                        
1012                            if (needQuotes)
1013 david.dillard 1.32             PEGASUS_STD(cout) << "\"";
1014                        
1015 mike          1.13         _printValue(text);
1016                        
1017                            if (needQuotes)
1018 david.dillard 1.32             PEGASUS_STD(cout) << "\"";
1019 mike          1.13     
1020                            PEGASUS_STD(cout) << '\n';
1021                        
1022                            for (Uint32 i = 0; i < attributeCount; i++)
1023                            {
1024 david.dillard 1.32             PEGASUS_STD(cout) << "    " << attributes[i].name << "=\"";
1025                                _printValue(attributes[i].value);
1026                                PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
1027 mike          1.13         }
1028                        }
1029                        
1030                        const XmlAttribute* XmlEntry::findAttribute(
1031                            const char* name) const
1032                        {
1033                            for (Uint32 i = 0; i < attributeCount; i++)
1034                            {
1035 david.dillard 1.32             if (strcmp(attributes[i].name, name) == 0)
1036                                    return &attributes[i];
1037 mike          1.13         }
1038                        
1039                            return 0;
1040                        }
1041                        
1042                        // Find first non-whitespace character (set first) and last non-whitespace
1043                        // character (set last one past this). For example, consider this string:
1044                        //
1045 david.dillard 1.32     //      "   87     "
1046 mike          1.13     //
1047                        // The first pointer would point to '8' and the last pointer woudl point one
1048                        // beyond '7'.
1049                        
1050                        static void _findEnds(
1051 david.dillard 1.32         const char* str,
1052                            const char*& first,
1053 mike          1.13         const char*& last)
1054                        {
1055                            first = str;
1056                        
1057 chuck         1.26         while (_isspace(*first))
1058 david.dillard 1.32             first++;
1059 mike          1.13     
1060                            if (!*first)
1061                            {
1062 david.dillard 1.32             last = first;
1063                                return;
1064 mike          1.13         }
1065                        
1066                            last = first + strlen(first);
1067                        
1068 chuck         1.26         while (last != first && _isspace(last[-1]))
1069 david.dillard 1.32             last--;
1070 mike          1.13     }
1071                        
1072                        Boolean XmlEntry::getAttributeValue(
1073 david.dillard 1.32         const char* name,
1074 mike          1.13         Uint32& value) const
1075                        {
1076                            const XmlAttribute* attr = findAttribute(name);
1077                        
1078                            if (!attr)
1079 david.dillard 1.32             return false;
1080 mike          1.13     
1081                            const char* first;
1082                            const char* last;
1083                            _findEnds(attr->value, first, last);
1084                        
1085                            char* end = 0;
1086                            long tmp = strtol(first, &end, 10);
1087                        
1088                            if (!end || end != last)
1089 david.dillard 1.32             return false;
1090 mike          1.13     
1091                            value = Uint32(tmp);
1092                            return true;
1093                        }
1094                        
1095                        Boolean XmlEntry::getAttributeValue(
1096 david.dillard 1.32         const char* name,
1097 mike          1.13         Real32& value) const
1098                        {
1099                            const XmlAttribute* attr = findAttribute(name);
1100                        
1101                            if (!attr)
1102 david.dillard 1.32             return false;
1103 mike          1.13     
1104                            const char* first;
1105                            const char* last;
1106                            _findEnds(attr->value, first, last);
1107                        
1108                            char* end = 0;
1109                            double tmp = strtod(first, &end);
1110                        
1111                            if (!end || end != last)
1112 david.dillard 1.32             return false;
1113 mike          1.13     
1114 david.dillard 1.32         value = static_cast<Real32>(tmp);
1115 mike          1.13         return true;
1116                        }
1117                        
1118                        Boolean XmlEntry::getAttributeValue(
1119 david.dillard 1.32         const char* name,
1120 mike          1.13         const char*& value) const
1121                        {
1122                            const XmlAttribute* attr = findAttribute(name);
1123                        
1124                            if (!attr)
1125 david.dillard 1.32             return false;
1126 mike          1.13     
1127                            value = attr->value;
1128                            return true;
1129                        }
1130                        
1131                        Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1132                        {
1133                            const char* tmp;
1134                        
1135                            if (!getAttributeValue(name, tmp))
1136 david.dillard 1.32             return false;
1137 mike          1.13     
1138 chuck         1.28         value = String(tmp);
1139 mike          1.13         return true;
1140                        }
1141                        
1142 david.dillard 1.30     void XmlAppendCString(Array<char>& out, const char* str)
1143 mike          1.13     {
1144 david.dillard 1.32         out.append(str, static_cast<Uint32>(strlen(str)));
1145 mike          1.13     }
1146                        
1147                        PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2