(file) Return to XmlParser.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

Diff for /pegasus/src/Pegasus/Common/XmlParser.cpp between version 1.16 and 1.56

version 1.16, 2002/06/01 00:56:41 version 1.56, 2012/07/26 11:14:54
Line 1 
Line 1 
 //%/////////////////////////////////////////////////////////////////////////////  //%LICENSE////////////////////////////////////////////////////////////////
 //  
 // Copyright (c) 2000, 2001, 2002 BMC Software, Hewlett-Packard Company, IBM,  
 // The Open Group, Tivoli Systems  
 //  
 // Permission is hereby granted, free of charge, to any person obtaining a copy  
 // of this software and associated documentation files (the "Software"), to  
 // deal in the Software without restriction, including without limitation the  
 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or  
 // sell copies of the Software, and to permit persons to whom the Software is  
 // furnished to do so, subject to the following conditions:  
 //  
 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN  
 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED  
 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT  
 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR  
 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT  
 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN  
 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION  
 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  
 //  
 //==============================================================================  
 // //
 // Author: Mike Brasher (mbrasher@bmc.com)  // Licensed to The Open Group (TOG) under one or more contributor license
   // agreements.  Refer to the OpenPegasusNOTICE.txt file distributed with
   // this work for additional information regarding copyright ownership.
   // Each contributor licenses this file to you under the OpenPegasus Open
   // Source License; you may not use this file except in compliance with the
   // License.
   //
   // Permission is hereby granted, free of charge, to any person obtaining a
   // copy of this software and associated documentation files (the "Software"),
   // to deal in the Software without restriction, including without limitation
   // the rights to use, copy, modify, merge, publish, distribute, sublicense,
   // and/or sell copies of the Software, and to permit persons to whom the
   // Software is furnished to do so, subject to the following conditions:
   //
   // The above copyright notice and this permission notice shall be included
   // in all copies or substantial portions of the Software.
   //
   // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
   // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
   // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
   // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
   // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
   // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 // //
 // Modified By:  //////////////////////////////////////////////////////////////////////////
 // //
 //%///////////////////////////////////////////////////////////////////////////// //%/////////////////////////////////////////////////////////////////////////////
  
Line 50 
Line 52 
 //              &quot - full quote //              &quot - full quote
 //              &apos - apostrophe //              &apos - apostrophe
 // //
   //             as well as character (numeric) references:
   //
   //              1 - decimal reference for character '1'
   //              1 - hexadecimal reference for character '1'
   //
 //          4. Element names and attribute names take the following form: //          4. Element names and attribute names take the following form:
 // //
 //              [A-Za-z_][A-Za-z_0-9-.:] //              [A-Za-z_][A-Za-z_0-9-.:]
Line 69 
Line 76 
 // //
 // TODO: // TODO:
 // //
 //      ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is work.  //      ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is
 //      Handle <!DOCTYPE...> sections which are complicated (containing  //      work. Handle <!DOCTYPE...> sections which are complicated (containing
 //        rules rather than references to files). //        rules rather than references to files).
 // //
 //      Handle reference of this form: "&#913;"  
 //  
 //      Remove newlines from string literals: //      Remove newlines from string literals:
 // //
 //          Example: <xyz x="hello //          Example: <xyz x="hello
Line 84 
Line 89 
  
 #include <Pegasus/Common/Config.h> #include <Pegasus/Common/Config.h>
 #include <cctype> #include <cctype>
 #include <cassert>  
 #include <cstdio> #include <cstdio>
 #include <cstdlib> #include <cstdlib>
 #include <cstring> #include <cstring>
 #include "XmlParser.h" #include "XmlParser.h"
 #include "Logger.h" #include "Logger.h"
   #include "ExceptionRep.h"
   #include "CharSet.h"
  
 PEGASUS_NAMESPACE_BEGIN PEGASUS_NAMESPACE_BEGIN
  
 #define PEGASUS_ARRAY_T XmlEntry  
 # include "ArrayImpl.h"  
 #undef PEGASUS_ARRAY_T  
   
   
 //////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
 // //
 // Static helper functions // Static helper functions
Line 126 
Line 127 
     char replacement;     char replacement;
 }; };
  
 static EntityReference _references[] =  // Implements a check for a whitespace character, without calling
 {  // isspace( ).  The isspace( ) function is locale-sensitive,
     { "&amp;", 5, '&' },  // and incorrectly flags some chars above 0x7f as whitespace.  This
     { "&lt;", 4, '<' },  // causes the XmlParser to incorrectly parse UTF-8 data.
     { "&gt;", 4, '>' },  //
     { "&quot;", 6, '"' },  // Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml)
     { "&apos;", 6, '\'' }  // defines white space as:
 };  // S    ::=    (#x20 | #x9 | #xD | #xA)+
   static inline int _isspace(char c)
 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));  
   
 // Remove all redundant spaces from the given string:  
   
 static void _normalize(char* text)  
 {  
     Uint32 length = strlen(text);  
     char* p = text;  
     char* end = p + length;  
   
     // Remove leading spaces:  
   
     while (isspace(*p))  
         p++;  
   
     if (p != text)  
         memmove(text, p, end - p + 1);  
   
     p = text;  
   
     // Look for sequences of more than one space and remove all but one.  
   
     for (;;)  
     {  
         // Advance to the next space:  
   
         while (*p && !isspace(*p))  
             p++;  
   
         if (!*p)  
             break;  
   
         // Advance to the next non-space:  
   
         char* q = p++;  
   
         while (isspace(*p))  
             p++;  
   
         // Discard trailing spaces (if we are at the end):  
   
         if (!*p)  
         {         {
             *q = '\0';      return CharSet::isXmlWhiteSpace((Uint8)c);
             break;  
         }         }
  
         // Remove the redundant spaces:  
   
         Uint32 n = p - q;  
   
         if (n > 1)  
         {  
             *q++ = ' ';  
             memmove(q, p, end - p + 1);  
             p = q;  
         }  
     }  
 }  
  
 //////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
 // //
Line 212 
Line 158 
     "Unterminated comment",     "Unterminated comment",
     "Unterminated CDATA block",     "Unterminated CDATA block",
     "Unterminated DOCTYPE",     "Unterminated DOCTYPE",
     "Too many attributes: parser only handles 10",  
     "Malformed reference",     "Malformed reference",
     "Expected a comment or CDATA following \"<!\" sequence",     "Expected a comment or CDATA following \"<!\" sequence",
     "Closing element does not match opening element",     "Closing element does not match opening element",
     "One or more tags are still open",     "One or more tags are still open",
     "More than one root element was encountered",     "More than one root element was encountered",
     "Validation error",     "Validation error",
     "Semantic error"      "Semantic error",
       "Namespace not declared"
 }; };
  
 static String _formMessage(Uint32 code, Uint32 line, const String& message)  static const char* _xmlKeys[] =
 { {
     String result = _xmlMessages[Uint32(code) - 1];      "Common.XmlParser.BAD_START_TAG",
       "Common.XmlParser.BAD_END_TAG",
       "Common.XmlParser.BAD_ATTRIBUTE_NAME",
       "Common.XmlParser.EXPECTED_EQUAL_SIGN",
       "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
       "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
       "Common.XmlParser.UNTERMINATED_COMMENT",
       "Common.XmlParser.UNTERMINATED_CDATA",
       "Common.XmlParser.UNTERMINATED_DOCTYPE",
       "Common.XmlParser.MALFORMED_REFERENCE",
       "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
       "Common.XmlParser.START_END_MISMATCH",
       "Common.XmlParser.UNCLOSED_TAGS",
       "Common.XmlParser.MULTIPLE_ROOTS",
       "Common.XmlParser.VALIDATION_ERROR",
       "Common.XmlParser.SEMANTIC_ERROR",
       "Common.XmlParser.UNDECLARED_NAMESPACE"
   };
  
     char buffer[32];  
     sprintf(buffer, "%d", line);  
     result.append(": on line ");  
     result.append(buffer);  
  
   static MessageLoaderParms _formMessage(
       Uint32 code,
       Uint32 line,
       const String& message)
   {
       String dftMsg = _xmlMessages[Uint32(code) - 1];
       const char* key = _xmlKeys[Uint32(code) - 1];
       String msg = message;
   
       dftMsg.append(": on line $0");
     if (message.size())     if (message.size())
     {     {
         result.append(": ");          msg = ": " + msg;
         result.append(message);          dftMsg.append("$1");
     }     }
  
     return result;      return MessageLoaderParms(key, dftMsg.getCString(), line ,msg);
   }
   
   static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
   {
       String dftMsg = _xmlMessages[Uint32(code) - 1];
       const char* key = _xmlKeys[Uint32(code) - 1];
   
       dftMsg.append(": on line $0");
   
       return MessageLoaderParms(key, dftMsg.getCString(), line);
 } }
  
   
 XmlException::XmlException( XmlException::XmlException(
     XmlException::Code code,     XmlException::Code code,
     Uint32 lineNumber,     Uint32 lineNumber,
Line 249 
Line 229 
  
 } }
  
   
   XmlException::XmlException(
       XmlException::Code code,
       Uint32 lineNumber,
       MessageLoaderParms& msgParms)
       : Exception(_formPartialMessage(code, lineNumber))
   {
           if (msgParms.default_msg.size())
       {
           msgParms.default_msg = ": " + msgParms.default_msg;
       }
           _rep->message.append(MessageLoader::getMessage(msgParms));
   }
   
   
 //////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
 // //
 // XmlValidationError // XmlValidationError
Line 260 
Line 255 
     const String& message)     const String& message)
     : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)     : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
 { {
   }
   
  
   XmlValidationError::XmlValidationError(
       Uint32 lineNumber,
       MessageLoaderParms& msgParms)
       : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
   {
 } }
  
   
 //////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
 // //
 // XmlSemanticError // XmlSemanticError
Line 274 
Line 277 
     const String& message)     const String& message)
     : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)     : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
 { {
   }
   
  
   XmlSemanticError::XmlSemanticError(
       Uint32 lineNumber,
       MessageLoaderParms& msgParms)
       : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
   {
 } }
  
   
 //////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
 // //
 // XmlParser // XmlParser
 // //
 //////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
  
 XmlParser::XmlParser(char* text) : _line(1), _text(text), _current(text),  XmlParser::XmlParser(char* text, XmlNamespace* ns, Boolean hideEmptyTags)
     _restoreChar('\0'), _foundRoot(false)      : _line(1),
         _current(text),
         _restoreChar('\0'),
         _foundRoot(false),
         _supportedNamespaces(ns),
         // Start valid indexes with -2. -1 is reserved for not found.
         _currentUnsupportedNSType(-2),
         _hideEmptyTags(hideEmptyTags)
 { {
   }
  
   inline void _skipWhitespace(Uint32& line, char*& p)
   {
       while (*p && _isspace(*p))
       {
           if (*p == '\n')
               line++;
   
           p++;
       }
 } }
  
 Boolean XmlParser::next(XmlEntry& entry)  #if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \
       defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC)
   #pragma optimize( "", off )
   #endif
   static int _getEntityRef(char*& p)
   {
       if ((p[0] == 'g') && (p[1] == 't') && (p[2] == ';'))
       {
           p += 3;
           return '>';
       }
   
       if ((p[0] == 'l') && (p[1] == 't') && (p[2] == ';'))
       {
           p += 3;
           return '<';
       }
   
       if ((p[0] == 'a') && (p[1] == 'p') && (p[2] == 'o') && (p[3] == 's') &&
           (p[4] == ';'))
       {
           p += 5;
           return '\'';
       }
   
       if ((p[0] == 'q') && (p[1] == 'u') && (p[2] == 'o') && (p[3] == 't') &&
           (p[4] == ';'))
       {
           p += 5;
           return '"';
       }
   
       if ((p[0] == 'a') && (p[1] == 'm') && (p[2] == 'p') && (p[3] == ';'))
       {
           p += 4;
           return '&';
       }
   
       return -1;
   }
   #if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \
       defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC)
   #pragma optimize( "", on )
   #endif
   
   static inline int _getCharRef(char*& p)
   {
       char* end;
       unsigned long ch;
       Boolean hex = false;
   
       if (*p == 'x')
       {
           hex = true;
           ch = strtoul(++p, &end, 16);
       }
       else
       {
           ch = strtoul(p, &end, 10);
       }
   
       if ((end == p) || (*end != ';') || (ch > 255))
       {
           return -1;
       }
   
       if ((hex && (end - p > 4)) || (!hex && (end - p > 5)))
       {
           return -1;
       }
   
       p = end + 1;
   
       return ch;
   }
   
   // Parse an entity reference or a character reference
   static inline int _getRef(Uint32 line, char*& p)
   {
       int ch;
   
       if (*p == '#')
       {
           ch = _getCharRef(++p);
       }
       else
       {
           ch = _getEntityRef(p);
       }
   
       if (ch == -1)
       {
           throw XmlException(XmlException::MALFORMED_REFERENCE, line);
       }
   
       return ch;
   }
   
   static inline void _normalizeElementValue(
       Uint32& line,
       char*& p,
       Uint32 &textLen)
   {
       // Process one character at a time:
   
       char* q = p;
       char *start = p;
   
       while (*p && (*p != '<'))
       {
           if (_isspace(*p))
           {
               // Trim whitespace from the end of the value, but do not compress
               // whitespace within the value.
   
               const char* start = p;
   
               if (*p++ == '\n')
               {
                   line++;
               }
   
               _skipWhitespace(line, p);
   
               if (*p && (*p != '<'))
               {
                   // Transfer internal whitespace to q without compressing it.
                   const char* i = start;
                   while (i < p)
                   {
                       *q++ = *i++;
                   }
               }
               else
               {
                   // Do not transfer trailing whitespace to q.
                   break;
               }
           }
           else if (*p == '&')
           {
               // Process an entity reference or a character reference.
   
               *q++ = _getRef(line, ++p);
           }
           else
           {
               *q++ = *p++;
           }
       }
   
       // If q got behind p, it is safe and necessary to null-terminate q
   
       if (q != p)
       {
           *q = '\0';
       }
       textLen = (Uint32)(q - start);
   }
   
   static inline void _normalizeAttributeValue(
       Uint32& line,
       char*& p,
       char end_char,
       char*& start)
   {
       // Skip over leading whitespace:
   
       _skipWhitespace(line, p);
       start = p;
   
       // Process one character at a time:
   
       char* q = p;
   
       while (*p && (*p != end_char))
       {
           if (_isspace(*p))
           {
               // Compress sequences of whitespace characters to a single space
               // character. Update line number when newlines encountered.
   
               if (*p++ == '\n')
               {
                   line++;
               }
   
               *q++ = ' ';
   
               _skipWhitespace(line, p);
           }
           else if (*p == '&')
           {
               // Process an entity reference or a character reference.
   
               *q++ = _getRef(line, ++p);
           }
           else
           {
               *q++ = *p++;
           }
       }
   
       // Remove single trailing whitespace (consecutive whitespaces already
       // compressed above).  Since p >= q, we can tell if we need to strip a
       // trailing space from q by looking at the end of p.  We must not look at
       // the last character of p, though, if p is an empty string.
       Boolean adjust_q = (p != start) && _isspace(p[-1]);
   
       // We encountered a the end_char or a zero-terminator.
   
       *q = *p;
   
       if (adjust_q)
       {
           q--;
       }
   
       // If q got behind p, it is safe and necessary to null-terminate q
   
       if (q != p)
       {
           *q = '\0';
       }
   }
   
   Boolean XmlParser::_next(
       XmlEntry& entry,
       Boolean includeComment)
 { {
     if (!_putBackStack.isEmpty())     if (!_putBackStack.isEmpty())
     {     {
Line 311 
Line 567 
         _restoreChar = '\0';         _restoreChar = '\0';
     }     }
  
     // Skip over any whitespace:      entry.attributes.clear();
  
     _skipWhitespace(_current);      if (_supportedNamespaces)
       {
           // Remove namespaces of a deeper scope level from the stack.
           while (!_nameSpaces.isEmpty() &&
                  _nameSpaces.top().scopeLevel > _stack.size())
           {
               _nameSpaces.pop();
           }
       }
   
       // Loop until we are done with comments if includeComment is false.
       do
       {
           // Skip over any whitespace:
           _skipWhitespace(_line, _current);
  
     if (!*_current)     if (!*_current)
     {     {
Line 354 
Line 624 
  
             _stack.pop();             _stack.pop();
         }         }
   
         return true;  
     }     }
     else     else
     {     {
               // Normalize the content:
   
               char* start = _current;
               Uint32 textLen;
               _normalizeElementValue(_line, _current, textLen);
   
               // Get the content:
   
         entry.type = XmlEntry::CONTENT;         entry.type = XmlEntry::CONTENT;
         entry.text = _current;              entry.text = start;
         _getContent(_current);              entry.textLen = textLen;
   
               // Overwrite '<' with a null character (temporarily).
   
         _restoreChar = *_current;         _restoreChar = *_current;
         *_current = '\0';         *_current = '\0';
  
         if (nullTerminator)         if (nullTerminator)
             *nullTerminator = '\0';             *nullTerminator = '\0';
           }
       } while (!includeComment && entry.type == XmlEntry::COMMENT);
   
       if (_supportedNamespaces &&
           (entry.type == XmlEntry::START_TAG ||
            entry.type == XmlEntry::EMPTY_TAG ||
            entry.type == XmlEntry::END_TAG))
       {
           // Determine the namespace type for this entry
   
           if (entry.type == XmlEntry::START_TAG ||
               entry.type == XmlEntry::EMPTY_TAG)
           {
               // Process namespace declarations and determine the namespace type
               // for the attributes.
   
               Uint32 scopeLevel = _stack.size();
               if (entry.type == XmlEntry::EMPTY_TAG)
               {
                   // Empty tags are deeper scope, but not pushed onto the stack
                   scopeLevel++;
               }
  
         _substituteReferences((char*)entry.text);              for (Uint32 i = 0, n = entry.attributes.size(); i < n; i++)
         _normalize((char*)entry.text);              {
                   XmlAttribute& attr = entry.attributes[i];
                   if ((strncmp(attr.name, "xmlns:", 6) == 0) ||
                       (strcmp(attr.name, "xmlns") == 0))
                   {
                       // Process a namespace declaration
                       XmlNamespace ns;
                       if (attr.name[5] == ':')
                       {
                           ns.localName = attr.localName;
                       }
                       else
                       {
                           // Default name space has no local name
                           ns.localName = 0;
                       }
                       ns.extendedName = attr.value;
                       ns.scopeLevel = scopeLevel;
                       ns.type = _getSupportedNamespaceType(ns.extendedName);
   
                       // If the namespace is not supported, assign it a unique
                       // negative identifier.
                       if (ns.type == -1)
                       {
                           ns.type = _currentUnsupportedNSType--;
                       }
   
                       _nameSpaces.push(ns);
                   }
                   else
                   {
                       // Get the namespace type for this attribute.
                       attr.nsType = _getNamespaceType(attr.name);
                   }
               }
           }
   
           entry.nsType = _getNamespaceType(entry.text);
       }
       else
       {
           entry.nsType = -1;
       }
  
         return true;         return true;
     }     }
   
   Boolean XmlParser::next(XmlEntry& entry, Boolean includeComment)
   {
       if (_hideEmptyTags)
       {
           // Get the next tag.
   
           if (!_next(entry, includeComment))
               return false;
   
           // If an EMPTY_TAG is encountered, then convert it to a START_TAG and
           // push a matching END_TAG on the put-back stack. This hides every
           // EMPTY_TAG from the caller.
   
           if (entry.type == XmlEntry::EMPTY_TAG)
           {
               entry.type = XmlEntry::START_TAG;
   
               XmlEntry tmp;
               tmp.type = XmlEntry::END_TAG;
               tmp.text = entry.text;
               tmp.nsType = entry.nsType;
               tmp.localName = entry.localName;
   
               _putBackStack.push(tmp);
           }
   
           return true;
       }
       else
           return _next(entry, includeComment);
   }
   
   // Get the namespace type of the given tag
   int XmlParser::_getNamespaceType(const char* tag)
   {
       const char* pos = strchr(tag, ':');
   
       // If ':' is not found, the tag is not namespace qualified and we
       // need to look for the default name space.
   
       // Search the namespace stack from the top
       for (Sint32 i = _nameSpaces.size() - 1; i >=0; i--)
       {
           // If ':' is found, look for the name space with the matching
           // local name...
           if ((pos && _nameSpaces[i].localName &&
                !strncmp(_nameSpaces[i].localName, tag, pos - tag)) ||
               // ... otherwise look for the default name space. It's the
               // one with localName set to NULL
               (!pos && !_nameSpaces[i].localName))
           {
               return _nameSpaces[i].type;
           }
       }
   
       // If the tag is namespace qualified, but the name space has not been
       // declared, it's malformed XML and we must throw an exception.
       // Note:  The "xml" namespace is specifically defined by the W3C as a
       // reserved prefix ("http://www.w3.org/XML/1998/namespace").
       if (pos && (strncmp(tag, "xml:", 4) != 0))
       {
           throw XmlException(XmlException::UNDECLARED_NAMESPACE, _line);
       }
   
       // Otherwise it's OK not to have a name space.
       return -1;
   }
   
   // Given the extended namespace name, find it in the table of supported
   // namespaces and return its type.
   int XmlParser::_getSupportedNamespaceType(const char* extendedName)
   {
       for (Sint32 i = 0;
            _supportedNamespaces[i].localName != 0;
            i++)
       {
           PEGASUS_ASSERT(_supportedNamespaces[i].type == i);
           if (!strcmp(_supportedNamespaces[i].extendedName, extendedName))
           {
               return _supportedNamespaces[i].type;
           }
       }
       return -1;
   }
   
   XmlNamespace* XmlParser::getNamespace(int nsType)
   {
       for (Sint32 i = _nameSpaces.size() - 1; i >=0; i--)
       {
           if (_nameSpaces[i].type == nsType)
           {
               return &_nameSpaces[i];
           }
       }
       return 0;
 } }
  
 void XmlParser::putBack(XmlEntry& entry) void XmlParser::putBack(XmlEntry& entry)
Line 385 
Line 824 
     // Nothing to do!     // Nothing to do!
 } }
  
 void XmlParser::_skipWhitespace(char*& p)  // A-Za-z0-9_-.  (Note that ':' is not included and must be checked separately)
   static unsigned char _isInnerElementChar[] =
 { {
     while (*p && isspace(*p))      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
       0,0,0,0,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,
       1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
       1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
       0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
       0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
       0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   };
   
   inline Boolean _getQName(char*& p, const char*& localName)
     {     {
         if (*p == '\n')      localName = p;
             _line++;  
       if (!CharSet::isAlNumUnder(Uint8(*p)))
           return false;
   
       p++;
  
       // No explicit test for NULL termination is needed.
       // On position 0 of the array false is returned.
       while (_isInnerElementChar[Uint8(*p)])
           p++;
   
       // We've validated the prefix, now validate the local name
       if (*p == ':')
       {
           localName = ++p;
   
           if (!CharSet::isAlNumUnder(Uint8(*p)))
               return false;
   
           p++;
           // No explicit test for NULL termination is needed.
           // On position 0 of the array false is returned.
           while (_isInnerElementChar[Uint8(*p)])
         p++;         p++;
     }     }
   
       return true;
 } }
  
 Boolean XmlParser::_getElementName(char*& p)  Boolean XmlParser::_getElementName(char*& p, const char*& localName)
 { {
     if (!isalpha(*p) && *p != '_')      if (!_getQName(p, localName))
         throw XmlException(XmlException::BAD_START_TAG, _line);         throw XmlException(XmlException::BAD_START_TAG, _line);
  
     while (*p &&  
         (isalnum(*p) || *p == '_' || *p == '-' || *p == ':' || *p == '.'))  
         p++;  
   
     // The next character must be a space:     // The next character must be a space:
  
     if (isspace(*p))      if (_isspace(*p))
     {     {
         *p++ = '\0';         *p++ = '\0';
         _skipWhitespace(p);          _skipWhitespace(_line, p);
     }     }
  
     if (*p == '>')     if (*p == '>')
Line 422 
Line 890 
     return false;     return false;
 } }
  
 Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement)  Boolean XmlParser::_getOpenElementName(
       char*& p,
       const char*& localName,
       Boolean& openCloseElement)
 { {
     openCloseElement = false;     openCloseElement = false;
  
     if (!isalpha(*p) && *p != '_')      if (!_getQName(p, localName))
         throw XmlException(XmlException::BAD_START_TAG, _line);         throw XmlException(XmlException::BAD_START_TAG, _line);
  
     while (*p &&  
         (isalnum(*p) || *p == '_' || *p == '-' || *p == ':' || *p == '.'))  
         p++;  
   
     // The next character must be a space:     // The next character must be a space:
  
     if (isspace(*p))      if (_isspace(*p))
     {     {
         *p++ = '\0';         *p++ = '\0';
         _skipWhitespace(p);          _skipWhitespace(_line, p);
     }     }
  
     if (*p == '>')     if (*p == '>')
Line 458 
Line 925 
     return false;     return false;
 } }
  
 void XmlParser::_getAttributeNameAndEqual(char*& p)  void XmlParser::_getAttributeNameAndEqual(char*& p, const char*& localName)
 { {
     if (!isalpha(*p) && *p != '_')      if (!_getQName(p, localName))
         throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);         throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
  
     while (*p &&  
         (isalnum(*p) || *p == '_' || *p == '-' || *p == ':' || *p == '.'))  
         p++;  
   
     char* term = p;     char* term = p;
  
     _skipWhitespace(p);      _skipWhitespace(_line, p);
  
     if (*p != '=')     if (*p != '=')
         throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);         throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
  
     p++;     p++;
  
     _skipWhitespace(p);      _skipWhitespace(_line, p);
  
     *term = '\0';     *term = '\0';
 } }
  
 void XmlParser::_getAttributeValue(char*& p)  
 {  
     // ATTN-B: handle values contained in semiquotes:  
   
     if (*p != '"' && *p != '\'')  
         throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);  
   
     char startChar = *p++;  
   
     while (*p && *p != startChar)  
         p++;  
   
     if (*p != startChar)  
         throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);  
   
     *p++ = '\0';  
 }  
   
 void XmlParser::_getComment(char*& p) void XmlParser::_getComment(char*& p)
 { {
     // Now p points to first non-whitespace character beyond "<--" sequence:     // Now p points to first non-whitespace character beyond "<--" sequence:
Line 563 
Line 1008 
     p++;     p++;
 } }
  
 void XmlParser::_getContent(char*& p)  
 {  
     while (*p && *p != '<')  
     {  
         if (*p == '\n')  
             _line++;  
   
         p++;  
     }  
 }  
   
 void XmlParser::_substituteReferences(char* text)  
 {  
     Uint32 rem = strlen(text);  
   
     for (char* p = text; *p; p++, rem--)  
     {  
         if (*p == '&')  
         {  
             // Look for predefined entity reference:  
   
             Boolean found = false;  
   
             for (Uint32 i = 0; i < _REFERENCES_SIZE; i++)  
             {  
                 Uint32 length = _references[i].length;  
                 const char* match = _references[i].match;  
   
                 if (strncmp(p, _references[i].match, length) == 0)  
                 {  
                     found = true;  
                     *p = _references[i].replacement;  
                     char* q = p + length;  
                     rem = rem - length + 1;  
                     memmove(p + 1, q, rem);  
                 }  
             }  
   
             // If not found, then at least make sure it is well formed:  
   
             if (!found)  
             {  
                 char* start = p;  
                 p++;  
   
                 XmlException::Code code = XmlException::MALFORMED_REFERENCE;  
   
                 if (isalpha(*p) || *p == '_')  
                 {  
                     for (p++; *p && *p != ';'; p++)  
                     {  
                         if (!isalnum(*p) && *p != '_')  
                             throw XmlException(code, _line);  
                     }  
                 }  
                 else if (*p == '#')  
                 {  
                     for (p++ ; *p && *p != ';'; p++)  
                     {  
                         if (!isdigit(*p))  
                             throw XmlException(code, _line);  
                     }  
                 }  
   
                 if (*p != ';')  
                     throw XmlException(code, _line);  
   
                 rem -= p - start;  
             }  
         }  
     }  
 }  
   
 static const char _EMPTY_STRING[] = "";  
   
 void XmlParser::_getElement(char*& p, XmlEntry& entry) void XmlParser::_getElement(char*& p, XmlEntry& entry)
 { {
     entry.attributeCount = 0;  
   
     //--------------------------------------------------------------------------     //--------------------------------------------------------------------------
     // Get the element name (expect one of these: '?', '!', [A-Za-z_])     // Get the element name (expect one of these: '?', '!', [A-Za-z_])
     //--------------------------------------------------------------------------     //--------------------------------------------------------------------------
Line 651 
Line 1019 
         entry.type = XmlEntry::XML_DECLARATION;         entry.type = XmlEntry::XML_DECLARATION;
         entry.text = ++p;         entry.text = ++p;
  
         Boolean openCloseElement = false;          if (_getElementName(p, entry.localName))
   
         if (_getElementName(p))  
             return;             return;
     }     }
     else if (*p == '!')     else if (*p == '!')
Line 676 
Line 1042 
             entry.type = XmlEntry::CDATA;             entry.type = XmlEntry::CDATA;
             entry.text = p;             entry.text = p;
             _getCData(p);             _getCData(p);
               entry.textLen = strlen(entry.text);
             return;             return;
         }         }
         else if (memcmp(p, "DOCTYPE", 7) == 0)         else if (memcmp(p, "DOCTYPE", 7) == 0)
         {         {
             entry.type = XmlEntry::DOCTYPE;             entry.type = XmlEntry::DOCTYPE;
             entry.text = _EMPTY_STRING;              entry.text = "";
             _getDocType(p);             _getDocType(p);
             return;             return;
         }         }
Line 692 
Line 1059 
         entry.type = XmlEntry::END_TAG;         entry.type = XmlEntry::END_TAG;
         entry.text = ++p;         entry.text = ++p;
  
         if (!_getElementName(p))          if (!_getElementName(p, entry.localName))
             throw(XmlException(XmlException::BAD_END_TAG, _line));             throw(XmlException(XmlException::BAD_END_TAG, _line));
  
         return;         return;
     }     }
     else if (isalpha(*p) || *p == '_')      else if (CharSet::isAlphaUnder(Uint8(*p)))
     {     {
         entry.type = XmlEntry::START_TAG;         entry.type = XmlEntry::START_TAG;
         entry.text = p;         entry.text = p;
  
         Boolean openCloseElement = false;         Boolean openCloseElement = false;
  
         if (_getOpenElementName(p, openCloseElement))          if (_getOpenElementName(p, entry.localName, openCloseElement))
         {         {
             if (openCloseElement)             if (openCloseElement)
                 entry.type = XmlEntry::EMPTY_TAG;                 entry.type = XmlEntry::EMPTY_TAG;
Line 741 
Line 1108 
         }         }
  
         XmlAttribute attr;         XmlAttribute attr;
           attr.nsType = -1;
         attr.name = p;         attr.name = p;
         _getAttributeNameAndEqual(p);          _getAttributeNameAndEqual(p, attr.localName);
  
         if (*p != '"' && *p != '\'')          // Get the attribute value (e.g., "some value")
           {
               if ((*p != '"') && (*p != '\''))
               {
             throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);             throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
               }
   
               char quote = *p++;
   
               char* start;
               _normalizeAttributeValue(_line, p, quote, start);
               attr.value = start;
   
               if (*p != quote)
               {
                   throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
               }
   
               // Overwrite the closing quote with a null-terminator:
  
         attr.value = p + 1;              *p++ = '\0';
         _getAttributeValue(p);          }
  
         if (entry.type == XmlEntry::XML_DECLARATION)         if (entry.type == XmlEntry::XML_DECLARATION)
         {         {
             // The next thing must a space or a "?>":             // The next thing must a space or a "?>":
  
             if (!(p[0] == '?' && p[1] == '>') && !isspace(*p))              if (!(p[0] == '?' && p[1] == '>') && !_isspace(*p))
             {             {
                 throw XmlException(                 throw XmlException(
                     XmlException::BAD_ATTRIBUTE_VALUE, _line);                     XmlException::BAD_ATTRIBUTE_VALUE, _line);
             }             }
         }         }
         else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || isspace(*p)))          else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || _isspace(*p)))
         {         {
             // The next thing must be a space or a '>':             // The next thing must be a space or a '>':
  
             throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);             throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
         }         }
  
         _skipWhitespace(p);          _skipWhitespace(_line, p);
   
         if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES)  
             throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line);  
  
         _substituteReferences((char*)attr.value);          entry.attributes.append(attr);
         entry.attributes[entry.attributeCount++] = attr;  
     }     }
 } }
  
Line 805 
Line 1186 
  
     PEGASUS_STD(cout) << '\n';     PEGASUS_STD(cout) << '\n';
  
     for (Uint32 i = 0; i < attributeCount; i++)      for (Uint32 i = 0, n = attributes.size(); i < n; i++)
     {     {
         PEGASUS_STD(cout) << "    " << attributes[i].name << "=\"";         PEGASUS_STD(cout) << "    " << attributes[i].name << "=\"";
         _printValue(attributes[i].value);         _printValue(attributes[i].value);
Line 816 
Line 1197 
 const XmlAttribute* XmlEntry::findAttribute( const XmlAttribute* XmlEntry::findAttribute(
     const char* name) const     const char* name) const
 { {
     for (Uint32 i = 0; i < attributeCount; i++)      for (Uint32 i = 0, n = attributes.size(); i < n; i++)
     {     {
         if (strcmp(attributes[i].name, name) == 0)         if (strcmp(attributes[i].name, name) == 0)
             return &attributes[i];             return &attributes[i];
Line 825 
Line 1206 
     return 0;     return 0;
 } }
  
   const XmlAttribute* XmlEntry::findAttribute(
       int attrNsType,
       const char* name) const
   {
       for (Uint32 i = 0, n = attributes.size(); i < n; i++)
       {
           if ((attributes[i].nsType == attrNsType) &&
               (strcmp(attributes[i].localName, name) == 0))
           {
               return &attributes[i];
           }
       }
   
       return 0;
   }
   
 // Find first non-whitespace character (set first) and last non-whitespace // Find first non-whitespace character (set first) and last non-whitespace
 // character (set last one past this). For example, consider this string: // character (set last one past this). For example, consider this string:
 // //
Line 840 
Line 1237 
 { {
     first = str;     first = str;
  
     while (isspace(*first))      while (_isspace(*first))
         first++;         first++;
  
     if (!*first)     if (!*first)
Line 851 
Line 1248 
  
     last = first + strlen(first);     last = first + strlen(first);
  
     while (last != first && isspace(last[-1]))      while (last != first && _isspace(last[-1]))
         last--;         last--;
 } }
  
Line 897 
Line 1294 
     if (!end || end != last)     if (!end || end != last)
         return false;         return false;
  
     value = Uint32(tmp);      value = static_cast<Real32>(tmp);
     return true;     return true;
 } }
  
Line 921 
Line 1318 
     if (!getAttributeValue(name, tmp))     if (!getAttributeValue(name, tmp))
         return false;         return false;
  
     value = tmp;      value = String(tmp);
     return true;     return true;
 } }
  
 void XmlAppendCString(Array<Sint8>& out, const char* str)  void XmlAppendCString(Buffer& out, const char* str)
 { {
     out.append(str, strlen(str));      out.append(str, static_cast<Uint32>(strlen(str)));
 } }
  
 PEGASUS_NAMESPACE_END PEGASUS_NAMESPACE_END


Legend:
Removed from v.1.16  
changed lines
  Added in v.1.56

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2