pegasus/src/Pegasus/Common/XmlParser.cpp - diff

Return to XmlParser.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

Diff for /pegasus/src/Pegasus/Common/XmlParser.cpp between version 1.41 and 1.56

version 1.41, 2007/02/28 16:10:06

version 1.56, 2012/07/26 11:14:54

Line 1

//%2006////////////////////////////////////////////////////////////////////////

//%LICENSE////////////////////////////////////////////////////////////////

// Licensed to The Open Group (TOG) under one or more contributor license

// Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.

// agreements. Refer to the OpenPegasusNOTICE.txt file distributed with

// this work for additional information regarding copyright ownership.

// IBM Corp.; EMC Corporation, The Open Group.

// Each contributor licenses this file to you under the OpenPegasus Open

// Source License; you may not use this file except in compliance with the

// IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.

// License.

// EMC Corporation; VERITAS Software Corporation; The Open Group.

// Permission is hereby granted, free of charge, to any person obtaining a

// copy of this software and associated documentation files (the "Software"),

// EMC Corporation; Symantec Corporation; The Open Group.

// to deal in the Software without restriction, including without limitation

// the rights to use, copy, modify, merge, publish, distribute, sublicense,

// Permission is hereby granted, free of charge, to any person obtaining a copy

// and/or sell copies of the Software, and to permit persons to whom the

// of this software and associated documentation files (the "Software"), to

// Software is furnished to do so, subject to the following conditions:

// deal in the Software without restriction, including without limitation the

// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or

// The above copyright notice and this permission notice shall be included

// sell copies of the Software, and to permit persons to whom the Software is

// in all copies or substantial portions of the Software.

// furnished to do so, subject to the following conditions:

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS

// THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN

// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF

// ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED

// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

// "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT

// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY

// LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR

// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,

// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT

// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE

// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN

// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION

// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

//==============================================================================

//////////////////////////////////////////////////////////////////////////

//%/////////////////////////////////////////////////////////////////////////////

Line 129

Line 127

char replacement;

};

// ATTN: Add support for more entity references

static EntityReference _references[] =

{

{ "&", 5, '&' },

{ "<", 4, '<' },

{ ">", 4, '>' },

{ """, 6, '"' },

{ "'", 6, '\'' }

};

// Implements a check for a whitespace character, without calling

// isspace( ). The isspace( ) function is locale-sensitive,

// and incorrectly flags some chars above 0x7f as whitespace. This

Line 153

Line 140

return CharSet::isXmlWhiteSpace((Uint8)c);

}

static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));

////////////////////////////////////////////////////////////////////////////////

Line 172

Line 158

"Unterminated comment",

"Unterminated CDATA block",

"Unterminated DOCTYPE",

"Too many attributes: parser only handles 10",

"Malformed reference",

"Expected a comment or CDATA following \"<!\" sequence",

"Closing element does not match opening element",

"One or more tags are still open",

"More than one root element was encountered",

"Validation error",

"Semantic error"

"Semantic error",

"Namespace not declared"

};

static const char* _xmlKeys[] =

Line 193

Line 179

"Common.XmlParser.UNTERMINATED_COMMENT",

"Common.XmlParser.UNTERMINATED_CDATA",

"Common.XmlParser.UNTERMINATED_DOCTYPE",

"Common.XmlParser.TOO_MANY_ATTRIBUTES",

"Common.XmlParser.MALFORMED_REFERENCE",

"Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",

"Common.XmlParser.START_END_MISMATCH",

"Common.XmlParser.UNCLOSED_TAGS",

"Common.XmlParser.MULTIPLE_ROOTS",

"Common.XmlParser.VALIDATION_ERROR",

"Common.XmlParser.SEMANTIC_ERROR"

"Common.XmlParser.SEMANTIC_ERROR",

"Common.XmlParser.UNDECLARED_NAMESPACE"

};

// l10n replace _formMessage (comment out the old one)

// Builds a plain (non-localized) error string of the form
//     "<message text>: on line <line>[: <message>]"
//
// code    - 1-based error code; used (minus one) as an index into
//           _xmlMessages.
// line    - line number at which the error was detected.
// message - optional extra detail; appended after ": " only when non-empty.
//
// Returns the assembled String.
static String _formMessage(Uint32 code, Uint32 line, const String& message)
{
    String result = _xmlMessages[Uint32(code) - 1];

    // The line number is unsigned, so format it with an unsigned conversion.
    // Widening to unsigned long keeps the format specifier correct regardless
    // of the underlying type chosen for Uint32.
    char buffer[32];
    sprintf(buffer, "%lu", static_cast<unsigned long>(line));

    result.append(": on line ");
    result.append(buffer);

    if (message.size())
    {
        result.append(": ");
        result.append(message);
    }

    return result;
}

static MessageLoaderParms _formMessage(

Uint32 code,

Line 230

Line 196

const String& message)

{

String dftMsg = _xmlMessages[Uint32(code) - 1];

String key = _xmlKeys[Uint32(code) - 1];

const char* key = _xmlKeys[Uint32(code) - 1];

String msg = message;

dftMsg.append(": on line $0");

Line 240

Line 206

dftMsg.append("$1");

}

return MessageLoaderParms(key, dftMsg, line ,msg);

return MessageLoaderParms(key, dftMsg.getCString(), line ,msg);

}

static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)

{

String dftMsg = _xmlMessages[Uint32(code) - 1];

String key = _xmlKeys[Uint32(code) - 1];

const char* key = _xmlKeys[Uint32(code) - 1];

dftMsg.append(": on line $0");

return MessageLoaderParms(key, dftMsg, line);

return MessageLoaderParms(key, dftMsg.getCString(), line);

}

Line 328

Line 294

////////////////////////////////////////////////////////////////////////////////

XmlParser::XmlParser(char* text)

XmlParser::XmlParser(char* text, XmlNamespace* ns, Boolean hideEmptyTags)

: _line(1),

_text(text),

_current(text),

_restoreChar('\0'),

_foundRoot(false)

_foundRoot(false),

_supportedNamespaces(ns),

// Start valid indexes with -2. -1 is reserved for not found.

_currentUnsupportedNSType(-2),

_hideEmptyTags(hideEmptyTags)

{

}

Line 348

Line 317

}

#if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \

defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC)

#pragma optimize( "", off )

#endif

static int _getEntityRef(char*& p)

{

if ((p[0] == 'g') && (p[1] == 't') && (p[2] == ';'))

Line 384

Line 357

return -1;

}

#if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \

defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC)

#pragma optimize( "", on )

#endif

static inline int _getCharRef(char*& p, bool hex)

static inline int _getCharRef(char*& p)

{

char* end;

unsigned long ch;

Boolean hex = false;

if (hex)

if (*p == 'x')

{

ch = strtoul(p, &end, 16);

hex = true;

ch = strtoul(++p, &end, 16);

}

else

{

Line 414

Line 393

return ch;

}

static void _normalize(Uint32& line, char*& p, char end_char, char*& start)

// Parse an entity reference or a character reference

static inline int _getRef(Uint32 line, char*& p)

{

// Skip over leading whitespace:

int ch;

_skipWhitespace(line, p);

if (*p == '#')

start = p;

{

ch = _getCharRef(++p);

}

else

{

ch = _getEntityRef(p);

}

if (ch == -1)

{

throw XmlException(XmlException::MALFORMED_REFERENCE, line);

}

return ch;

}

static inline void _normalizeElementValue(

Uint32& line,

char*& p,

Uint32 &textLen)

{

// Process one character at a time:

char* q = p;

char *start = p;

while (*p && (*p != end_char))

while (*p && (*p != '<'))

{

if (_isspace(*p))

{

// Compress sequences of whitespace characters to a single space

// Trim whitespace from the end of the value, but do not compress

// character. Update line number when newlines encountered.

// whitespace within the value.

const char* start = p;

if (*p++ == '\n')

{

line++;

}

*q++ = ' ';

_skipWhitespace(line, p);

}

else if (*p == '&')

{

// Process entity characters and entity references:

p++;

int ch;

if (*p == '#')

if (*p && (*p != '<'))

{

*p++;

// Transfer internal whitespace to q without compressing it.

const char* i = start;

if (*p == 'x')

while (i < p)

{

p++;

*q++ = *i++;

ch = _getCharRef(p, true);

}

else

{

ch = _getCharRef(p, false);

// Do not transfer trailing whitespace to q.

break;

}

else if (*p == '&')

{

// Process an entity reference or a character reference.

*q++ = _getRef(line, ++p);

}

else

{

ch = _getEntityRef(p);

*q++ = *p++;

}

if (ch == -1)

// If q got behind p, it is safe and necessary to null-terminate q

if (q != p)

{

throw XmlException(XmlException::MALFORMED_REFERENCE, line);

*q = '\0';

}

textLen = (Uint32)(q - start);

}

static inline void _normalizeAttributeValue(

Uint32& line,

char*& p,

char end_char,

char*& start)

{

// Skip over leading whitespace:

_skipWhitespace(line, p);

start = p;

// Process one character at a time:

char* q = p;

*q++ = ch;

while (*p && (*p != end_char))

{

if (_isspace(*p))

{

// Compress sequences of whitespace characters to a single space

// character. Update line number when newlines encountered.

if (*p++ == '\n')

{

line++;

}

*q++ = ' ';

_skipWhitespace(line, p);

}

else if (*p == '&')

{

// Process an entity reference or a character reference.

*q++ = _getRef(line, ++p);

}

else

{

Line 480

Line 520

}

// We encountered the end_char or a zero-terminator.

*q = *p;

// Remove single trailing whitespace (consecutive whitespaces already

// compressed above). Since p >= q, we can tell if we need to strip a

// trailing space from q by looking at the end of p. We must not look at

// the last character of p, though, if p is an empty string.

Boolean adjust_q = (p != start) && _isspace(p[-1]);

// We encountered the end_char or a zero-terminator.

if ((p != start) && _isspace(p[-1]))

*q = *p;

if (adjust_q)

{

q--;

}

Line 502

Line 543

}

Boolean XmlParser::next(XmlEntry& entry, Boolean includeComment)

Boolean XmlParser::_next(

XmlEntry& entry,

Boolean includeComment)

{

if (!_putBackStack.isEmpty())

{

Line 524

Line 567

_restoreChar = '\0';

}

entry.attributes.clear();

if (_supportedNamespaces)

{

// Remove namespaces of a deeper scope level from the stack.

while (!_nameSpaces.isEmpty() &&

_nameSpaces.top().scopeLevel > _stack.size())

{

_nameSpaces.pop();

}

// Loop until we are done with comments if includeComment is false.

{

Line 574

Line 629

{

// Normalize the content:

char* start;

char* start = _current;

_normalize(_line, _current, '<', start);

Uint32 textLen;

_normalizeElementValue(_line, _current, textLen);

// Get the content:

entry.type = XmlEntry::CONTENT;

entry.text = start;

entry.textLen = textLen;

// Overwrite '<' with a null character (temporarily).

Line 592

Line 649

}

}while (!includeComment && entry.type == XmlEntry::COMMENT);

if (_supportedNamespaces &&

(entry.type == XmlEntry::START_TAG ||

entry.type == XmlEntry::EMPTY_TAG ||

entry.type == XmlEntry::END_TAG))

{

// Determine the namespace type for this entry

if (entry.type == XmlEntry::START_TAG ||

entry.type == XmlEntry::EMPTY_TAG)

{

// Process namespace declarations and determine the namespace type

// for the attributes.

Uint32 scopeLevel = _stack.size();

if (entry.type == XmlEntry::EMPTY_TAG)

{

// Empty tags are deeper scope, but not pushed onto the stack

scopeLevel++;

}

for (Uint32 i = 0, n = entry.attributes.size(); i < n; i++)

{

XmlAttribute& attr = entry.attributes[i];

if ((strncmp(attr.name, "xmlns:", 6) == 0) ||

(strcmp(attr.name, "xmlns") == 0))

{

// Process a namespace declaration

XmlNamespace ns;

if (attr.name[5] == ':')

{

ns.localName = attr.localName;

}

else

{

// Default name space has no local name

ns.localName = 0;

}

ns.extendedName = attr.value;

ns.scopeLevel = scopeLevel;

ns.type = _getSupportedNamespaceType(ns.extendedName);

// If the namespace is not supported, assign it a unique

// negative identifier.

if (ns.type == -1)

{

ns.type = _currentUnsupportedNSType--;

}

_nameSpaces.push(ns);

}

else

{

// Get the namespace type for this attribute.

attr.nsType = _getNamespaceType(attr.name);

}

entry.nsType = _getNamespaceType(entry.text);

}

else

{

entry.nsType = -1;

}

return true;

}

// Public entry point for fetching the next XML entry.
//
// Delegates to _next(). When _hideEmptyTags is set, an EMPTY_TAG entry is
// reported to the caller as a START_TAG, and a matching END_TAG is pushed
// onto the put-back stack so that it is delivered by a subsequent call.
//
// Returns whatever _next() returns (false when no entry was produced).
Boolean XmlParser::next(XmlEntry& entry, Boolean includeComment)
{
    // Without empty-tag hiding this is a plain pass-through.
    if (!_hideEmptyTags)
        return _next(entry, includeComment);

    // Get the next tag.
    if (!_next(entry, includeComment))
        return false;

    // Anything other than an EMPTY_TAG is handed back untouched.
    if (entry.type != XmlEntry::EMPTY_TAG)
        return true;

    // Split the EMPTY_TAG: the caller sees a START_TAG now, and the
    // synthesized END_TAG is queued on the put-back stack.
    entry.type = XmlEntry::START_TAG;

    XmlEntry endTag;
    endTag.type = XmlEntry::END_TAG;
    endTag.text = entry.text;
    endTag.nsType = entry.nsType;
    endTag.localName = entry.localName;

    _putBackStack.push(endTag);

    return true;
}

// Get the namespace type of the given tag.
//
// tag - a NUL-terminated element or attribute name, optionally qualified
//       with a "prefix:" part.
//
// Returns the type of the innermost (top-most on the stack) declared
// namespace whose local name equals the tag's prefix, or of the innermost
// default namespace when the tag has no prefix. Returns -1 when the tag is
// unqualified and no default namespace is in scope, or when the prefix is
// the reserved "xml" prefix and no matching declaration exists.
//
// Throws XmlException(UNDECLARED_NAMESPACE) when the tag carries a prefix
// (other than "xml") that has not been declared.
int XmlParser::_getNamespaceType(const char* tag)
{
    const char* pos = strchr(tag, ':');

    // If ':' is not found, the tag is not namespace qualified and we
    // need to look for the default name space.

    // Search the namespace stack from the top
    for (Sint32 i = _nameSpaces.size() - 1; i >= 0; i--)
    {
        const char* nsLocalName = _nameSpaces[i].localName;

        if (pos)
        {
            // If ':' is found, look for the name space with the matching
            // local name. The declared name must match the prefix exactly:
            // comparing only the first (pos - tag) characters would let a
            // declaration such as "soapenv" satisfy the prefix "soap".
            if (nsLocalName &&
                !strncmp(nsLocalName, tag, pos - tag) &&
                nsLocalName[pos - tag] == '\0')
            {
                return _nameSpaces[i].type;
            }
        }
        else if (!nsLocalName)
        {
            // ... otherwise look for the default name space. It's the
            // one with localName set to NULL
            return _nameSpaces[i].type;
        }
    }

    // If the tag is namespace qualified, but the name space has not been
    // declared, it's malformed XML and we must throw an exception.
    // Note: The "xml" namespace is specifically defined by the W3C as a
    // reserved prefix ("http://www.w3.org/XML/1998/namespace").
    if (pos && (strncmp(tag, "xml:", 4) != 0))
    {
        throw XmlException(XmlException::UNDECLARED_NAMESPACE, _line);
    }

    // Otherwise it's OK not to have a name space.
    return -1;
}

// Given the extended namespace name, find it in the table of supported
// namespaces and return its type.
//
// The table (_supportedNamespaces) is scanned from index 0 until an entry
// with a null localName is reached, which acts as the end marker. Each
// entry's type is asserted to equal its index in the table.
//
// Returns the matching entry's type, or -1 when extendedName is not in the
// table.
int XmlParser::_getSupportedNamespaceType(const char* extendedName)
{
    for (Sint32 i = 0;
         _supportedNamespaces[i].localName != 0;
         i++)
    {
        PEGASUS_ASSERT(_supportedNamespaces[i].type == i);

        if (!strcmp(_supportedNamespaces[i].extendedName, extendedName))
        {
            return _supportedNamespaces[i].type;
        }
    }

    // Reached the end marker without a match.
    return -1;
}

// Looks up a namespace currently on the namespace stack by its type.
//
// The stack is searched from the top (highest index) downwards, so the
// most recently pushed declaration with the given type wins.
//
// Returns a pointer to the matching stack entry, or 0 when no entry on the
// stack has the requested type.
XmlNamespace* XmlParser::getNamespace(int nsType)
{
    for (Sint32 i = _nameSpaces.size() - 1; i >= 0; i--)
    {
        if (_nameSpaces[i].type == nsType)
        {
            return &_nameSpaces[i];
        }
    }

    // Nothing on the stack carries this type.
    return 0;
}

void XmlParser::putBack(XmlEntry& entry)

{

_putBackStack.push(entry);

Line 605

Line 824

// Nothing to do!

}

// A-Za-z0-9_-:.

// A-Za-z0-9_-. (Note that ':' is not included and must be checked separately)

static unsigned char _isInnerElementChar[] =

{

0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,

0,0,0,0,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,

0,0,0,0,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,

1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,

1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,

0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,

0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,

0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,

};

Boolean XmlParser::_getElementName(char*& p)

inline Boolean _getQName(char*& p, const char*& localName)

{

localName = p;

if (!CharSet::isAlNumUnder(Uint8(*p)))

throw XmlException(XmlException::BAD_START_TAG, _line);

return false;

p++;

// No explicit test for NULL termination is needed.

// On position 0 of the array false is returned.

while (_isInnerElementChar[Uint8(*p)])

p++;

while (*p && _isInnerElementChar[Uint8(*p)])

// We've validated the prefix, now validate the local name

if (*p == ':')

{

localName = ++p;

if (!CharSet::isAlNumUnder(Uint8(*p)))

return false;

p++;

// No explicit test for NULL termination is needed.

// On position 0 of the array false is returned.

while (_isInnerElementChar[Uint8(*p)])

p++;

}

return true;

}

Boolean XmlParser::_getElementName(char*& p, const char*& localName)

{

if (!_getQName(p, localName))

throw XmlException(XmlException::BAD_START_TAG, _line);

// The next character must be a space:

Line 644

Line 890

return false;

}

Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement)

Boolean XmlParser::_getOpenElementName(

char*& p,

const char*& localName,

Boolean& openCloseElement)

{

openCloseElement = false;

if (!CharSet::isAlNumUnder(Uint8(*p)))

if (!_getQName(p, localName))

throw XmlException(XmlException::BAD_START_TAG, _line);

p++;

while (*p && _isInnerElementChar[Uint8(*p)])

p++;

// The next character must be a space:

if (_isspace(*p))

Line 681

Line 925

return false;

}

void XmlParser::_getAttributeNameAndEqual(char*& p)

void XmlParser::_getAttributeNameAndEqual(char*& p, const char*& localName)

{

if (!CharSet::isAlNumUnder((Uint8)*p))

if (!_getQName(p, localName))

throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);

p++;

while (*p && _isInnerElementChar[Uint8(*p)])

p++;

char* term = p;

_skipWhitespace(_line, p);

Line 771

Line 1010

void XmlParser::_getElement(char*& p, XmlEntry& entry)

{

entry.attributeCount = 0;

//--------------------------------------------------------------------------

// Get the element name (expect one of these: '?', '!', [A-Za-z_])

//--------------------------------------------------------------------------

Line 782

Line 1019

entry.type = XmlEntry::XML_DECLARATION;

entry.text = ++p;

Boolean openCloseElement = false;

if (_getElementName(p, entry.localName))

if (_getElementName(p))

return;

}

else if (*p == '!')

Line 807

Line 1042

entry.type = XmlEntry::CDATA;

entry.text = p;

_getCData(p);

entry.textLen = strlen(entry.text);

return;

}

else if (memcmp(p, "DOCTYPE", 7) == 0)

Line 823

Line 1059

entry.type = XmlEntry::END_TAG;

entry.text = ++p;

if (!_getElementName(p))

if (!_getElementName(p, entry.localName))

throw(XmlException(XmlException::BAD_END_TAG, _line));

return;

}

else if ((((*p >= 'A') && (*p <= 'Z')) ||

else if (CharSet::isAlphaUnder(Uint8(*p)))

((*p >= 'a') && (*p <= 'z')) ||

(*p == '_')))

{

entry.type = XmlEntry::START_TAG;

entry.text = p;

Boolean openCloseElement = false;

if (_getOpenElementName(p, openCloseElement))

if (_getOpenElementName(p, entry.localName, openCloseElement))

{

if (openCloseElement)

entry.type = XmlEntry::EMPTY_TAG;

Line 874

Line 1108

}

XmlAttribute attr;

attr.nsType = -1;

attr.name = p;

_getAttributeNameAndEqual(p);

_getAttributeNameAndEqual(p, attr.localName);

// Get the attribute value (e.g., "some value")

{

Line 887

Line 1122

char quote = *p++;

char* start;

_normalize(_line, p, quote, start);

_normalizeAttributeValue(_line, p, quote, start);

attr.value = start;

if (*p != quote)

Line 919

Line 1154

_skipWhitespace(_line, p);

if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES)

entry.attributes.append(attr);

throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line);

entry.attributes[entry.attributeCount++] = attr;

}

Line 954

Line 1186

PEGASUS_STD(cout) << '\n';

for (Uint32 i = 0; i < attributeCount; i++)

for (Uint32 i = 0, n = attributes.size(); i < n; i++)

{

PEGASUS_STD(cout) << " " << attributes[i].name << "=\"";

_printValue(attributes[i].value);

Line 965

Line 1197

const XmlAttribute* XmlEntry::findAttribute(

const char* name) const

{

for (Uint32 i = 0; i < attributeCount; i++)

for (Uint32 i = 0, n = attributes.size(); i < n; i++)

{

if (strcmp(attributes[i].name, name) == 0)

return &attributes[i];

Line 974

Line 1206

return 0;

}

// Finds an attribute by namespace type and local (unprefixed) name.
//
// attrNsType - namespace type the attribute's nsType must equal.
// name       - NUL-terminated local name compared against each attribute's
//              localName.
//
// Returns a pointer to the first attribute matching both criteria, or 0
// when there is none.
const XmlAttribute* XmlEntry::findAttribute(
    int attrNsType,
    const char* name) const
{
    for (Uint32 i = 0, n = attributes.size(); i < n; i++)
    {
        if ((attributes[i].nsType == attrNsType) &&
            (strcmp(attributes[i].localName, name) == 0))
        {
            return &attributes[i];
        }
    }

    // No attribute matched.
    return 0;
}

// Find first non-whitespace character (set first) and last non-whitespace

// character (set last one past this). For example, consider this string:

Legend:

Removed from v.1.41
changed lines
	Added in v.1.56

No CVS admin address has been configured