pegasus/src/Pegasus/Common/XmlParser.cpp - diff

Return to XmlParser.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

Diff for /pegasus/src/Pegasus/Common/XmlParser.cpp between version 1.9 and 1.42

version 1.9, 2001/06/03 17:05:03

version 1.42, 2007/07/30 06:50:57

Line 1

//%/////////////////////////////////////////////////////////////////////////////

//%2006////////////////////////////////////////////////////////////////////////

// Permission is hereby granted, free of charge, to any person obtaining a

// copy of this software and associated documentation files (the "Software"),

// Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.

// to deal in the Software without restriction, including without limitation

// the rights to use, copy, modify, merge, publish, distribute, sublicense,

// IBM Corp.; EMC Corporation, The Open Group.

// and/or sell copies of the Software, and to permit persons to whom the

// Software is furnished to do so, subject to the following conditions:

// IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

// EMC Corporation; VERITAS Software Corporation; The Open Group.

// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL

// EMC Corporation; Symantec Corporation; The Open Group.

// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING

// Permission is hereby granted, free of charge, to any person obtaining a copy

// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER

// of this software and associated documentation files (the "Software"), to

// DEALINGS IN THE SOFTWARE.

// deal in the Software without restriction, including without limitation the

// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or

// sell copies of the Software, and to permit persons to whom the Software is

// furnished to do so, subject to the following conditions:

// THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN

// ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED

// "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT

// LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR

// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT

// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN

// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION

// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

//==============================================================================

// Author: Mike Brasher (mbrasher@bmc.com)

// Modified By:

//%/////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////

Line 48

Line 54

// &quot - full quote

// &apos - apostrophe

// as well as character (numeric) references:

// &#49; - decimal reference for character '1'

// &#x31; - hexadecimal reference for character '1'

// 4. Element names and attribute names take the following form:

// [A-Za-z_][A-Za-z_0-9-.:]

Line 67

Line 78

// TODO:

// Handle <!DOCTYPE...> sections which are complicated (containing

// ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is

// work. Handle <!DOCTYPE...> sections which are complicated (containing

// rules rather than references to files).

// Handle reference of this form: "&#913;"

// Remove newlines from string literals:

// Example: <xyz x="hello

Line 79

Line 89

////////////////////////////////////////////////////////////////////////////////

#include <Pegasus/Common/Config.h>

#include <cctype>

#include <cassert>

#include <cstdio>

#include <cstdlib>

#include <cstring>

#include "XmlParser.h"

#include "Logger.h"

#include "ExceptionRep.h"

#include "CharSet.h"

PEGASUS_NAMESPACE_BEGIN

#define PEGASUS_ARRAY_T XmlEntry

# include "ArrayImpl.h"

#undef PEGASUS_ARRAY_T

////////////////////////////////////////////////////////////////////////////////

// Static helper functions

Line 122

Line 129

char replacement;

};

// ATTN: Add support for more entity references

static EntityReference _references[] =

{

{ "&", 5, '&' },

Line 131

Line 139

{ "'", 6, '\'' }

};

static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));

// Remove all redundant spaces from the given string:

static void _normalize(char* text)

{

Uint32 length = strlen(text);

char* p = text;

char* end = p + length;

// Remove leading spaces:

while (isspace(*p))

p++;

if (p != text)

memmove(text, p, end - p + 1);

p = text;

// Implements a check for a whitespace character, without calling

// isspace( ). The isspace( ) function is locale-sensitive,

// Look for sequences of more than one space and remove all but one.

// and incorrectly flags some chars above 0x7f as whitespace. This

// causes the XmlParser to incorrectly parse UTF-8 data.

for (;;)

// Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml)

// defines white space as:

// S ::= (#x20 | #x9 | #xD | #xA)+

static inline int _isspace(char c)

{

// Advance to the next space:

return CharSet::isXmlWhiteSpace((Uint8)c);

while (*p && !isspace(*p))

p++;

if (!*p)

break;

// Advance to the next non-space:

char* q = p++;

while (isspace(*p))

p++;

// Discard trailing spaces (if we are at the end):

if (!*p)

{

*q = '\0';

break;

}

// Remove the redundant spaces:

static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));

Uint32 n = p - q;

if (n > 1)

{

*q++ = ' ';

memmove(q, p, end - p + 1);

p = q;

}

////////////////////////////////////////////////////////////////////////////////

Line 218

Line 182

"Semantic error"

};

// Message keys handed to MessageLoaderParms when an XmlException message is
// built. The _formMessage helpers index this table and _xmlMessages with the
// same (code - 1) value, so the entries here must stay in the same order as
// the default texts in _xmlMessages.
static const char* _xmlKeys[] =

{

"Common.XmlParser.BAD_START_TAG",

"Common.XmlParser.BAD_END_TAG",

"Common.XmlParser.BAD_ATTRIBUTE_NAME",

"Common.XmlParser.EXPECTED_EQUAL_SIGN",

"Common.XmlParser.BAD_ATTRIBUTE_VALUE",

"Common.XmlParser.MINUS_MINUS_IN_COMMENT",

"Common.XmlParser.UNTERMINATED_COMMENT",

"Common.XmlParser.UNTERMINATED_CDATA",

"Common.XmlParser.UNTERMINATED_DOCTYPE",

"Common.XmlParser.TOO_MANY_ATTRIBUTES",

"Common.XmlParser.MALFORMED_REFERENCE",

"Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",

"Common.XmlParser.START_END_MISMATCH",

"Common.XmlParser.UNCLOSED_TAGS",

"Common.XmlParser.MULTIPLE_ROOTS",

"Common.XmlParser.VALIDATION_ERROR",

"Common.XmlParser.SEMANTIC_ERROR"

};

// l10n replace _formMessage (comment out the old one)

static String _formMessage(Uint32 code, Uint32 line, const String& message)

{

String result = _xmlMessages[Uint32(code) - 1];

Line 235

Line 222

return result;

}

// Builds the MessageLoaderParms describing a parser error.
//
//   code    - error code; (code - 1) selects the entry in both _xmlMessages
//             and _xmlKeys.
//   line    - line number, passed as the $0 argument.
//   message - optional detail text. When non-empty it is prefixed with ": "
//             and passed as the $1 argument.
static MessageLoaderParms _formMessage(
    Uint32 code,
    Uint32 line,
    const String& message)
{
    const Uint32 index = Uint32(code) - 1;

    // Default (untranslated) text, always carrying the line placeholder.
    String defaultText = _xmlMessages[index];
    defaultText.append(": on line $0");

    String lookupKey = _xmlKeys[index];
    String detail = message;

    // Only reference $1 when there really is detail text to show.
    if (message.size())
    {
        detail = ": " + detail;
        defaultText.append("$1");
    }

    return MessageLoaderParms(lookupKey, defaultText, line, detail);
}

// Builds the MessageLoaderParms for the leading part of a parser error
// message: the default text for the code followed by ": on line $0", with
// the line number as the only argument. (code - 1) selects the entry in both
// _xmlMessages and _xmlKeys.
static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
{
    const Uint32 index = Uint32(code) - 1;

    String lookupKey = _xmlKeys[index];

    String defaultText = _xmlMessages[index];
    defaultText.append(": on line $0");

    return MessageLoaderParms(lookupKey, defaultText, line);
}

XmlException::XmlException(

XmlException::Code code,

Line 245

Line 263

}

// Constructs an XmlException whose message is assembled from two parts: the
// text produced by _formPartialMessage(code, lineNumber), followed by the
// message loaded from msgParms.
//
// Note: msgParms is taken by non-const reference and is modified here. A
// non-empty default_msg gets ": " prepended before the message is loaded.
XmlException::XmlException(
    XmlException::Code code,
    Uint32 lineNumber,
    MessageLoaderParms& msgParms)
    : Exception(_formPartialMessage(code, lineNumber))
{
    const bool hasDefaultText = (msgParms.default_msg.size() != 0);

    if (hasDefaultText)
        msgParms.default_msg = ": " + msgParms.default_msg;

    // Append the caller-supplied part to the message set up by the base
    // class constructor.
    _rep->message.append(MessageLoader::getMessage(msgParms));
}

////////////////////////////////////////////////////////////////////////////////

// XmlValidationError

Line 256

Line 289

const String& message)

: XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)

{

}

// Constructs a validation error from message-loader parameters. All work is
// delegated to the XmlException constructor, which is given the
// VALIDATION_ERROR code, the line number and msgParms.
XmlValidationError::XmlValidationError(

Uint32 lineNumber,

MessageLoaderParms& msgParms)

: XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)

{

// Nothing to do here; the base class constructor builds the message.
}

////////////////////////////////////////////////////////////////////////////////

// XmlSemanticError

Line 270

Line 311

const String& message)

: XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)

{

}

// Constructs a semantic error from message-loader parameters. All work is
// delegated to the XmlException constructor, which is given the
// SEMANTIC_ERROR code, the line number and msgParms.
XmlSemanticError::XmlSemanticError(

Uint32 lineNumber,

MessageLoaderParms& msgParms)

: XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)

{

// Nothing to do here; the base class constructor builds the message.
}

////////////////////////////////////////////////////////////////////////////////

// XmlParser

////////////////////////////////////////////////////////////////////////////////

XmlParser::XmlParser(char* text) : _line(1), _text(text), _current(text),

XmlParser::XmlParser(char* text)

_restoreChar('\0'), _foundRoot(false)

: _line(1),

_current(text),

_restoreChar('\0'),

_foundRoot(false)

{

}

inline void _skipWhitespace(Uint32& line, char*& p)

{

while (*p && _isspace(*p))

{

if (*p == '\n')

line++;

p++;

}

// Decodes a predefined entity reference.
//
// On entry p points just past the '&'. If the text at p is one of "gt;",
// "lt;", "apos;", "quot;" or "amp;", p is advanced past the ';' and the
// character the entity stands for is returned. Otherwise p is left untouched
// and -1 is returned.
static int _getEntityRef(char*& p)
{
    struct NamedEntity
    {
        const char* name;   // entity name including the terminating ';'
        size_t length;      // number of characters in name
        int character;      // character the entity expands to
    };

    static const NamedEntity entities[] =
    {
        { "gt;",   3, '>'  },
        { "lt;",   3, '<'  },
        { "apos;", 5, '\'' },
        { "quot;", 5, '"'  },
        { "amp;",  4, '&'  }
    };

    const size_t count = sizeof(entities) / sizeof(entities[0]);

    for (size_t i = 0; i < count; i++)
    {
        // strncmp stops at the first mismatch or NUL, so this never reads
        // beyond the end of the input string.
        if (strncmp(p, entities[i].name, entities[i].length) == 0)
        {
            p += entities[i].length;
            return entities[i].character;
        }
    }

    return -1;
}

// Decodes the numeric part of a character reference.
//
// On entry p points at the first digit: just past "&#" for a decimal
// reference, or just past "&#x" for a hexadecimal one (hex == true). The
// digits must be followed immediately by ';'.
//
// Returns the character value (0..255) and advances p past the ';' on
// success. Returns -1 and leaves p untouched when the reference is
// malformed, has more than 4 hex / 5 decimal digits, or names a value above
// 255.
//
// The digit run is validated by hand rather than trusting strtoul() alone.
// strtoul() skips leading white space and accepts a sign and, for base 16,
// a "0x" prefix, none of which are allowed in an XML character reference.
// Explicit range checks are used so the result does not depend on the
// current locale.
static inline int _getCharRef(char*& p, bool hex)
{
    char* end = p;

    while (((*end >= '0') && (*end <= '9')) ||
           (hex && (((*end >= 'a') && (*end <= 'f')) ||
                    ((*end >= 'A') && (*end <= 'F')))))
    {
        end++;
    }

    // There must be at least one digit, terminated by a semicolon.
    if ((end == p) || (*end != ';'))
    {
        return -1;
    }

    // Limit the number of digits (this also rules out overflow below).
    if ((hex && (end - p > 4)) || (!hex && (end - p > 5)))
    {
        return -1;
    }

    const unsigned long ch = strtoul(p, 0, hex ? 16 : 10);

    // Only single-byte character values are supported.
    if (ch > 255)
    {
        return -1;
    }

    p = end + 1;

    return static_cast<int>(ch);
}

static void _normalize(Uint32& line, char*& p, char end_char, char*& start)

{

// Skip over leading whitespace:

_skipWhitespace(line, p);

start = p;

// Process one character at a time:

char* q = p;

while (*p && (*p != end_char))

{

if (_isspace(*p))

{

// Compress sequences of whitespace characters to a single space

// character. Update line number when newlines encountered.

if (*p++ == '\n')

{

line++;

}

*q++ = ' ';

_skipWhitespace(line, p);

}

else if (*p == '&')

{

// Process entity characters and entity references:

p++;

int ch;

if (*p == '#')

{

*p++;

if (*p == 'x')

{

p++;

ch = _getCharRef(p, true);

}

else

{

ch = _getCharRef(p, false);

}

else

{

ch = _getEntityRef(p);

}

if (ch == -1)

{

throw XmlException(XmlException::MALFORMED_REFERENCE, line);

}

*q++ = ch;

}

else

{

*q++ = *p++;

}

// We encountered the end_char or a zero-terminator.

Boolean XmlParser::next(XmlEntry& entry)

*q = *p;

// Remove single trailing whitespace (consecutive whitespaces already

// compressed above). Since p >= q, we can tell if we need to strip a

// trailing space from q by looking at the end of p. We must not look at

// the last character of p, though, if p is an empty string.

if ((p != start) && _isspace(p[-1]))

{

q--;

}

// If q got behind p, it is safe and necessary to null-terminate q

if (q != p)

{

*q = '\0';

}

Boolean XmlParser::next(XmlEntry& entry, Boolean includeComment)

{

if (!_putBackStack.isEmpty())

{

Line 307

Line 523

_restoreChar = '\0';

}

// Loop until we are done with comments if includeComment is false.

{

// Skip over any whitespace:

_skipWhitespace(_line, _current);

_skipWhitespace(_current);

if (!*_current)

{

Line 350

Line 568

_stack.pop();

}

return true;

}

else

{

// Normalize the content:

char* start;

_normalize(_line, _current, '<', start);

// Get the content:

entry.type = XmlEntry::CONTENT;

entry.text = _current;

entry.text = start;

_getContent(_current);

// Overwrite '<' with a null character (temporarily).

_restoreChar = *_current;

*_current = '\0';

if (nullTerminator)

*nullTerminator = '\0';

}

_substituteReferences((char*)entry.text);

}while (!includeComment && entry.type == XmlEntry::COMMENT);

_normalize((char*)entry.text);

return true;

}

void XmlParser::putBack(XmlEntry& entry)

{

Line 381

Line 604

// Nothing to do!

}

void XmlParser::_skipWhitespace(char*& p)

// A-Za-z0-9_-:.

static unsigned char _isInnerElementChar[] =

{

while (*p && isspace(*p))

0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,

{

0,0,0,0,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,

if (*p == '\n')

1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,

_line++;

1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,

0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,

p++;

0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,

}

0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,

}

};

Boolean XmlParser::_getElementName(char*& p)

{

if (!isalpha(*p) && *p != '_')

if (!CharSet::isAlNumUnder(Uint8(*p)))

throw XmlException(XmlException::BAD_START_TAG, _line);

while (*p &&

p++;

(isalnum(*p) || *p == '_' || *p == '-' || *p == ':' || *p == '.'))

while (*p && _isInnerElementChar[Uint8(*p)])

p++;

// The next character must be a space:

if (isspace(*p))

if (_isspace(*p))

{

*p++ = '\0';

_skipWhitespace(p);

_skipWhitespace(_line, p);

}

if (*p == '>')

Line 422

Line 647

{

openCloseElement = false;

if (!isalpha(*p) && *p != '_')

if (!CharSet::isAlNumUnder(Uint8(*p)))

throw XmlException(XmlException::BAD_START_TAG, _line);

while (*p &&

p++;

(isalnum(*p) || *p == '_' || *p == '-' || *p == ':' || *p == '.'))

while (*p && _isInnerElementChar[Uint8(*p)])

p++;

// The next character must be a space:

if (isspace(*p))

if (_isspace(*p))

{

*p++ = '\0';

_skipWhitespace(p);

_skipWhitespace(_line, p);

}

if (*p == '>')

Line 456

Line 682

void XmlParser::_getAttributeNameAndEqual(char*& p)

{

if (!isalpha(*p) && *p != '_')

if (!CharSet::isAlNumUnder((Uint8)*p))

throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);

while (*p &&

p++;

(isalnum(*p) || *p == '_' || *p == '-' || *p == ':' || *p == '.'))

while (*p && _isInnerElementChar[Uint8(*p)])

p++;

char* term = p;

_skipWhitespace(p);

_skipWhitespace(_line, p);

if (*p != '=')

throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);

p++;

_skipWhitespace(p);

_skipWhitespace(_line, p);

*term = '\0';

}

void XmlParser::_getAttributeValue(char*& p)

{

// ATTN-B: handle values contained in semiquotes:

if (*p != '"' && *p != '\'')

throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);

char startChar = *p++;

while (*p && *p != startChar)

p++;

if (*p != startChar)

throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);

*p++ = '\0';

}

void XmlParser::_getComment(char*& p)

{

// Now p points to first non-whitespace character beyond "<!--" sequence:

Line 559

Line 768

p++;

}

void XmlParser::_getContent(char*& p)

{

while (*p && *p != '<')

{

if (*p == '\n')

_line++;

p++;

}

void XmlParser::_substituteReferences(char* text)

{

Uint32 rem = strlen(text);

for (char* p = text; *p; p++, rem--)

{

if (*p == '&')

{

// Look for predefined entity reference:

Boolean found = false;

for (Uint32 i = 0; i < _REFERENCES_SIZE; i++)

{

Uint32 length = _references[i].length;

const char* match = _references[i].match;

if (strncmp(p, _references[i].match, length) == 0)

{

found = true;

*p = _references[i].replacement;

char* q = p + length;

rem = rem - length + 1;

memmove(p + 1, q, rem);

}

// If not found, then at least make sure it is well formed:

if (!found)

{

char* start = p;

p++;

XmlException::Code code = XmlException::MALFORMED_REFERENCE;

if (isalpha(*p) || *p == '_')

{

for (p++; *p && *p != ';'; p++)

{

if (!isalnum(*p) && *p != '_')

throw XmlException(code, _line);

}

else if (*p == '#')

{

for (p++ ; *p && *p != ';'; p++)

{

if (!isdigit(*p))

throw XmlException(code, _line);

}

if (*p != ';')

throw XmlException(code, _line);

rem -= p - start;

}

static const char _EMPTY_STRING[] = "";

void XmlParser::_getElement(char*& p, XmlEntry& entry)

{

entry.attributeCount = 0;

Line 677

Line 811

else if (memcmp(p, "DOCTYPE", 7) == 0)

{

entry.type = XmlEntry::DOCTYPE;

entry.text = _EMPTY_STRING;

entry.text = "";

_getDocType(p);

return;

}

Line 693

Line 827

return;

}

else if (isalpha(*p) || *p == '_')

else if ((((*p >= 'A') && (*p <= 'Z')) ||

((*p >= 'a') && (*p <= 'z')) ||

(*p == '_')))

{

entry.type = XmlEntry::START_TAG;

entry.text = p;

Line 740

Line 876

attr.name = p;

_getAttributeNameAndEqual(p);

if (*p != '"' && *p != '\'')

// Get the attribute value (e.g., "some value")

{

if ((*p != '"') && (*p != '\''))

{

throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);

}

char quote = *p++;

char* start;

_normalize(_line, p, quote, start);

attr.value = start;

if (*p != quote)

{

throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);

}

attr.value = p + 1;

// Overwrite the closing quote with a null-terminator:

_getAttributeValue(p);

*p++ = '\0';

}

if (entry.type == XmlEntry::XML_DECLARATION)

{

// The next thing must be a space or a "?>":

if (!(p[0] == '?' && p[1] == '>') && !isspace(*p))

if (!(p[0] == '?' && p[1] == '>') && !_isspace(*p))

{

throw XmlException(

XmlException::BAD_ATTRIBUTE_VALUE, _line);

}

else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || isspace(*p)))

else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || _isspace(*p)))

{

// The next thing must be a space or a '>':

throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);

}

_skipWhitespace(p);

_skipWhitespace(_line, p);

if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES)

throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line);

_substituteReferences((char*)attr.value);

entry.attributes[entry.attributeCount++] = attr;

}

Line 836

Line 988

{

first = str;

while (isspace(*first))

while (_isspace(*first))

first++;

if (!*first)

Line 847

Line 999

last = first + strlen(first);

while (last != first && isspace(last[-1]))

while (last != first && _isspace(last[-1]))

last--;

}

Line 893

Line 1045

if (!end || end != last)

return false;

value = Uint32(tmp);

value = static_cast<Real32>(tmp);

return true;

}

Line 917

Line 1069

if (!getAttributeValue(name, tmp))

return false;

value = tmp;

value = String(tmp);

return true;

}

void XmlAppendCString(Array<Sint8>& out, const char* str)

void XmlAppendCString(Buffer& out, const char* str)

{

out.append(str, strlen(str));

out.append(str, static_cast<Uint32>(strlen(str)));

}

PEGASUS_NAMESPACE_END

Legend:

Removed from v.1.9
changed lines
	Added in v.1.42

No CVS admin address has been configured