version 1.29, 2004/10/17 20:39:18
|
version 1.43.2.11, 2008/04/02 00:53:42
|
|
|
//%2004//////////////////////////////////////////////////////////////////////// |
//%2006//////////////////////////////////////////////////////////////////////// |
// | // |
// Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development | // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development |
// Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems. | // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems. |
|
|
// IBM Corp.; EMC Corporation, The Open Group. | // IBM Corp.; EMC Corporation, The Open Group. |
// Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.; | // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.; |
// IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group. | // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group. |
|
// Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.; |
|
// EMC Corporation; VERITAS Software Corporation; The Open Group. |
|
// Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.; |
|
// EMC Corporation; Symantec Corporation; The Open Group. |
// | // |
// Permission is hereby granted, free of charge, to any person obtaining a copy | // Permission is hereby granted, free of charge, to any person obtaining a copy |
// of this software and associated documentation files (the "Software"), to | // of this software and associated documentation files (the "Software"), to |
|
|
// | // |
//============================================================================== | //============================================================================== |
// | // |
// Author: Mike Brasher (mbrasher@bmc.com) |
|
// |
|
// Modified By: |
|
// |
|
//%///////////////////////////////////////////////////////////////////////////// | //%///////////////////////////////////////////////////////////////////////////// |
| |
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
|
|
// &apos - apostrophe | // &apos - apostrophe |
// | // |
// as well as character (numeric) references: | // as well as character (numeric) references: |
|
// |
// 1 - decimal reference for character '1' | // 1 - decimal reference for character '1' |
// 1 - hexadecimal reference for character '1' | // 1 - hexadecimal reference for character '1' |
// | // |
|
|
// | // |
// TODO: | // TODO: |
// | // |
// ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is work. |
// ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is |
// Handle <!DOCTYPE...> sections which are complicated (containing |
// work. Handle <!DOCTYPE...> sections which are complicated (containing |
// rules rather than references to files). | // rules rather than references to files). |
// | // |
// Remove newlines from string literals: | // Remove newlines from string literals: |
|
|
#include "XmlParser.h" | #include "XmlParser.h" |
#include "Logger.h" | #include "Logger.h" |
#include "ExceptionRep.h" | #include "ExceptionRep.h" |
|
#include "CharSet.h" |
| |
PEGASUS_NAMESPACE_BEGIN | PEGASUS_NAMESPACE_BEGIN |
| |
#define PEGASUS_ARRAY_T XmlEntry |
|
# include "ArrayImpl.h" |
|
#undef PEGASUS_ARRAY_T |
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
// | // |
// Static helper functions | // Static helper functions |
|
|
// Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml) | // Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml) |
// defines white space as: | // defines white space as: |
// S ::= (#x20 | #x9 | #xD | #xA)+ | // S ::= (#x20 | #x9 | #xD | #xA)+ |
static int _isspace(char c) |
static inline int _isspace(char c) |
{ | { |
if (c == ' ' || c == '\r' || c == '\t' || c == '\n') |
return CharSet::isXmlWhiteSpace((Uint8)c); |
return 1; |
|
return 0; |
|
} | } |
| |
|
|
static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0])); | static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0])); |
| |
// Remove all redundant spaces from the given string: |
|
|
|
static void _normalize(char* text) |
|
{ |
|
Uint32 length = strlen(text); |
|
char* p = text; |
|
char* end = p + length; |
|
|
|
// Remove leading spaces: |
|
|
|
while (_isspace(*p)) |
|
p++; |
|
|
|
if (p != text) |
|
memmove(text, p, end - p + 1); |
|
|
|
p = text; |
|
|
|
// Look for sequences of more than one space and remove all but one. |
|
|
|
for (;;) |
|
{ |
|
// Advance to the next space: |
|
|
|
while (*p && !_isspace(*p)) |
|
p++; |
|
|
|
if (!*p) |
|
break; |
|
|
|
// Advance to the next non-space: |
|
|
|
char* q = p++; |
|
|
|
while (_isspace(*p)) |
|
p++; |
|
|
|
// Discard trailing spaces (if we are at the end): |
|
|
|
if (!*p) |
|
{ |
|
*q = '\0'; |
|
break; |
|
} |
|
|
|
// Remove the redundant spaces: |
|
|
|
Uint32 n = p - q; |
|
|
|
if (n > 1) |
|
{ |
|
*q++ = ' '; |
|
memmove(q, p, end - p + 1); |
|
p = q; |
|
} |
|
} |
|
} |
|
|
|
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
// | // |
// XmlException | // XmlException |
|
|
"Unterminated comment", | "Unterminated comment", |
"Unterminated CDATA block", | "Unterminated CDATA block", |
"Unterminated DOCTYPE", | "Unterminated DOCTYPE", |
"Too many attributes: parser only handles 10", |
|
"Malformed reference", | "Malformed reference", |
"Expected a comment or CDATA following \"<!\" sequence", | "Expected a comment or CDATA following \"<!\" sequence", |
"Closing element does not match opening element", | "Closing element does not match opening element", |
"One or more tags are still open", | "One or more tags are still open", |
"More than one root element was encountered", | "More than one root element was encountered", |
"Validation error", | "Validation error", |
"Semantic error" |
"Semantic error", |
|
"Namespace not declared" |
}; | }; |
| |
static const char* _xmlKeys[] = | static const char* _xmlKeys[] = |
|
|
"Common.XmlParser.UNTERMINATED_COMMENT", | "Common.XmlParser.UNTERMINATED_COMMENT", |
"Common.XmlParser.UNTERMINATED_CDATA", | "Common.XmlParser.UNTERMINATED_CDATA", |
"Common.XmlParser.UNTERMINATED_DOCTYPE", | "Common.XmlParser.UNTERMINATED_DOCTYPE", |
"Common.XmlParser.TOO_MANY_ATTRIBUTES", |
|
"Common.XmlParser.MALFORMED_REFERENCE", | "Common.XmlParser.MALFORMED_REFERENCE", |
"Common.XmlParser.EXPECTED_COMMENT_OR_CDATA", | "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA", |
"Common.XmlParser.START_END_MISMATCH", | "Common.XmlParser.START_END_MISMATCH", |
"Common.XmlParser.UNCLOSED_TAGS", | "Common.XmlParser.UNCLOSED_TAGS", |
"Common.XmlParser.MULTIPLE_ROOTS", | "Common.XmlParser.MULTIPLE_ROOTS", |
"Common.XmlParser.VALIDATION_ERROR", | "Common.XmlParser.VALIDATION_ERROR", |
"Common.XmlParser.SEMANTIC_ERROR" |
"Common.XmlParser.SEMANTIC_ERROR", |
|
"Common.XmlParser.UNDECLARED_NAMESPACE" |
}; | }; |
| |
// l10n replace _formMessage (comment out the old one) |
|
/* |
|
static String _formMessage(Uint32 code, Uint32 line, const String& message) |
|
{ |
|
String result = _xmlMessages[Uint32(code) - 1]; |
|
|
|
char buffer[32]; |
|
sprintf(buffer, "%d", line); |
|
result.append(": on line "); |
|
result.append(buffer); |
|
| |
if (message.size()) |
static MessageLoaderParms _formMessage( |
{ |
Uint32 code, |
result.append(": "); |
Uint32 line, |
result.append(message); |
const String& message) |
} |
|
|
|
return result; |
|
} |
|
*/ |
|
|
|
static MessageLoaderParms _formMessage(Uint32 code, Uint32 line, const String& message) |
|
{ | { |
String dftMsg = _xmlMessages[Uint32(code) - 1]; | String dftMsg = _xmlMessages[Uint32(code) - 1]; |
String key = _xmlKeys[Uint32(code) - 1]; | String key = _xmlKeys[Uint32(code) - 1]; |
|
|
const String& message) | const String& message) |
: XmlException(XmlException::VALIDATION_ERROR, lineNumber, message) | : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message) |
{ | { |
|
|
} | } |
| |
| |
|
|
MessageLoaderParms& msgParms) | MessageLoaderParms& msgParms) |
: XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms) | : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms) |
{ | { |
|
|
} | } |
| |
| |
|
|
const String& message) | const String& message) |
: XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message) | : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message) |
{ | { |
|
|
} | } |
| |
| |
|
|
MessageLoaderParms& msgParms) | MessageLoaderParms& msgParms) |
: XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms) | : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms) |
{ | { |
|
|
} | } |
| |
| |
|
|
// | // |
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
| |
XmlParser::XmlParser(char* text) : _line(1), _text(text), _current(text), |
XmlParser::XmlParser(char* text, XmlNamespace* ns) |
_restoreChar('\0'), _foundRoot(false) |
: _line(1), |
|
_current(text), |
|
_restoreChar('\0'), |
|
_foundRoot(false), |
|
_scopeLevel(0), |
|
_supportedNamespaces(ns), |
|
_currentUnsupportedNSType(-1) |
|
{ |
|
} |
|
|
|
// Advances p past any run of white-space characters (as classified by
// _isspace), stopping at the first other character or at the terminator.
// `line` is incremented once for every '\n' that is skipped.
inline void _skipWhitespace(Uint32& line, char*& p)
{
    for (; *p && _isspace(*p); ++p)
    {
        if (*p == '\n')
        {
            ++line;
        }
    }
}
|
|
|
#if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \ |
|
defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC) |
|
#pragma optimize( "", off ) |
|
#endif |
|
// Decodes one of the five predefined entity names.  On entry p points at
// the character that follows '&'.
//
// Recognized spellings (the trailing ';' is required):
//     gt;  lt;  apos;  quot;  amp;
//
// Returns the character the entity stands for and advances p past the ';'.
// Returns -1 and leaves p unchanged when the text at p is none of the above.
static int _getEntityRef(char*& p)
{
    static const struct
    {
        const char* text;
        size_t length;
        char value;
    }
    entities[] =
    {
        { "gt;",   3, '>'  },
        { "lt;",   3, '<'  },
        { "apos;", 5, '\'' },
        { "quot;", 5, '"'  },
        { "amp;",  4, '&'  }
    };

    for (size_t i = 0; i < sizeof(entities) / sizeof(entities[0]); i++)
    {
        // strncmp() stops at the first NUL, so a short input such as "am"
        // is never read beyond its terminator.
        if (strncmp(p, entities[i].text, entities[i].length) == 0)
        {
            p += entities[i].length;
            return entities[i].value;
        }
    }

    return -1;
}
|
#if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \ |
|
defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC) |
|
#pragma optimize( "", on ) |
|
#endif |
| |
Boolean XmlParser::next(XmlEntry& entry) |
// Decodes the numeric part of a character reference.  On entry p points at
// the first digit, i.e. just past "&#" (decimal) or "&#x" (hexadecimal).
//
//   hex == false : one to five decimal digits followed by ';'
//   hex == true  : one to four hexadecimal digits followed by ';'
//
// Only values 0..255 are accepted.  On success the value is returned and p
// is advanced past the ';'.  On any violation -1 is returned and p is left
// unchanged.
//
// The digits are scanned by hand rather than with strtoul(): strtoul() also
// accepts leading white space, a '+'/'-' sign and (for base 16) a "0x"
// prefix, none of which may appear inside a character reference.
static inline int _getCharRef(char*& p, bool hex)
{
    const unsigned long base = hex ? 16 : 10;
    const int maxDigits = hex ? 4 : 5;

    char* q = p;
    unsigned long value = 0;
    int numDigits = 0;

    for (;; q++)
    {
        unsigned long digit;

        if ((*q >= '0') && (*q <= '9'))
        {
            digit = *q - '0';
        }
        else if (hex && (*q >= 'a') && (*q <= 'f'))
        {
            digit = *q - 'a' + 10;
        }
        else if (hex && (*q >= 'A') && (*q <= 'F'))
        {
            digit = *q - 'A' + 10;
        }
        else
        {
            break;
        }

        // Bail out as soon as the digit limit is exceeded; this also keeps
        // `value` far away from overflow.
        if (++numDigits > maxDigits)
        {
            return -1;
        }

        value = value * base + digit;
    }

    if ((numDigits == 0) || (*q != ';') || (value > 255))
    {
        return -1;
    }

    p = q + 1;

    return static_cast<int>(value);
}
|
|
|
// Normalizes, in place, the text that starts at p and runs up to (but not
// including) the first occurrence of end_char or the NUL terminator:
//
//   - leading white space is skipped; `start` is set to the first
//     character after it,
//   - every run of white space is replaced by a single ' ',
//   - character references (&#N; / &#xN;) and the predefined entity
//     references are replaced by the character they denote,
//   - a trailing white-space run is removed.
//
// `line` is incremented for every newline consumed.  On return p points at
// end_char (or at the terminator if end_char was not found).  If the
// normalized text is shorter than the input, it is NUL-terminated here;
// otherwise the caller is expected to overwrite *p itself.
//
// Throws XmlException(MALFORMED_REFERENCE) when a reference cannot be
// decoded.
static void _normalize(Uint32& line, char*& p, char end_char, char*& start)
{
    // Skip over leading whitespace:

    _skipWhitespace(line, p);
    start = p;

    // Process one character at a time.  p is the read position, q the
    // write position; q never overtakes p.

    char* q = p;

    while (*p && (*p != end_char))
    {
        if (_isspace(*p))
        {
            // Compress sequences of whitespace characters to a single space
            // character. Update line number when newlines encountered.

            if (*p++ == '\n')
            {
                line++;
            }

            *q++ = ' ';

            _skipWhitespace(line, p);
        }
        else if (*p == '&')
        {
            // Process character references and entity references:

            p++;
            int ch;

            if (*p == '#')
            {
                p++;

                if (*p == 'x')
                {
                    p++;
                    ch = _getCharRef(p, true);
                }
                else
                {
                    ch = _getCharRef(p, false);
                }
            }
            else
            {
                ch = _getEntityRef(p);
            }

            if (ch == -1)
            {
                throw XmlException(XmlException::MALFORMED_REFERENCE, line);
            }

            *q++ = char(ch);
        }
        else
        {
            *q++ = *p++;
        }
    }

    // We encountered the end_char or a zero-terminator.
    //
    // Decide whether the input ended in whitespace BEFORE storing anything
    // through q: when q == p - 1 the store below lands exactly on p[-1] and
    // would destroy the character this test needs to look at.  Consecutive
    // whitespace was already compressed above, so at most one ' ' has to be
    // removed.  p[-1] must not be inspected when nothing was consumed.

    const bool stripTrailingSpace = (p != start) && _isspace(p[-1]);

    *q = *p;

    if (stripTrailingSpace)
    {
        q--;
    }

    // If q got behind p, it is safe and necessary to null-terminate q

    if (q != p)
    {
        *q = '\0';
    }
}
|
|
|
Boolean XmlParser::next( |
|
XmlEntry& entry, |
|
Boolean includeComment) |
|
{ |
|
entry.attributes.clear(); |
|
|
if (!_putBackStack.isEmpty()) | if (!_putBackStack.isEmpty()) |
{ | { |
entry = _putBackStack.top(); | entry = _putBackStack.top(); |
|
|
_restoreChar = '\0'; | _restoreChar = '\0'; |
} | } |
| |
|
// Loop until we are done with comments if includeComment is false. |
|
do |
|
{ |
// Skip over any whitespace: | // Skip over any whitespace: |
|
_skipWhitespace(_line, _current); |
_skipWhitespace(_current); |
|
| |
if (!*_current) | if (!*_current) |
{ | { |
|
|
| |
_stack.pop(); | _stack.pop(); |
} | } |
|
|
return true; |
|
} | } |
else | else |
{ | { |
|
// Normalize the content: |
|
|
|
char* start; |
|
_normalize(_line, _current, '<', start); |
|
|
|
// Get the content: |
|
|
entry.type = XmlEntry::CONTENT; | entry.type = XmlEntry::CONTENT; |
entry.text = _current; |
entry.text = start; |
_getContent(_current); |
|
|
// Overwrite '<' with a null character (temporarily). |
|
|
_restoreChar = *_current; | _restoreChar = *_current; |
*_current = '\0'; | *_current = '\0'; |
| |
if (nullTerminator) | if (nullTerminator) |
*nullTerminator = '\0'; | *nullTerminator = '\0'; |
|
} |
|
} while (!includeComment && entry.type == XmlEntry::COMMENT); |
|
|
|
if (_supportedNamespaces && |
|
(entry.type == XmlEntry::START_TAG || |
|
entry.type == XmlEntry::EMPTY_TAG || |
|
entry.type == XmlEntry::END_TAG)) |
|
{ |
|
// Process attributes and enter namespaces into the table |
|
if (entry.type == XmlEntry::START_TAG || |
|
entry.type == XmlEntry::EMPTY_TAG) |
|
{ |
|
_scopeLevel++; |
|
for (unsigned int i = 0; i < entry.attributes.size(); i++) |
|
{ |
|
XmlAttribute& attr = entry.attributes[i]; |
|
if (strncmp(attr.name, "xmlns", 5) == 0) |
|
{ |
|
XmlNamespace ns; |
|
if (attr.name[5] == ':') |
|
{ |
|
ns.localName = attr.localName; |
|
} |
|
else |
|
{ |
|
// Default name space has no local name |
|
ns.localName = 0; |
|
} |
|
ns.extendedName = attr.value; |
|
ns.scopeLevel = _scopeLevel; |
|
ns.type = getSupportedNamespaceType(ns.extendedName); |
|
|
|
// If the namespace is not supported, assign it a unique |
|
// negative identifier.
|
if (ns.type == -1) |
|
{ |
|
ns.type = _currentUnsupportedNSType--; |
|
} |
|
|
|
// Even unsupported namespaces get pushed onto the stack. |
|
// We will throw an exception if there is an attempt to
|
// reference an unsupported namespace later. |
|
_nameSpaces.push(ns); |
|
} |
|
else |
|
{ |
|
// Attribute names may also be namespace qualified. |
|
attr.nsType = _getNamespaceType(attr.name); |
|
} |
|
} |
|
} |
|
|
|
// Get the namespace type for this tag. |
|
entry.nsType = _getNamespaceType(entry.text); |
|
|
|
if (entry.type == XmlEntry::END_TAG || |
|
entry.type == XmlEntry::EMPTY_TAG) |
|
{ |
|
// Remove any namespaces of the current scope level from |
|
// the scope stack. |
|
while (!_nameSpaces.isEmpty() && |
|
_scopeLevel <= _nameSpaces.top().scopeLevel) |
|
{ |
|
_nameSpaces.pop(); |
|
} |
| |
_substituteReferences((char*)entry.text); |
PEGASUS_ASSERT(_scopeLevel > 0); |
_normalize((char*)entry.text); |
_scopeLevel--; |
|
} |
|
} |
|
else |
|
{ |
|
entry.nsType = -1; |
|
} |
| |
return true; | return true; |
} | } |
|
|
|
// Get the namespace type of the given tag |
|
int XmlParser::_getNamespaceType(const char* tag) |
|
{ |
|
const char* pos = strchr(tag, ':'); |
|
|
|
// If ":" is not found, the tag is not namespace qualified and we |
|
// need to look for the default name space. |
|
|
|
// Search the namespace stack from the top |
|
for (int i = _nameSpaces.size() - 1; i >=0; i--) |
|
{ |
|
// If ":" is found, look for the name space with the matching |
|
// local name... |
|
if ((pos && _nameSpaces[i].localName && |
|
!strncmp(_nameSpaces[i].localName, tag, pos - tag)) || |
|
// ... otherwise look for the default name space. It's the |
|
// one with localName set to NULL |
|
(!pos && !_nameSpaces[i].localName)) |
|
{ |
|
return _nameSpaces[i].type; |
|
} |
|
} |
|
|
|
// If the tag is namespace qualified, but the name space has not been |
|
// declared, it's malformed XML and we must throw an exception. |
|
// Note: The "xml" namespace is specifically defined by the W3C as a |
|
// reserved prefix ("http://www.w3.org/XML/1998/namespace"). |
|
if (pos && (strncmp(tag, "xml:", 4) != 0)) |
|
{ |
|
throw XmlException(XmlException::UNDECLARED_NAMESPACE, _line); |
|
} |
|
|
|
// Otherwise it's OK not to have a name space. |
|
return -1; |
|
} |
|
|
|
// Given the extended namespace name, find it in the table of supported |
|
// namespaces and return its type. |
|
int XmlParser::getSupportedNamespaceType(const char* extendedName) |
|
{ |
|
for (int i = 0; |
|
_supportedNamespaces[i].localName != 0; |
|
i++) |
|
{ |
|
PEGASUS_ASSERT(_supportedNamespaces[i].type == i); |
|
if (!strcmp(_supportedNamespaces[i].extendedName, extendedName)) |
|
{ |
|
return _supportedNamespaces[i].type; |
|
} |
|
} |
|
return -1; |
|
} |
|
|
|
// Returns a pointer to the most recently pushed in-scope namespace whose
// type equals nsType, or 0 when no such namespace is on the stack.
XmlNamespace* XmlParser::getNamespace(int nsType)
{
    // Walk from the top of the stack towards the bottom.
    int index = _nameSpaces.size();

    while (index-- > 0)
    {
        if (_nameSpaces[index].type == nsType)
        {
            return &_nameSpaces[index];
        }
    }

    return 0;
}
| |
void XmlParser::putBack(XmlEntry& entry) | void XmlParser::putBack(XmlEntry& entry) |
|
|
// Nothing to do! | // Nothing to do! |
} | } |
| |
void XmlParser::_skipWhitespace(char*& p) |
// A-Za-z0-9_-. (Note that ':' is not included and must be checked separately) |
|
static unsigned char _isInnerElementChar[] = |
{ | { |
while (*p && _isspace(*p)) |
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|
0,0,0,0,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1, |
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
|
1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|
}; |
|
|
|
inline Boolean _getQName(char*& p, const char*& localName) |
{ | { |
if (*p == '\n') |
localName = p; |
_line++; |
|
|
if (!CharSet::isAlNumUnder(Uint8(*p))) |
|
return false; |
| |
p++; | p++; |
} |
|
} |
|
| |
Boolean XmlParser::_getElementName(char*& p) |
while (*p && _isInnerElementChar[Uint8(*p)]) |
|
p++; |
|
|
|
// We've validated the prefix, now validate the local name |
|
if (*p == ':') |
{ | { |
if (!(((*p >= 'A') && (*p <= 'Z')) || |
localName = ++p; |
((*p >= 'a') && (*p <= 'z')) || |
|
(*p == '_'))) |
if (!CharSet::isAlNumUnder(Uint8(*p))) |
throw XmlException(XmlException::BAD_START_TAG, _line); |
return false; |
|
|
p++; | p++; |
| |
while ((*p) && |
while (*p && _isInnerElementChar[Uint8(*p)]) |
(((*p >= 'A') && (*p <= 'Z')) || |
|
((*p >= 'a') && (*p <= 'z')) || |
|
((*p >= '0') && (*p <= '9')) || |
|
*p == '_' || *p == '-' || *p == ':' || *p == '.')) |
|
p++; | p++; |
|
} |
|
|
|
return true; |
|
} |
|
|
|
Boolean XmlParser::_getElementName(char*& p, const char*& localName) |
|
{ |
|
if (!_getQName(p, localName)) |
|
throw XmlException(XmlException::BAD_START_TAG, _line); |
| |
// The next character must be a space: | // The next character must be a space: |
| |
if (_isspace(*p)) | if (_isspace(*p)) |
{ | { |
*p++ = '\0'; | *p++ = '\0'; |
_skipWhitespace(p); |
_skipWhitespace(_line, p); |
} | } |
| |
if (*p == '>') | if (*p == '>') |
|
|
return false; | return false; |
} | } |
| |
Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement) |
Boolean XmlParser::_getOpenElementName( |
|
char*& p, |
|
const char*& localName, |
|
Boolean& openCloseElement) |
{ | { |
openCloseElement = false; | openCloseElement = false; |
| |
if (!(((*p >= 'A') && (*p <= 'Z')) || |
if (!_getQName(p, localName)) |
((*p >= 'a') && (*p <= 'z')) || |
|
(*p == '_'))) |
|
throw XmlException(XmlException::BAD_START_TAG, _line); | throw XmlException(XmlException::BAD_START_TAG, _line); |
p++; |
|
|
|
while ((*p) && |
|
(((*p >= 'A') && (*p <= 'Z')) || |
|
((*p >= 'a') && (*p <= 'z')) || |
|
((*p >= '0') && (*p <= '9')) || |
|
*p == '_' || *p == '-' || *p == ':' || *p == '.')) |
|
p++; |
|
| |
// The next character must be a space: | // The next character must be a space: |
| |
if (_isspace(*p)) | if (_isspace(*p)) |
{ | { |
*p++ = '\0'; | *p++ = '\0'; |
_skipWhitespace(p); |
_skipWhitespace(_line, p); |
} | } |
| |
if (*p == '>') | if (*p == '>') |
|
|
return false; | return false; |
} | } |
| |
void XmlParser::_getAttributeNameAndEqual(char*& p) |
void XmlParser::_getAttributeNameAndEqual(char*& p, const char*& localName) |
{ | { |
if (!(((*p >= 'A') && (*p <= 'Z')) || |
if (!_getQName(p, localName)) |
((*p >= 'a') && (*p <= 'z')) || |
|
(*p == '_'))) |
|
throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line); | throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line); |
p++; |
|
|
|
while ((*p) && |
|
(((*p >= 'A') && (*p <= 'Z')) || |
|
((*p >= 'a') && (*p <= 'z')) || |
|
((*p >= '0') && (*p <= '9')) || |
|
*p == '_' || *p == '-' || *p == ':' || *p == '.')) |
|
p++; |
|
| |
char* term = p; | char* term = p; |
| |
_skipWhitespace(p); |
_skipWhitespace(_line, p); |
| |
if (*p != '=') | if (*p != '=') |
throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line); | throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line); |
| |
p++; | p++; |
| |
_skipWhitespace(p); |
_skipWhitespace(_line, p); |
| |
*term = '\0'; | *term = '\0'; |
} | } |
| |
void XmlParser::_getAttributeValue(char*& p) |
|
{ |
|
// ATTN-B: handle values contained in semiquotes: |
|
|
|
if (*p != '"' && *p != '\'') |
|
throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); |
|
|
|
char startChar = *p++; |
|
|
|
while (*p && *p != startChar) |
|
p++; |
|
|
|
if (*p != startChar) |
|
throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); |
|
|
|
*p++ = '\0'; |
|
} |
|
|
|
void XmlParser::_getComment(char*& p) | void XmlParser::_getComment(char*& p) |
{ | { |
// Now p points to first non-whitespace character beyond "<--" sequence: | // Now p points to first non-whitespace character beyond "<--" sequence: |
|
|
p++; | p++; |
} | } |
| |
void XmlParser::_getContent(char*& p) |
|
{ |
|
while (*p && *p != '<') |
|
{ |
|
if (*p == '\n') |
|
_line++; |
|
|
|
p++; |
|
} |
|
} |
|
|
|
void XmlParser::_substituteReferences(char* text) |
|
{ |
|
Uint32 rem = strlen(text); |
|
|
|
for (char* p = text; *p; p++, rem--) |
|
{ |
|
if (*p == '&') |
|
{ |
|
// Process character or entity reference |
|
|
|
Uint16 referenceChar = 0; |
|
Uint32 referenceLength = 0; |
|
XmlException::Code code = XmlException::MALFORMED_REFERENCE; |
|
|
|
if (*(p+1) == '#') |
|
{ |
|
// Found a character (numeric) reference |
|
// Determine whether it is decimal or hex |
|
if (*(p+2) == 'x') |
|
{ |
|
// Decode a hexadecimal character reference |
|
char* q = p+3; |
|
|
|
// At most four digits are allowed, plus trailing ';' |
|
Uint32 numDigits; |
|
for (numDigits = 0; numDigits < 5; numDigits++, q++) |
|
{ |
|
if (isdigit(*q)) |
|
{ |
|
referenceChar = (referenceChar << 4); |
|
referenceChar += (*q - '0'); |
|
} |
|
else if ((*q >= 'A') && (*q <= 'F')) |
|
{ |
|
referenceChar = (referenceChar << 4); |
|
referenceChar += (*q - 'A' + 10); |
|
} |
|
else if ((*q >= 'a') && (*q <= 'f')) |
|
{ |
|
referenceChar = (referenceChar << 4); |
|
referenceChar += (*q - 'a' + 10); |
|
} |
|
else if (*q == ';') |
|
{ |
|
break; |
|
} |
|
else |
|
{ |
|
throw XmlException(code, _line); |
|
} |
|
} |
|
|
|
// Hex number must be 1 - 4 digits |
|
if ((numDigits == 0) || (numDigits > 4)) |
|
{ |
|
throw XmlException(code, _line); |
|
} |
|
|
|
// ATTN: Currently do not support 16-bit characters |
|
if (referenceChar > 0xff) |
|
{ |
|
// ATTN: Is there a good way to say "unsupported"? |
|
throw XmlException(code, _line); |
|
} |
|
|
|
referenceLength = numDigits + 4; |
|
} |
|
else |
|
{ |
|
// Decode a decimal character reference |
|
Uint32 newChar = 0; |
|
char* q = p+2; |
|
|
|
// At most five digits are allowed, plus trailing ';' |
|
Uint32 numDigits; |
|
for (numDigits = 0; numDigits < 6; numDigits++, q++) |
|
{ |
|
if (isdigit(*q)) |
|
{ |
|
newChar = (newChar * 10); |
|
newChar += (*q - '0'); |
|
} |
|
else if (*q == ';') |
|
{ |
|
break; |
|
} |
|
else |
|
{ |
|
throw XmlException(code, _line); |
|
} |
|
} |
|
|
|
// Decimal number must be 1 - 5 digits and fit in 16 bits |
|
if ((numDigits == 0) || (numDigits > 5) || |
|
(newChar > 0xffff)) |
|
{ |
|
throw XmlException(code, _line); |
|
} |
|
|
|
// ATTN: Currently do not support 16-bit characters |
|
if (newChar > 0xff) |
|
{ |
|
// ATTN: Is there a good way to say "unsupported"? |
|
throw XmlException(code, _line); |
|
} |
|
|
|
referenceChar = Uint16(newChar); |
|
referenceLength = numDigits + 3; |
|
} |
|
} |
|
else |
|
{ |
|
// Check for entity reference |
|
// ATTN: Inefficient if many entity references are supported |
|
Uint32 i; |
|
for (i = 0; i < _REFERENCES_SIZE; i++) |
|
{ |
|
Uint32 length = _references[i].length; |
|
const char* match = _references[i].match; |
|
|
|
if (strncmp(p, _references[i].match, length) == 0) |
|
{ |
|
referenceChar = _references[i].replacement; |
|
referenceLength = length; |
|
break; |
|
} |
|
} |
|
|
|
if (i == _REFERENCES_SIZE) |
|
{ |
|
// Didn't recognize the entity reference |
|
// ATTN: Is there a good way to say "unsupported"? |
|
throw XmlException(code, _line); |
|
} |
|
} |
|
|
|
// Replace the reference with the correct character |
|
*p = (char)referenceChar; |
|
char* q = p + referenceLength; |
|
rem = rem - referenceLength + 1; |
|
memmove(p + 1, q, rem); |
|
} |
|
} |
|
} |
|
|
|
// File-local empty C string.  _getElement assigns it to entry.text for
// DOCTYPE entries.
static const char _EMPTY_STRING[] = "";
|
|
|
void XmlParser::_getElement(char*& p, XmlEntry& entry) | void XmlParser::_getElement(char*& p, XmlEntry& entry) |
{ | { |
entry.attributeCount = 0; |
|
|
|
//-------------------------------------------------------------------------- | //-------------------------------------------------------------------------- |
// Get the element name (expect one of these: '?', '!', [A-Za-z_]) | // Get the element name (expect one of these: '?', '!', [A-Za-z_]) |
//-------------------------------------------------------------------------- | //-------------------------------------------------------------------------- |
|
|
| |
Boolean openCloseElement = false; | Boolean openCloseElement = false; |
| |
if (_getElementName(p)) |
if (_getElementName(p, entry.localName)) |
return; | return; |
} | } |
else if (*p == '!') | else if (*p == '!') |
|
|
else if (memcmp(p, "DOCTYPE", 7) == 0) | else if (memcmp(p, "DOCTYPE", 7) == 0) |
{ | { |
entry.type = XmlEntry::DOCTYPE; | entry.type = XmlEntry::DOCTYPE; |
entry.text = _EMPTY_STRING; |
entry.text = ""; |
_getDocType(p); | _getDocType(p); |
return; | return; |
} | } |
|
|
entry.type = XmlEntry::END_TAG; | entry.type = XmlEntry::END_TAG; |
entry.text = ++p; | entry.text = ++p; |
| |
if (!_getElementName(p)) |
if (!_getElementName(p, entry.localName)) |
throw(XmlException(XmlException::BAD_END_TAG, _line)); | throw(XmlException(XmlException::BAD_END_TAG, _line)); |
| |
return; | return; |
|
|
| |
Boolean openCloseElement = false; | Boolean openCloseElement = false; |
| |
if (_getOpenElementName(p, openCloseElement)) |
if (_getOpenElementName(p, entry.localName, openCloseElement)) |
{ | { |
if (openCloseElement) | if (openCloseElement) |
entry.type = XmlEntry::EMPTY_TAG; | entry.type = XmlEntry::EMPTY_TAG; |
|
|
} | } |
| |
XmlAttribute attr; | XmlAttribute attr; |
|
attr.nsType = -1; |
attr.name = p; | attr.name = p; |
_getAttributeNameAndEqual(p); |
_getAttributeNameAndEqual(p, attr.localName); |
|
|
|
// Get the attribute value (e.g., "some value") |
|
{ |
|
if ((*p != '"') && (*p != '\'')) |
|
{ |
|
throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); |
|
} |
|
|
|
char quote = *p++; |
| |
if (*p != '"' && *p != '\'') |
char* start; |
|
_normalize(_line, p, quote, start); |
|
attr.value = start; |
|
|
|
if (*p != quote) |
|
{ |
throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); | throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); |
|
} |
| |
attr.value = p + 1; |
// Overwrite the closing quote with a null-terminator: |
_getAttributeValue(p); |
|
|
*p++ = '\0'; |
|
} |
| |
if (entry.type == XmlEntry::XML_DECLARATION) | if (entry.type == XmlEntry::XML_DECLARATION) |
{ | { |
|
|
throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); | throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); |
} | } |
| |
_skipWhitespace(p); |
_skipWhitespace(_line, p); |
| |
if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES) |
entry.attributes.append(attr); |
throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line); |
|
|
|
_substituteReferences((char*)attr.value); |
|
entry.attributes[entry.attributeCount++] = attr; |
|
} | } |
} | } |
| |
|
|
| |
PEGASUS_STD(cout) << '\n'; | PEGASUS_STD(cout) << '\n'; |
| |
for (Uint32 i = 0; i < attributeCount; i++) |
for (Uint32 i = 0; i < attributes.size(); i++) |
{ | { |
PEGASUS_STD(cout) << " " << attributes[i].name << "=\""; | PEGASUS_STD(cout) << " " << attributes[i].name << "=\""; |
_printValue(attributes[i].value); | _printValue(attributes[i].value); |
|
|
const XmlAttribute* XmlEntry::findAttribute( | const XmlAttribute* XmlEntry::findAttribute( |
const char* name) const | const char* name) const |
{ | { |
for (Uint32 i = 0; i < attributeCount; i++) |
for (Uint32 i = 0; i < attributes.size(); i++) |
{ | { |
if (strcmp(attributes[i].name, name) == 0) | if (strcmp(attributes[i].name, name) == 0) |
return &attributes[i]; | return &attributes[i]; |
|
|
return 0; | return 0; |
} | } |
| |
|
// Namespace-aware attribute lookup.  Returns a pointer to the first
// attribute of this entry whose nsType equals `nsType` and whose localName
// compares equal to `name`, or 0 when there is no such attribute.
const XmlAttribute* XmlEntry::findAttribute(
    int nsType,
    const char* name) const
{
    const Uint32 count = attributes.size();

    for (Uint32 index = 0; index < count; index++)
    {
        const XmlAttribute& candidate = attributes[index];

        // Check the namespace first; the name is only compared when the
        // namespace already matches.
        if (candidate.nsType != nsType)
        {
            continue;
        }

        if (strcmp(candidate.localName, name) == 0)
        {
            return &candidate;
        }
    }

    return 0;
}
|
|
// Find first non-whitespace character (set first) and last non-whitespace | // Find first non-whitespace character (set first) and last non-whitespace |
// character (set last one past this). For example, consider this string: | // character (set last one past this). For example, consider this string: |
// | // |
|
|
if (!end || end != last) | if (!end || end != last) |
return false; | return false; |
| |
value = Uint32(tmp); |
value = static_cast<Real32>(tmp); |
return true; | return true; |
} | } |
| |
|
|
return true; | return true; |
} | } |
| |
void XmlAppendCString(Array<Sint8>& out, const char* str) |
void XmlAppendCString(Buffer& out, const char* str) |
{ | { |
out.append(str, strlen(str)); |
out.append(str, static_cast<Uint32>(strlen(str))); |
} | } |
| |
PEGASUS_NAMESPACE_END | PEGASUS_NAMESPACE_END |