version 1.43.2.4, 2008/03/12 23:40:26
|
version 1.47, 2008/08/07 18:03:48
|
|
|
"Unterminated comment", | "Unterminated comment", |
"Unterminated CDATA block", | "Unterminated CDATA block", |
"Unterminated DOCTYPE", | "Unterminated DOCTYPE", |
"Too many attributes: parser only handles 10", |
|
"Malformed reference", | "Malformed reference", |
"Expected a comment or CDATA following \"<!\" sequence", | "Expected a comment or CDATA following \"<!\" sequence", |
"Closing element does not match opening element", | "Closing element does not match opening element", |
|
|
"More than one root element was encountered", | "More than one root element was encountered", |
"Validation error", | "Validation error", |
"Semantic error", | "Semantic error", |
"Malformed namespace declaration", |
|
"Namespace not supported", |
|
"Namespace not declared" | "Namespace not declared" |
}; | }; |
| |
|
|
"Common.XmlParser.UNTERMINATED_COMMENT", | "Common.XmlParser.UNTERMINATED_COMMENT", |
"Common.XmlParser.UNTERMINATED_CDATA", | "Common.XmlParser.UNTERMINATED_CDATA", |
"Common.XmlParser.UNTERMINATED_DOCTYPE", | "Common.XmlParser.UNTERMINATED_DOCTYPE", |
"Common.XmlParser.TOO_MANY_ATTRIBUTES", |
|
"Common.XmlParser.MALFORMED_REFERENCE", | "Common.XmlParser.MALFORMED_REFERENCE", |
"Common.XmlParser.EXPECTED_COMMENT_OR_CDATA", | "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA", |
"Common.XmlParser.START_END_MISMATCH", | "Common.XmlParser.START_END_MISMATCH", |
|
|
"Common.XmlParser.MULTIPLE_ROOTS", | "Common.XmlParser.MULTIPLE_ROOTS", |
"Common.XmlParser.VALIDATION_ERROR", | "Common.XmlParser.VALIDATION_ERROR", |
"Common.XmlParser.SEMANTIC_ERROR", | "Common.XmlParser.SEMANTIC_ERROR", |
"Common.XmlParser.MALFORMED_NAMESPACE_DECL", |
|
"Common.XmlParser.UNSUPPORTED_NAMESPACE", |
|
"Common.XmlParser.UNDECLARED_NAMESPACE" | "Common.XmlParser.UNDECLARED_NAMESPACE" |
}; | }; |
| |
|
|
_current(text), | _current(text), |
_restoreChar('\0'), | _restoreChar('\0'), |
_foundRoot(false), | _foundRoot(false), |
_scopeLevel(0), |
_supportedNamespaces(ns), |
_supportedNamespaces(ns) |
// Start valid indexes with -2. -1 is reserved for not found. |
|
_currentUnsupportedNSType(-2) |
{ | { |
} | } |
| |
|
|
} | } |
} | } |
| |
// We encountered a the end_char or a zero-terminator. |
|
|
|
*q = *p; |
|
|
|
// Remove single trailing whitespace (consecutive whitespaces already | // Remove single trailing whitespace (consecutive whitespaces already |
// compressed above). Since p >= q, we can tell if we need to strip a | // compressed above). Since p >= q, we can tell if we need to strip a |
// trailing space from q by looking at the end of p. We must not look at | // trailing space from q by looking at the end of p. We must not look at |
// the last character of p, though, if p is an empty string. | // the last character of p, though, if p is an empty string. |
|
Boolean adjust_q = (p != start) && _isspace(p[-1]); |
|
|
|
// We encountered a the end_char or a zero-terminator. |
| |
if ((p != start) && _isspace(p[-1])) |
*q = *p; |
|
|
|
if (adjust_q) |
{ | { |
q--; | q--; |
} | } |
|
|
XmlEntry& entry, | XmlEntry& entry, |
Boolean includeComment) | Boolean includeComment) |
{ | { |
entry.attributes.clear(); |
|
|
|
if (!_putBackStack.isEmpty()) | if (!_putBackStack.isEmpty()) |
{ | { |
entry = _putBackStack.top(); | entry = _putBackStack.top(); |
|
|
_restoreChar = '\0'; | _restoreChar = '\0'; |
} | } |
| |
|
entry.attributes.clear(); |
|
|
|
if (_supportedNamespaces) |
|
{ |
|
// Remove namespaces of a deeper scope level from the stack. |
|
while (!_nameSpaces.isEmpty() && |
|
_nameSpaces.top().scopeLevel > _stack.size()) |
|
{ |
|
_nameSpaces.pop(); |
|
} |
|
} |
|
|
// Loop until we are done with comments if includeComment is false. | // Loop until we are done with comments if includeComment is false. |
do | do |
{ | { |
|
|
entry.type == XmlEntry::EMPTY_TAG || | entry.type == XmlEntry::EMPTY_TAG || |
entry.type == XmlEntry::END_TAG)) | entry.type == XmlEntry::END_TAG)) |
{ | { |
// Process attributes and enter namespaces into the table |
// Determine the namespace type for this entry |
|
|
if (entry.type == XmlEntry::START_TAG || | if (entry.type == XmlEntry::START_TAG || |
entry.type == XmlEntry::EMPTY_TAG) | entry.type == XmlEntry::EMPTY_TAG) |
{ | { |
_scopeLevel++; |
// Process namespace declarations and determine the namespace type |
for (unsigned int i = 0; i < entry.attributes.size(); i++) |
// for the attributes. |
|
|
|
Uint32 scopeLevel = _stack.size(); |
|
if (entry.type == XmlEntry::EMPTY_TAG) |
|
{ |
|
// Empty tags are deeper scope, but not pushed onto the stack |
|
scopeLevel++; |
|
} |
|
|
|
for (Uint32 i = 0, n = entry.attributes.size(); i < n; i++) |
{ | { |
XmlAttribute& attr = entry.attributes[i]; | XmlAttribute& attr = entry.attributes[i]; |
if (strncmp(attr.name, "xmlns", 5) == 0) |
if ((strncmp(attr.name, "xmlns:", 6) == 0) || |
|
(strcmp(attr.name, "xmlns") == 0)) |
{ | { |
|
// Process a namespace declaration |
XmlNamespace ns; | XmlNamespace ns; |
if (attr.name[5] == ':') | if (attr.name[5] == ':') |
{ | { |
ns.localName = attr.name + 6; |
ns.localName = attr.localName; |
|
|
// Check if we have malformed XML of the form: |
|
// "xmlns:=URI". In this case attr.name will be set |
|
// to "xmlns:" and ns.localName will point to '\0' |
|
if (ns.localName[0] == '\0') |
|
{ |
|
throw XmlException( |
|
XmlException::MALFORMED_NAMESPACE_DECL, |
|
_line); |
|
} |
|
} | } |
else | else |
{ | { |
|
|
ns.localName = 0; | ns.localName = 0; |
} | } |
ns.extendedName = attr.value; | ns.extendedName = attr.value; |
ns.scopeLevel = _scopeLevel; |
ns.scopeLevel = scopeLevel; |
ns.type = getSupportedNamespaceType(ns.extendedName); |
ns.type = _getSupportedNamespaceType(ns.extendedName); |
|
|
|
// If the namespace is not supported, assign it a unique |
|
// negative identifier. |
|
if (ns.type == -1) |
|
{ |
|
ns.type = _currentUnsupportedNSType--; |
|
} |
| |
// Even unsupported namespaces get pushed onto the stack. |
|
// We will throw an exception of there is an attempt to |
|
// reference an unsupported namespace later. |
|
_nameSpaces.push(ns); | _nameSpaces.push(ns); |
} | } |
else | else |
{ | { |
// Attribute names may also be namespace qualified. |
// Get the namespace type for this attribute. |
attr.nsType = _getNamespaceType(attr.name); | attr.nsType = _getNamespaceType(attr.name); |
} | } |
} | } |
} | } |
| |
// Get the namespace type for this tag. |
|
entry.nsType = _getNamespaceType(entry.text); | entry.nsType = _getNamespaceType(entry.text); |
|
|
if (entry.type == XmlEntry::END_TAG || |
|
entry.type == XmlEntry::EMPTY_TAG) |
|
{ |
|
// Remove any namespaces of the current scope level from |
|
// the scope stack. |
|
while (!_nameSpaces.isEmpty() && |
|
_scopeLevel <= _nameSpaces.top().scopeLevel) |
|
{ |
|
_nameSpaces.pop(); |
|
} |
|
|
|
PEGASUS_ASSERT(_scopeLevel > 0); |
|
_scopeLevel--; |
|
} |
|
} | } |
else | else |
{ | { |
|
|
{ | { |
const char* pos = strchr(tag, ':'); | const char* pos = strchr(tag, ':'); |
| |
// If ":" is not found, the tag is not namespace qualified and we |
// If ':' is not found, the tag is not namespace qualified and we |
// need to look for the default name space. | // need to look for the default name space. |
| |
// Search the namespace stack from the top | // Search the namespace stack from the top |
for (int i = _nameSpaces.size() - 1; i >=0; i--) |
for (Sint32 i = _nameSpaces.size() - 1; i >=0; i--) |
{ | { |
// If ":" is found, look for the name space with the matching |
// If ':' is found, look for the name space with the matching |
// local name... | // local name... |
if ((pos && _nameSpaces[i].localName && | if ((pos && _nameSpaces[i].localName && |
!strncmp(_nameSpaces[i].localName, tag, pos - tag)) || | !strncmp(_nameSpaces[i].localName, tag, pos - tag)) || |
|
|
// one with localName set to NULL | // one with localName set to NULL |
(!pos && !_nameSpaces[i].localName)) | (!pos && !_nameSpaces[i].localName)) |
{ | { |
// If it's a reference to an unsupported namespace, |
|
// throw an exception |
|
if (_nameSpaces[i].type == -1) |
|
{ |
|
throw XmlException(XmlException::UNSUPPORTED_NAMESPACE, _line); |
|
} |
|
return _nameSpaces[i].type; | return _nameSpaces[i].type; |
} | } |
} | } |
| |
// If the tag is namespace qualified, but the name space has not been | // If the tag is namespace qualified, but the name space has not been |
// declared, it's malformed XML and we must throw an exception |
// declared, it's malformed XML and we must throw an exception. |
if (pos) |
// Note: The "xml" namespace is specifically defined by the W3C as a |
|
// reserved prefix ("http://www.w3.org/XML/1998/namespace"). |
|
if (pos && (strncmp(tag, "xml:", 4) != 0)) |
{ | { |
throw XmlException(XmlException::UNDECLARED_NAMESPACE, _line); | throw XmlException(XmlException::UNDECLARED_NAMESPACE, _line); |
} | } |
|
|
return -1; | return -1; |
} | } |
| |
// Gived the extended namespace name, find it in the table of supported |
// Given the extended namespace name, find it in the table of supported |
// namespaces and return its type. | // namespaces and return its type. |
int XmlParser::getSupportedNamespaceType(const char* extendedName) |
int XmlParser::_getSupportedNamespaceType(const char* extendedName) |
{ | { |
for (int i = 0; |
for (Sint32 i = 0; |
_supportedNamespaces[i].localName != 0; | _supportedNamespaces[i].localName != 0; |
i++) | i++) |
{ | { |
|
|
| |
XmlNamespace* XmlParser::getNamespace(int nsType) | XmlNamespace* XmlParser::getNamespace(int nsType) |
{ | { |
for (int i = _nameSpaces.size() - 1; i >=0; i--) |
for (Sint32 i = _nameSpaces.size() - 1; i >=0; i--) |
{ | { |
if (_nameSpaces[i].type == nsType) | if (_nameSpaces[i].type == nsType) |
{ | { |
|
|
// Nothing to do! | // Nothing to do! |
} | } |
| |
// A-Za-z0-9_-:. |
// A-Za-z0-9_-. (Note that ':' is not included and must be checked separately) |
static unsigned char _isInnerElementChar[] = | static unsigned char _isInnerElementChar[] = |
{ | { |
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
0,0,0,0,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1, |
0,0,0,0,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1, |
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
}; | }; |
| |
Boolean XmlParser::_getElementName(char*& p) |
inline Boolean _getQName(char*& p, const char*& localName) |
{ | { |
|
localName = p; |
|
|
if (!CharSet::isAlNumUnder(Uint8(*p))) | if (!CharSet::isAlNumUnder(Uint8(*p))) |
throw XmlException(XmlException::BAD_START_TAG, _line); |
return false; |
|
|
|
p++; |
|
|
|
while (*p && _isInnerElementChar[Uint8(*p)]) |
|
p++; |
|
|
|
// We've validated the prefix, now validate the local name |
|
if (*p == ':') |
|
{ |
|
localName = ++p; |
|
|
|
if (!CharSet::isAlNumUnder(Uint8(*p))) |
|
return false; |
| |
p++; | p++; |
| |
while (*p && _isInnerElementChar[Uint8(*p)]) | while (*p && _isInnerElementChar[Uint8(*p)]) |
p++; | p++; |
|
} |
|
|
|
return true; |
|
} |
|
|
|
Boolean XmlParser::_getElementName(char*& p, const char*& localName) |
|
{ |
|
if (!_getQName(p, localName)) |
|
throw XmlException(XmlException::BAD_START_TAG, _line); |
| |
// The next character must be a space: | // The next character must be a space: |
| |
|
|
return false; | return false; |
} | } |
| |
Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement) |
Boolean XmlParser::_getOpenElementName( |
|
char*& p, |
|
const char*& localName, |
|
Boolean& openCloseElement) |
{ | { |
openCloseElement = false; | openCloseElement = false; |
| |
if (!CharSet::isAlNumUnder(Uint8(*p))) |
if (!_getQName(p, localName)) |
throw XmlException(XmlException::BAD_START_TAG, _line); | throw XmlException(XmlException::BAD_START_TAG, _line); |
| |
p++; |
|
|
|
while (*p && _isInnerElementChar[Uint8(*p)]) |
|
p++; |
|
|
|
// The next character must be a space: | // The next character must be a space: |
| |
if (_isspace(*p)) | if (_isspace(*p)) |
|
|
return false; | return false; |
} | } |
| |
void XmlParser::_getAttributeNameAndEqual(char*& p) |
void XmlParser::_getAttributeNameAndEqual(char*& p, const char*& localName) |
{ | { |
if (!CharSet::isAlNumUnder((Uint8)*p)) |
if (!_getQName(p, localName)) |
throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line); | throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line); |
| |
p++; |
|
|
|
while (*p && _isInnerElementChar[Uint8(*p)]) |
|
p++; |
|
|
|
char* term = p; | char* term = p; |
| |
_skipWhitespace(_line, p); | _skipWhitespace(_line, p); |
|
|
entry.type = XmlEntry::XML_DECLARATION; | entry.type = XmlEntry::XML_DECLARATION; |
entry.text = ++p; | entry.text = ++p; |
| |
Boolean openCloseElement = false; |
if (_getElementName(p, entry.localName)) |
|
|
if (_getElementName(p)) |
|
return; | return; |
} | } |
else if (*p == '!') | else if (*p == '!') |
|
|
entry.type = XmlEntry::END_TAG; | entry.type = XmlEntry::END_TAG; |
entry.text = ++p; | entry.text = ++p; |
| |
if (!_getElementName(p)) |
if (!_getElementName(p, entry.localName)) |
throw(XmlException(XmlException::BAD_END_TAG, _line)); | throw(XmlException(XmlException::BAD_END_TAG, _line)); |
| |
return; | return; |
|
|
| |
Boolean openCloseElement = false; | Boolean openCloseElement = false; |
| |
if (_getOpenElementName(p, openCloseElement)) |
if (_getOpenElementName(p, entry.localName, openCloseElement)) |
{ | { |
if (openCloseElement) | if (openCloseElement) |
entry.type = XmlEntry::EMPTY_TAG; | entry.type = XmlEntry::EMPTY_TAG; |
|
|
XmlAttribute attr; | XmlAttribute attr; |
attr.nsType = -1; | attr.nsType = -1; |
attr.name = p; | attr.name = p; |
_getAttributeNameAndEqual(p); |
_getAttributeNameAndEqual(p, attr.localName); |
| |
// Get the attribute value (e.g., "some value") | // Get the attribute value (e.g., "some value") |
{ | { |
|
|
| |
PEGASUS_STD(cout) << '\n'; | PEGASUS_STD(cout) << '\n'; |
| |
for (Uint32 i = 0; i < attributes.size(); i++) |
for (Uint32 i = 0, n = attributes.size(); i < n; i++) |
{ | { |
PEGASUS_STD(cout) << " " << attributes[i].name << "=\""; | PEGASUS_STD(cout) << " " << attributes[i].name << "=\""; |
_printValue(attributes[i].value); | _printValue(attributes[i].value); |
|
|
const XmlAttribute* XmlEntry::findAttribute( | const XmlAttribute* XmlEntry::findAttribute( |
const char* name) const | const char* name) const |
{ | { |
for (Uint32 i = 0; i < attributes.size(); i++) |
for (Uint32 i = 0, n = attributes.size(); i < n; i++) |
{ | { |
if (strcmp(attributes[i].name, name) == 0) | if (strcmp(attributes[i].name, name) == 0) |
return &attributes[i]; | return &attributes[i]; |
|
|
return 0; | return 0; |
} | } |
| |
|
const XmlAttribute* XmlEntry::findAttribute( |
|
int attrNsType, |
|
const char* name) const |
|
{ |
|
for (Uint32 i = 0, n = attributes.size(); i < n; i++) |
|
{ |
|
if ((attributes[i].nsType == attrNsType) && |
|
(strcmp(attributes[i].localName, name) == 0)) |
|
{ |
|
return &attributes[i]; |
|
} |
|
} |
|
|
|
return 0; |
|
} |
|
|
// Find first non-whitespace character (set first) and last non-whitespace | // Find first non-whitespace character (set first) and last non-whitespace |
// character (set last one past this). For example, consider this string: | // character (set last one past this). For example, consider this string: |
// | // |