version 1.1.1.1, 2001/01/14 19:53:23
|
version 1.55.4.1, 2013/06/03 22:35:14
|
|
|
//BEGIN_LICENSE |
//%LICENSE//////////////////////////////////////////////////////////////// |
// | // |
// Copyright (c) 2000 The Open Group, BMC Software, Tivoli Systems, IBM |
// Licensed to The Open Group (TOG) under one or more contributor license |
|
// agreements. Refer to the OpenPegasusNOTICE.txt file distributed with |
|
// this work for additional information regarding copyright ownership. |
|
// Each contributor licenses this file to you under the OpenPegasus Open |
|
// Source License; you may not use this file except in compliance with the |
|
// License. |
// | // |
// Permission is hereby granted, free of charge, to any person obtaining a | // Permission is hereby granted, free of charge, to any person obtaining a |
// copy of this software and associated documentation files (the "Software"), | // copy of this software and associated documentation files (the "Software"), |
|
|
// and/or sell copies of the Software, and to permit persons to whom the | // and/or sell copies of the Software, and to permit persons to whom the |
// Software is furnished to do so, subject to the following conditions: | // Software is furnished to do so, subject to the following conditions: |
// | // |
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
// The above copyright notice and this permission notice shall be included |
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
// in all copies or substantial portions of the Software. |
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
|
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
|
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
|
// DEALINGS IN THE SOFTWARE. |
|
// | // |
//END_LICENSE |
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
//BEGIN_HISTORY |
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
|
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
|
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
|
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
|
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
|
// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
// | // |
// Author: |
////////////////////////////////////////////////////////////////////////// |
// | // |
// $Log$ |
//%///////////////////////////////////////////////////////////////////////////// |
// Revision 1.1.1.1 2001/01/14 19:53:23 mike |
|
// Pegasus import |
|
// |
|
// |
|
//END_HISTORY |
|
| |
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
// | // |
|
|
// &quot; - full quote | // &quot; - full quote |
// &apos; - apostrophe | // &apos; - apostrophe |
// | // |
|
// as well as character (numeric) references: |
|
// |
|
// &#49; - decimal reference for character '1'

// &#x31; - hexadecimal reference for character '1'
|
// |
// 4. Element names and attribute names take the following form: | // 4. Element names and attribute names take the following form: |
// | // |
// [A-Za-z_][A-Za-z_0-9-.:] | // [A-Za-z_][A-Za-z_0-9-.:] |
|
|
// | // |
// TODO: | // TODO: |
// | // |
// Handle <!DOCTYPE...> sections which are complicated (containing |
// ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is |
|
// work. Handle <!DOCTYPE...> sections which are complicated (containing |
// rules rather than references to files). | // rules rather than references to files). |
// | // |
// Handle reference of this form: "&#913;"
|
// |
|
// Remove newlines from string literals: | // Remove newlines from string literals: |
// | // |
// Example: <xyz x="hello | // Example: <xyz x="hello |
|
|
// | // |
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
| |
|
#include <Pegasus/Common/Config.h> |
#include <cctype> | #include <cctype> |
#include <cassert> |
|
#include <cstdio> | #include <cstdio> |
#include <cstdlib> | #include <cstdlib> |
#include <cstring> | #include <cstring> |
#include "XmlParser.h" | #include "XmlParser.h" |
|
#include "Logger.h" |
|
#include "ExceptionRep.h" |
|
#include "CharSet.h" |
| |
PEGASUS_NAMESPACE_BEGIN | PEGASUS_NAMESPACE_BEGIN |
| |
|
|
for (; *p; p++) | for (; *p; p++) |
{ | { |
if (*p == '\n') | if (*p == '\n') |
std::cout << "\\n"; |
PEGASUS_STD(cout) << "\\n"; |
else if (*p == '\r') | else if (*p == '\r') |
std::cout << "\\r"; |
PEGASUS_STD(cout) << "\\r"; |
else if (*p == '\t') | else if (*p == '\t') |
std::cout << "\\t"; |
PEGASUS_STD(cout) << "\\t"; |
else | else |
std::cout << *p; |
PEGASUS_STD(cout) << *p; |
} | } |
} | } |
| |
|
|
char replacement; | char replacement; |
}; | }; |
| |
static EntityReference _references[] = |
// Implements a check for a whitespace character, without calling |
{ |
// isspace( ). The isspace( ) function is locale-sensitive, |
{ "&", 5, '&' }, |
// and incorrectly flags some chars above 0x7f as whitespace. This |
{ "<", 4, '<' }, |
// causes the XmlParser to incorrectly parse UTF-8 data. |
{ ">", 4, '>' }, |
// |
{ """, 6, '"' }, |
// Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml) |
{ "'", 6, '\'' } |
// defines white space as: |
}; |
// S ::= (#x20 | #x9 | #xD | #xA)+ |
|
static inline int _isspace(char c) |
static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0])); |
|
|
|
// Remove all redundant spaces from the given string: |
|
|
|
static void _normalize(char* text) |
|
{ |
|
Uint32 length = strlen(text); |
|
char* p = text; |
|
char* end = p + length; |
|
|
|
// Remove leading spaces: |
|
|
|
while (isspace(*p)) |
|
p++; |
|
|
|
if (p != text) |
|
memmove(text, p, end - p + 1); |
|
|
|
p = text; |
|
|
|
// Look for sequences of more than one space and remove all but one. |
|
|
|
for (;;) |
|
{ |
|
// Advance to the next space: |
|
|
|
while (*p && !isspace(*p)) |
|
p++; |
|
|
|
if (!*p) |
|
break; |
|
|
|
// Advance to the next non-space: |
|
|
|
char* q = p++; |
|
|
|
while (isspace(*p)) |
|
p++; |
|
|
|
// Discard trailing spaces (if we are at the end): |
|
|
|
if (!*p) |
|
{ | { |
*q = '\0'; |
return CharSet::isXmlWhiteSpace((Uint8)c); |
break; |
|
} | } |
| |
// Remove the redundant spaces: |
|
|
|
Uint32 n = p - q; |
|
|
|
if (n > 1) |
|
{ |
|
*q++ = ' '; |
|
memmove(q, p, end - p + 1); |
|
p = q; |
|
} |
|
} |
|
} |
|
| |
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
// | // |
|
|
// | // |
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
| |
static char* _xmlMessages[] = |
static const char* _xmlMessages[] = |
{ | { |
"Bad opening element", | "Bad opening element", |
"Bad closing element", | "Bad closing element", |
|
|
"Unterminated comment", | "Unterminated comment", |
"Unterminated CDATA block", | "Unterminated CDATA block", |
"Unterminated DOCTYPE", | "Unterminated DOCTYPE", |
"Too many attributes: parser only handles 10", |
|
"Malformed reference", | "Malformed reference", |
"Expected a comment or CDATA following \"<!\" sequence", | "Expected a comment or CDATA following \"<!\" sequence", |
"Closing element does not match opening element", | "Closing element does not match opening element", |
"One or more tags are still open", | "One or more tags are still open", |
"More than one root element was encountered", | "More than one root element was encountered", |
"Validation error", | "Validation error", |
"Semantic error" |
"Semantic error", |
|
"Namespace not declared" |
}; | }; |
| |
static String _formMessage(Uint32 code, Uint32 line, const String& message) |
static const char* _xmlKeys[] = |
{ | { |
String result = _xmlMessages[Uint32(code) - 1]; |
"Common.XmlParser.BAD_START_TAG", |
|
"Common.XmlParser.BAD_END_TAG", |
|
"Common.XmlParser.BAD_ATTRIBUTE_NAME", |
|
"Common.XmlParser.EXPECTED_EQUAL_SIGN", |
|
"Common.XmlParser.BAD_ATTRIBUTE_VALUE", |
|
"Common.XmlParser.MINUS_MINUS_IN_COMMENT", |
|
"Common.XmlParser.UNTERMINATED_COMMENT", |
|
"Common.XmlParser.UNTERMINATED_CDATA", |
|
"Common.XmlParser.UNTERMINATED_DOCTYPE", |
|
"Common.XmlParser.MALFORMED_REFERENCE", |
|
"Common.XmlParser.EXPECTED_COMMENT_OR_CDATA", |
|
"Common.XmlParser.START_END_MISMATCH", |
|
"Common.XmlParser.UNCLOSED_TAGS", |
|
"Common.XmlParser.MULTIPLE_ROOTS", |
|
"Common.XmlParser.VALIDATION_ERROR", |
|
"Common.XmlParser.SEMANTIC_ERROR", |
|
"Common.XmlParser.UNDECLARED_NAMESPACE" |
|
}; |
|
|
| |
char buffer[32]; |
static MessageLoaderParms _formMessage( |
sprintf(buffer, "%d", line); |
Uint32 code, |
result.append(": on line "); |
Uint32 line, |
result.append(buffer); |
const String& message) |
|
{ |
|
String dftMsg = _xmlMessages[Uint32(code) - 1]; |
|
const char* key = _xmlKeys[Uint32(code) - 1]; |
|
String msg = message; |
| |
if (message.getLength()) |
dftMsg.append(": on line $0"); |
|
if (message.size()) |
{ | { |
result.append(": "); |
msg = ": " + msg; |
result.append(message); |
dftMsg.append("$1"); |
} | } |
| |
return result; |
return MessageLoaderParms(key, dftMsg.getCString(), line ,msg); |
} | } |
| |
|
// Builds the message parameters for an XML error that carries no extra
// detail text. The default text is the _xmlMessages entry for the code with
// ": on line $0" appended; the message key is the matching _xmlKeys entry and
// the line number is passed along as the single message argument.
static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
{
    // Both tables are indexed by (code - 1).
    const Uint32 index = Uint32(code) - 1;

    String defaultText = _xmlMessages[index];
    defaultText.append(": on line $0");

    const char* messageKey = _xmlKeys[index];

    return MessageLoaderParms(messageKey, defaultText.getCString(), line);
}
|
|
|
|
XmlException::XmlException( | XmlException::XmlException( |
XmlException::Code code, | XmlException::Code code, |
Uint32 lineNumber, | Uint32 lineNumber, |
|
|
| |
} | } |
| |
|
|
|
// Constructs an XmlException from an error code, a line number and a
// caller-supplied MessageLoaderParms. The base message is produced by
// _formPartialMessage(); the message loaded from msgParms is appended to it.
//
// Note: when msgParms.default_msg is non-empty it is modified in place
// (prefixed with ": ") before the message is loaded.
XmlException::XmlException(
    XmlException::Code code,
    Uint32 lineNumber,
    MessageLoaderParms& msgParms)
    : Exception(_formPartialMessage(code, lineNumber))
{
    // Only add the separator when there is detail text to separate.
    if (msgParms.default_msg.size())
        msgParms.default_msg = ": " + msgParms.default_msg;

    _rep->message.append(MessageLoader::getMessage(msgParms));
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
// | // |
// XmlValidationError | // XmlValidationError |
|
|
const String& message) | const String& message) |
: XmlException(XmlException::VALIDATION_ERROR, lineNumber, message) | : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message) |
{ | { |
|
} |
|
|
| |
|
// Constructs a validation error for the given line whose detail text is
// supplied as MessageLoaderParms. All work is delegated to XmlException with
// the VALIDATION_ERROR code.
XmlValidationError::XmlValidationError(
    Uint32 lineNumber,
    MessageLoaderParms& msgParms)
    : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
{
    // Nothing further to initialize.
}
| |
|
|
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
// | // |
// XmlSemanticError | // XmlSemanticError |
|
|
// Constructs a semantic error for the given line with free-form detail text.
//
// The error is reported with XmlException::SEMANTIC_ERROR. Using
// VALIDATION_ERROR here would make a semantic error indistinguishable from an
// XmlValidationError by code and message, and would disagree with the
// MessageLoaderParms overload of this constructor.
XmlSemanticError::XmlSemanticError(
    Uint32 lineNumber,
    const String& message)
    : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
{
}
| |
|
|
|
// Constructs a semantic error for the given line whose detail text is
// supplied as MessageLoaderParms. All work is delegated to XmlException with
// the SEMANTIC_ERROR code.
XmlSemanticError::XmlSemanticError(
    Uint32 lineNumber,
    MessageLoaderParms& msgParms)
    : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
{
    // Nothing further to initialize.
}
| |
|
|
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
// | // |
// XmlParser | // XmlParser |
// | // |
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
| |
XmlParser::XmlParser(char* text) : _line(1), _text(text), _current(text), |
XmlParser::XmlParser(char* text, XmlNamespace* ns, Boolean hideEmptyTags) |
_restoreChar('\0'), _foundRoot(false) |
: _line(1), |
|
_current(text), |
|
_restoreChar('\0'), |
|
_foundRoot(false), |
|
_supportedNamespaces(ns), |
|
// Start valid indexes with -2. -1 is reserved for not found. |
|
_currentUnsupportedNSType(-2), |
|
_hideEmptyTags(hideEmptyTags) |
|
{ |
|
} |
|
|
|
// Advances p past every consecutive character that _isspace() classifies as
// whitespace, incrementing line once for each newline skipped. Stops at the
// first non-whitespace character or at the terminating null.
inline void _skipWhitespace(Uint32& line, char*& p)
{
    for (; *p && _isspace(*p); p++)
    {
        if (*p == '\n')
            line++;
    }
}
|
|
|
#if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \ |
|
defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC) |
|
#pragma optimize( "", off ) |
|
#endif |
|
// Recognizes one of the five predefined entity names (gt, lt, apos, quot,
// amp) at p, where p points just past the '&'.
//
// On a match, p is advanced past the terminating ';' and the character the
// entity stands for is returned. Otherwise p is left untouched and -1 is
// returned.
static int _getEntityRef(char*& p)
{
    static const struct
    {
        const char* name;   // entity name including the terminating ';'
        unsigned length;    // number of characters in name
        char value;         // character the entity stands for
    }
    entities[] =
    {
        { "gt;",   3, '>'  },
        { "lt;",   3, '<'  },
        { "apos;", 5, '\'' },
        { "quot;", 5, '"'  },
        { "amp;",  4, '&'  }
    };

    const unsigned count = sizeof(entities) / sizeof(entities[0]);

    for (unsigned i = 0; i < count; i++)
    {
        // strncmp stops at the first difference, so a short input is never
        // read beyond its null terminator.
        if (strncmp(p, entities[i].name, entities[i].length) == 0)
        {
            p += entities[i].length;
            return entities[i].value;
        }
    }

    return -1;
}
|
#if defined(PEGASUS_PLATFORM_WIN64_IA64_MSVC) || \ |
|
defined(PEGASUS_PLATFORM_WIN64_X86_64_MSVC) |
|
#pragma optimize( "", on ) |
|
#endif |
|
|
|
// Parses a numeric character reference. On entry p points just past "&#";
// the accepted forms are "<decimal digits>;" and "x<hex digits>;".
//
// Returns the character code (0..255) and advances p past the ';' on
// success. Returns -1 and leaves p unchanged when the reference is
// malformed, has more than 5 decimal / 4 hexadecimal digits, or denotes a
// value above 255.
//
// The digit run is validated explicitly rather than relying on strtoul()
// alone: strtoul() also accepts leading whitespace, a '+'/'-' sign and, in
// base 16, a "0x" prefix, so inputs such as "&# 65;", "&#+65;" or
// "&#x0x41;" would otherwise be treated as valid references.
static inline int _getCharRef(char*& p)
{
    int base = 10;
    int maxDigits = 5;
    char* digits = p;

    if (*digits == 'x')
    {
        base = 16;
        maxDigits = 4;
        digits++;
    }

    // Find the end of the run of digits that are valid in this base.
    char* end = digits;

    while ((base == 16) ?
        isxdigit(static_cast<unsigned char>(*end)) :
        isdigit(static_cast<unsigned char>(*end)))
    {
        end++;
    }

    // Need at least one digit, a terminating ';', and a sane length.
    if ((end == digits) || (*end != ';') || (end - digits > maxDigits))
    {
        return -1;
    }

    // The length check above guarantees that the conversion cannot overflow.
    const unsigned long ch = strtoul(digits, 0, base);

    if (ch > 255)
    {
        return -1;
    }

    p = end + 1;

    return static_cast<int>(ch);
}
|
|
|
// Parse an entity reference or a character reference |
|
static inline int _getRef(Uint32 line, char*& p) |
|
{ |
|
int ch; |
|
|
|
if (*p == '#') |
|
{ |
|
ch = _getCharRef(++p); |
|
} |
|
else |
|
{ |
|
ch = _getEntityRef(p); |
|
} |
|
|
|
if (ch == -1) |
|
{ |
|
throw XmlException(XmlException::MALFORMED_REFERENCE, line); |
|
} |
|
|
|
return ch; |
} | } |
| |
Boolean XmlParser::next(XmlEntry& entry) |
// Normalizes element content in place, starting at p and stopping at the
// next '<' or at the terminating null:
//   - entity and character references are replaced by their character,
//   - whitespace inside the value is kept as is,
//   - whitespace at the end of the value is dropped.
//
// line is incremented for every newline consumed. On return p points at the
// '<' (or null) that ended the scan and textLen holds the length of the
// normalized value.
static inline void _normalizeElementValue(
    Uint32& line,
    char*& p,
    Uint32 &textLen)
{
    char* const valueStart = p;

    // Write position for the normalized text; it never gets ahead of p.
    char* out = p;

    while (*p && (*p != '<'))
    {
        if (_isspace(*p))
        {
            // Remember where this run of whitespace begins. Whether it is
            // kept depends on what follows it.
            const char* runStart = p;

            if (*p == '\n')
            {
                line++;
            }
            p++;

            _skipWhitespace(line, p);

            if (!*p || (*p == '<'))
            {
                // The run ends the value: trailing whitespace is dropped.
                break;
            }

            // Interior whitespace is transferred without compressing it.
            for (const char* in = runStart; in < p; in++)
            {
                *out++ = *in;
            }
        }
        else if (*p == '&')
        {
            // Replace an entity or character reference by its character.
            *out++ = _getRef(line, ++p);
        }
        else
        {
            *out++ = *p++;
        }
    }

    // Terminate the normalized text only when it is shorter than the input;
    // otherwise out sits on the character that ended the scan.
    if (out != p)
    {
        *out = '\0';
    }

    textLen = (Uint32)(out - valueStart);
}
|
|
|
// Normalizes an attribute value in place. On entry p points just past the
// opening quote and end_char is the quote character that closes the value.
//   - leading whitespace is skipped (start is set to the first character
//     after it),
//   - every run of whitespace is compressed to a single space,
//   - entity and character references are replaced by their character,
//   - a trailing space is removed.
//
// line is incremented for every newline consumed. On return p points at the
// closing end_char, or at the terminating null if none was found.
static inline void _normalizeAttributeValue(
    Uint32& line,
    char*& p,
    char end_char,
    char*& start)
{
    _skipWhitespace(line, p);
    start = p;

    // Write position for the normalized text; it never gets ahead of p.
    char* out = p;

    while (*p && (*p != end_char))
    {
        if (_isspace(*p))
        {
            // Collapse the whole run of whitespace into one space, keeping
            // the line counter up to date.
            if (*p == '\n')
            {
                line++;
            }
            p++;

            *out++ = ' ';

            _skipWhitespace(line, p);
        }
        else if (*p == '&')
        {
            // Replace an entity or character reference by its character.
            *out++ = _getRef(line, ++p);
        }
        else
        {
            *out++ = *p++;
        }
    }

    // Runs were already compressed, so at most one trailing space remains in
    // the output. Because p >= out, the last input character tells whether
    // the value ended in whitespace. This must be evaluated before the copy
    // below, which may overwrite p[-1], and must not look at p[-1] when the
    // value is empty.
    const Boolean endsWithSpace = (p != start) && _isspace(p[-1]);

    // Carry the end_char (or the null terminator) over to the output.
    *out = *p;

    if (endsWithSpace)
    {
        out--;
    }

    // Terminate the normalized text only when it is shorter than the input.
    if (out != p)
    {
        *out = '\0';
    }
}
|
|
|
Boolean XmlParser::_next( |
|
XmlEntry& entry, |
|
Boolean includeComment) |
{ | { |
if (!_putBackStack.isEmpty()) | if (!_putBackStack.isEmpty()) |
{ | { |
|
|
_restoreChar = '\0'; | _restoreChar = '\0'; |
} | } |
| |
// Skip over any whitespace: |
entry.attributes.clear(); |
|
|
|
if (_supportedNamespaces) |
|
{ |
|
// Remove namespaces of a deeper scope level from the stack. |
|
while (!_nameSpaces.isEmpty() && |
|
_nameSpaces.top().scopeLevel > _stack.size()) |
|
{ |
|
_nameSpaces.pop(); |
|
} |
|
} |
| |
_skipWhitespace(_current); |
// Loop until we are done with comments if includeComment is false. |
|
do |
|
{ |
|
// Skip over any whitespace: |
|
_skipWhitespace(_line, _current); |
| |
if (!*_current) | if (!*_current) |
{ | { |
|
|
| |
_stack.pop(); | _stack.pop(); |
} | } |
|
|
return true; |
|
} | } |
else | else |
{ | { |
|
// Normalize the content: |
|
|
|
char* start = _current; |
|
Uint32 textLen; |
|
_normalizeElementValue(_line, _current, textLen); |
|
|
|
// Get the content: |
|
|
entry.type = XmlEntry::CONTENT; | entry.type = XmlEntry::CONTENT; |
entry.text = _current; |
entry.text = start; |
_getContent(_current); |
entry.textLen = textLen; |
|
|
|
// Overwrite '<' with a null character (temporarily). |
|
|
_restoreChar = *_current; | _restoreChar = *_current; |
*_current = '\0'; | *_current = '\0'; |
| |
if (nullTerminator) | if (nullTerminator) |
*nullTerminator = '\0'; | *nullTerminator = '\0'; |
|
} |
|
} while (!includeComment && entry.type == XmlEntry::COMMENT); |
|
|
|
if (_supportedNamespaces && |
|
(entry.type == XmlEntry::START_TAG || |
|
entry.type == XmlEntry::EMPTY_TAG || |
|
entry.type == XmlEntry::END_TAG)) |
|
{ |
|
// Determine the namespace type for this entry |
|
|
|
if (entry.type == XmlEntry::START_TAG || |
|
entry.type == XmlEntry::EMPTY_TAG) |
|
{ |
|
// Process namespace declarations and determine the namespace type |
|
// for the attributes. |
|
|
|
Uint32 scopeLevel = _stack.size(); |
|
if (entry.type == XmlEntry::EMPTY_TAG) |
|
{ |
|
// Empty tags are deeper scope, but not pushed onto the stack |
|
scopeLevel++; |
|
} |
|
|
|
for (Uint32 i = 0, n = entry.attributes.size(); i < n; i++) |
|
{ |
|
XmlAttribute& attr = entry.attributes[i]; |
|
if ((strncmp(attr.name, "xmlns:", 6) == 0) || |
|
(strcmp(attr.name, "xmlns") == 0)) |
|
{ |
|
// Process a namespace declaration |
|
XmlNamespace ns; |
|
if (attr.name[5] == ':') |
|
{ |
|
ns.localName = attr.localName; |
|
} |
|
else |
|
{ |
|
// Default name space has no local name |
|
ns.localName = 0; |
|
} |
|
ns.extendedName = attr.value; |
|
ns.scopeLevel = scopeLevel; |
|
ns.type = _getSupportedNamespaceType(ns.extendedName); |
|
|
|
// If the namespace is not supported, assign it a unique |
|
// negative identifier. |
|
if (ns.type == -1) |
|
{ |
|
ns.type = _currentUnsupportedNSType--; |
|
} |
| |
_substituteReferences((char*)entry.text); |
_nameSpaces.push(ns); |
_normalize((char*)entry.text); |
} |
|
else |
|
{ |
|
// Get the namespace type for this attribute. |
|
attr.nsType = _getNamespaceType(attr.name); |
|
} |
|
} |
|
} |
|
|
|
entry.nsType = _getNamespaceType(entry.text); |
|
} |
|
else |
|
{ |
|
entry.nsType = -1; |
|
} |
| |
return true; | return true; |
} | } |
|
|
|
// Fetches the next entry via _next() and returns its result.
//
// When _hideEmptyTags is set, an EMPTY_TAG is never handed to the caller:
// it is reported as a START_TAG, and a matching END_TAG (same text, nsType
// and localName) is pushed on the put-back stack.
Boolean XmlParser::next(XmlEntry& entry, Boolean includeComment)
{
    if (!_hideEmptyTags)
        return _next(entry, includeComment);

    if (!_next(entry, includeComment))
        return false;

    if (entry.type != XmlEntry::EMPTY_TAG)
        return true;

    // Present the empty tag as a start tag and queue the end tag that
    // completes the pair.
    entry.type = XmlEntry::START_TAG;

    XmlEntry closing;
    closing.type = XmlEntry::END_TAG;
    closing.text = entry.text;
    closing.nsType = entry.nsType;
    closing.localName = entry.localName;

    _putBackStack.push(closing);

    return true;
}
|
|
|
// Get the namespace type of the given tag |
|
int XmlParser::_getNamespaceType(const char* tag) |
|
{ |
|
const char* pos = strchr(tag, ':'); |
|
|
|
// If ':' is not found, the tag is not namespace qualified and we |
|
// need to look for the default name space. |
|
|
|
// Search the namespace stack from the top |
|
for (Sint32 i = _nameSpaces.size() - 1; i >=0; i--) |
|
{ |
|
// If ':' is found, look for the name space with the matching |
|
// local name... |
|
if ((pos && _nameSpaces[i].localName && |
|
!strncmp(_nameSpaces[i].localName, tag, pos - tag)) || |
|
// ... otherwise look for the default name space. It's the |
|
// one with localName set to NULL |
|
(!pos && !_nameSpaces[i].localName)) |
|
{ |
|
return _nameSpaces[i].type; |
|
} |
|
} |
|
|
|
// If the tag is namespace qualified, but the name space has not been |
|
// declared, it's malformed XML and we must throw an exception. |
|
// Note: The "xml" namespace is specifically defined by the W3C as a |
|
// reserved prefix ("http://www.w3.org/XML/1998/namespace"). |
|
if (pos && (strncmp(tag, "xml:", 4) != 0)) |
|
{ |
|
throw XmlException(XmlException::UNDECLARED_NAMESPACE, _line); |
|
} |
|
|
|
// Otherwise it's OK not to have a name space. |
|
return -1; |
|
} |
|
|
|
// Given the extended namespace name, find it in the table of supported |
|
// namespaces and return its type. |
|
int XmlParser::_getSupportedNamespaceType(const char* extendedName) |
|
{ |
|
for (Sint32 i = 0; |
|
_supportedNamespaces[i].localName != 0; |
|
i++) |
|
{ |
|
PEGASUS_ASSERT(_supportedNamespaces[i].type == i); |
|
if (!strcmp(_supportedNamespaces[i].extendedName, extendedName)) |
|
{ |
|
return _supportedNamespaces[i].type; |
|
} |
|
} |
|
return -1; |
|
} |
|
|
|
// Returns a pointer to the topmost entry on the namespace stack whose type
// equals nsType, or 0 when the stack holds no such entry.
XmlNamespace* XmlParser::getNamespace(int nsType)
{
    // Walk from the top of the stack towards the bottom.
    Sint32 i = _nameSpaces.size();

    while (--i >= 0)
    {
        if (_nameSpaces[i].type == nsType)
            return &_nameSpaces[i];
    }

    return 0;
}
| |
void XmlParser::putBack(XmlEntry& entry) | void XmlParser::putBack(XmlEntry& entry) |
|
|
// Nothing to do! | // Nothing to do! |
} | } |
| |
void XmlParser::_skipWhitespace(char*& p) |
// A-Za-z0-9_-. (Note that ':' is not included and must be checked separately) |
|
static unsigned char _isInnerElementChar[] = |
{ | { |
while (*p && isspace(*p)) |
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|
0,0,0,0,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1, |
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
|
1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|
}; |
|
|
|
inline Boolean _getQName(char*& p, const char*& localName) |
{ | { |
if (*p == '\n') |
localName = p; |
_line++; |
|
| |
|
if (!CharSet::isAlNumUnder(Uint8(*p))) |
|
return false; |
|
|
|
p++; |
|
|
|
// No explicit test for NULL termination is needed. |
|
// On position 0 of the array false is returned. |
|
while (_isInnerElementChar[Uint8(*p)]) |
|
p++; |
|
|
|
// We've validated the prefix, now validate the local name |
|
if (*p == ':') |
|
{ |
|
localName = ++p; |
|
|
|
if (!CharSet::isAlNumUnder(Uint8(*p))) |
|
return false; |
|
|
|
p++; |
|
// No explicit test for NULL termination is needed. |
|
// On position 0 of the array false is returned. |
|
while (_isInnerElementChar[Uint8(*p)]) |
p++; | p++; |
} | } |
|
|
|
return true; |
} | } |
| |
Boolean XmlParser::_getElementName(char*& p) |
Boolean XmlParser::_getElementName(char*& p, const char*& localName) |
{ |
|
if (!isalpha(*p) && *p != '_') |
|
{ | { |
std::cout << __LINE__ << std::endl; |
if (!_getQName(p, localName)) |
throw XmlException(XmlException::BAD_START_TAG, _line); | throw XmlException(XmlException::BAD_START_TAG, _line); |
} |
|
|
|
while (*p && |
|
(isalnum(*p) || *p == '_' || *p == '-' || *p == ':' || *p == '.')) |
|
p++; |
|
| |
// The next character must be a space: | // The next character must be a space: |
| |
if (isspace(*p)) |
if (_isspace(*p)) |
{ | { |
*p++ = '\0'; | *p++ = '\0'; |
_skipWhitespace(p); |
_skipWhitespace(_line, p); |
} | } |
| |
if (*p == '>') | if (*p == '>') |
|
|
return false; | return false; |
} | } |
| |
Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement) |
Boolean XmlParser::_getOpenElementName( |
|
char*& p, |
|
const char*& localName, |
|
Boolean& openCloseElement) |
{ | { |
openCloseElement = false; | openCloseElement = false; |
| |
if (!isalpha(*p) && *p != '_') |
if (!_getQName(p, localName)) |
{ |
|
std::cout << __LINE__ << std::endl; |
|
throw XmlException(XmlException::BAD_START_TAG, _line); | throw XmlException(XmlException::BAD_START_TAG, _line); |
} |
|
|
|
while (*p && |
|
(isalnum(*p) || *p == '_' || *p == '-' || *p == ':' || *p == '.')) |
|
p++; |
|
| |
// The next character must be a space: | // The next character must be a space: |
| |
if (isspace(*p)) |
if (_isspace(*p)) |
{ | { |
*p++ = '\0'; | *p++ = '\0'; |
_skipWhitespace(p); |
_skipWhitespace(_line, p); |
} | } |
| |
if (*p == '>') | if (*p == '>') |
|
|
return false; | return false; |
} | } |
| |
void XmlParser::_getAttributeNameAndEqual(char*& p) |
void XmlParser::_getAttributeNameAndEqual(char*& p, const char*& localName) |
{ |
|
if (!isalpha(*p) && *p != '_') |
|
{ | { |
std::cout << __LINE__ << std::endl; |
if (!_getQName(p, localName)) |
throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line); | throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line); |
} |
|
|
|
while (*p && |
|
(isalnum(*p) || *p == '_' || *p == '-' || *p == ':' || *p == '.')) |
|
p++; |
|
| |
char* term = p; | char* term = p; |
| |
_skipWhitespace(p); |
_skipWhitespace(_line, p); |
| |
if (*p != '=') | if (*p != '=') |
{ |
|
std::cout << __LINE__ << std::endl; |
|
throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line); | throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line); |
} |
|
| |
p++; | p++; |
| |
_skipWhitespace(p); |
_skipWhitespace(_line, p); |
| |
*term = '\0'; | *term = '\0'; |
} | } |
| |
void XmlParser::_getAttributeValue(char*& p) |
|
{ |
|
// ATTN-B: handle values contained in semiquotes: |
|
|
|
if (*p != '"' && *p != '\'') |
|
{ |
|
std::cout << __LINE__ << std::endl; |
|
throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); |
|
} |
|
|
|
char startChar = *p++; |
|
|
|
while (*p && *p != startChar) |
|
p++; |
|
|
|
if (*p != startChar) |
|
{ |
|
std::cout << __LINE__ << std::endl; |
|
throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); |
|
} |
|
|
|
*p++ = '\0'; |
|
} |
|
|
|
void XmlParser::_getComment(char*& p) | void XmlParser::_getComment(char*& p) |
{ | { |
// Now p points to first non-whitespace character beyond "<--" sequence: | // Now p points to first non-whitespace character beyond "<--" sequence: |
|
|
p++; | p++; |
} | } |
| |
void XmlParser::_getContent(char*& p) |
|
{ |
|
while (*p && *p != '<') |
|
{ |
|
if (*p == '\n') |
|
_line++; |
|
|
|
p++; |
|
} |
|
} |
|
|
|
void XmlParser::_substituteReferences(char* text) |
|
{ |
|
Uint32 rem = strlen(text); |
|
|
|
for (char* p = text; *p; p++, rem--) |
|
{ |
|
if (*p == '&') |
|
{ |
|
// Look for predefined entity reference: |
|
|
|
Boolean found = false; |
|
|
|
for (Uint32 i = 0; i < _REFERENCES_SIZE; i++) |
|
{ |
|
Uint32 length = _references[i].length; |
|
const char* match = _references[i].match; |
|
|
|
if (strncmp(p, _references[i].match, length) == 0) |
|
{ |
|
found = true; |
|
*p = _references[i].replacement; |
|
char* q = p + length; |
|
rem = rem - length + 1; |
|
memmove(p + 1, q, rem); |
|
} |
|
} |
|
|
|
// If not found, then at least make sure it is well formed: |
|
|
|
if (!found) |
|
{ |
|
char* start = p; |
|
p++; |
|
|
|
XmlException::Code code = XmlException::MALFORMED_REFERENCE; |
|
|
|
if (isalpha(*p) || *p == '_') |
|
{ |
|
for (p++; *p && *p != ';'; p++) |
|
{ |
|
if (!isalnum(*p) && *p != '_') |
|
throw XmlException(code, _line); |
|
} |
|
} |
|
else if (*p == '#') |
|
{ |
|
for (p++ ; *p && *p != ';'; p++) |
|
{ |
|
if (!isdigit(*p)) |
|
throw XmlException(code, _line); |
|
} |
|
} |
|
|
|
if (*p != ';') |
|
throw XmlException(code, _line); |
|
|
|
rem -= p - start; |
|
} |
|
} |
|
} |
|
} |
|
|
|
static const char _EMPTY_STRING[] = ""; |
|
|
|
void XmlParser::_getElement(char*& p, XmlEntry& entry) | void XmlParser::_getElement(char*& p, XmlEntry& entry) |
{ | { |
entry.attributeCount = 0; |
|
|
|
//-------------------------------------------------------------------------- | //-------------------------------------------------------------------------- |
// Get the element name (expect one of these: '?', '!', [A-Za-z_]) | // Get the element name (expect one of these: '?', '!', [A-Za-z_]) |
//-------------------------------------------------------------------------- | //-------------------------------------------------------------------------- |
|
|
entry.type = XmlEntry::XML_DECLARATION; | entry.type = XmlEntry::XML_DECLARATION; |
entry.text = ++p; | entry.text = ++p; |
| |
Boolean openCloseElement = false; |
if (_getElementName(p, entry.localName)) |
|
|
if (_getElementName(p)) |
|
return; | return; |
} | } |
else if (*p == '!') | else if (*p == '!') |
|
|
entry.type = XmlEntry::CDATA; | entry.type = XmlEntry::CDATA; |
entry.text = p; | entry.text = p; |
_getCData(p); | _getCData(p); |
|
entry.textLen = strlen(entry.text); |
return; | return; |
} | } |
else if (memcmp(p, "DOCTYPE", 7) == 0) | else if (memcmp(p, "DOCTYPE", 7) == 0) |
{ | { |
entry.type = XmlEntry::DOCTYPE; | entry.type = XmlEntry::DOCTYPE; |
entry.text = _EMPTY_STRING; |
entry.text = ""; |
_getDocType(p); | _getDocType(p); |
return; | return; |
} | } |
|
|
entry.type = XmlEntry::END_TAG; | entry.type = XmlEntry::END_TAG; |
entry.text = ++p; | entry.text = ++p; |
| |
if (!_getElementName(p)) |
if (!_getElementName(p, entry.localName)) |
{ |
|
std::cout << __LINE__ << std::endl; |
|
throw(XmlException(XmlException::BAD_END_TAG, _line)); | throw(XmlException(XmlException::BAD_END_TAG, _line)); |
} |
|
| |
return; | return; |
} | } |
else if (isalpha(*p) || *p == '_') |
else if (CharSet::isAlphaUnder(Uint8(*p))) |
{ | { |
entry.type = XmlEntry::START_TAG; | entry.type = XmlEntry::START_TAG; |
entry.text = p; | entry.text = p; |
| |
Boolean openCloseElement = false; | Boolean openCloseElement = false; |
| |
if (_getOpenElementName(p, openCloseElement)) |
if (_getOpenElementName(p, entry.localName, openCloseElement)) |
{ | { |
if (openCloseElement) | if (openCloseElement) |
entry.type = XmlEntry::EMPTY_TAG; | entry.type = XmlEntry::EMPTY_TAG; |
|
|
} | } |
} | } |
else | else |
{ |
|
std::cout << __LINE__ << std::endl; |
|
throw XmlException(XmlException::BAD_START_TAG, _line); | throw XmlException(XmlException::BAD_START_TAG, _line); |
} |
|
| |
//-------------------------------------------------------------------------- | //-------------------------------------------------------------------------- |
// Grab all the attributes: | // Grab all the attributes: |
|
|
} | } |
| |
XmlAttribute attr; | XmlAttribute attr; |
|
attr.nsType = -1; |
attr.name = p; | attr.name = p; |
_getAttributeNameAndEqual(p); |
_getAttributeNameAndEqual(p, attr.localName); |
| |
if (*p != '"' && *p != '\'') |
// Get the attribute value (e.g., "some value") |
|
{ |
|
if ((*p != '"') && (*p != '\'')) |
{ | { |
std::cout << __LINE__ << std::endl; |
|
throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); | throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); |
} | } |
| |
attr.value = p + 1; |
char quote = *p++; |
_getAttributeValue(p); |
|
|
char* start; |
|
_normalizeAttributeValue(_line, p, quote, start); |
|
attr.value = start; |
|
|
|
if (*p != quote) |
|
{ |
|
throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); |
|
} |
|
|
|
// Overwrite the closing quote with a null-terminator: |
|
|
|
*p++ = '\0'; |
|
} |
| |
if (entry.type == XmlEntry::XML_DECLARATION) | if (entry.type == XmlEntry::XML_DECLARATION) |
{ | { |
// The next thing must be a space or a "?>": | // The next thing must be a space or a "?>": |
| |
if (!(p[0] == '?' && p[1] == '>') && !isspace(*p)) |
if (!(p[0] == '?' && p[1] == '>') && !_isspace(*p)) |
{ | { |
std::cout << __LINE__ << std::endl; |
|
throw XmlException( | throw XmlException( |
XmlException::BAD_ATTRIBUTE_VALUE, _line); | XmlException::BAD_ATTRIBUTE_VALUE, _line); |
} | } |
} | } |
else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || isspace(*p))) |
else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || _isspace(*p))) |
{ | { |
// The next thing must be a space or a '>': | // The next thing must be a space or a '>': |
| |
std::cout << __LINE__ << std::endl; |
|
throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); | throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); |
} | } |
| |
_skipWhitespace(p); |
_skipWhitespace(_line, p); |
|
|
if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES) |
|
throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line); |
|
| |
_substituteReferences((char*)attr.value); |
entry.attributes.append(attr); |
entry.attributes[entry.attributeCount++] = attr; |
|
} | } |
} | } |
| |
|
|
| |
void XmlEntry::print() const | void XmlEntry::print() const |
{ | { |
std::cout << "=== " << _typeStrings[type] << " "; |
PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " "; |
| |
Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT; | Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT; |
| |
if (needQuotes) | if (needQuotes) |
std::cout << "\""; |
PEGASUS_STD(cout) << "\""; |
| |
_printValue(text); | _printValue(text); |
| |
if (needQuotes) | if (needQuotes) |
std::cout << "\""; |
PEGASUS_STD(cout) << "\""; |
| |
std::cout << '\n'; |
PEGASUS_STD(cout) << '\n'; |
| |
for (Uint32 i = 0; i < attributeCount; i++) |
for (Uint32 i = 0, n = attributes.size(); i < n; i++) |
{ | { |
std::cout << " " << attributes[i].name << "=\""; |
PEGASUS_STD(cout) << " " << attributes[i].name << "=\""; |
_printValue(attributes[i].value); | _printValue(attributes[i].value); |
std::cout << "\"" << std::endl; |
PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl); |
} | } |
} | } |
| |
// Looks up an attribute of this entry by its name.
//
// Returns a pointer to the first attribute whose name compares equal to
// `name` under strcmp(), or 0 when no attribute matches.
const XmlAttribute* XmlEntry::findAttribute(
    const char* name) const
{
    for (Uint32 i = 0, n = attributes.size(); i < n; i++)
    {
        if (strcmp(attributes[i].name, name) == 0)
            return &attributes[i];
    }

    return 0;
}
| |
|
// Looks up an attribute of this entry by namespace type and local name.
//
// Returns a pointer to the first attribute whose nsType equals
// `attrNsType` and whose localName compares equal to `name` under
// strcmp(), or 0 when no attribute matches.
const XmlAttribute* XmlEntry::findAttribute(
    int attrNsType,
    const char* name) const
{
    for (Uint32 i = 0, n = attributes.size(); i < n; i++)
    {
        if ((attributes[i].nsType == attrNsType) &&
            (strcmp(attributes[i].localName, name) == 0))
        {
            return &attributes[i];
        }
    }

    return 0;
}
|
|
// Find first non-whitespace character (set first) and last non-whitespace | // Find first non-whitespace character (set first) and last non-whitespace |
// character (set last one past this). For example, consider this string: | // character (set last one past this). For example, consider this string: |
// | // |
|
|
{ | { |
first = str; | first = str; |
| |
while (isspace(*first)) |
while (_isspace(*first)) |
first++; | first++; |
| |
if (!*first) | if (!*first) |
|
|
| |
last = first + strlen(first); | last = first + strlen(first); |
| |
while (last != first && isspace(last[-1])) |
while (last != first && _isspace(last[-1])) |
last--; | last--; |
} | } |
| |
|
|
if (!end || end != last) | if (!end || end != last) |
return false; | return false; |
| |
value = Uint32(tmp); |
value = static_cast<Real32>(tmp); |
return true; | return true; |
} | } |
| |
|
|
if (!getAttributeValue(name, tmp)) | if (!getAttributeValue(name, tmp)) |
return false; | return false; |
| |
value = tmp; |
value = String(tmp); |
return true; | return true; |
} | } |
| |
|
// Appends the characters of the null-terminated string `str` to `out`.
// The length is measured with strlen(), so the terminator itself is not
// included in the appended byte count.
void XmlAppendCString(Buffer& out, const char* str)
{
    out.append(str, static_cast<Uint32>(strlen(str)));
}
|
|
PEGASUS_NAMESPACE_END | PEGASUS_NAMESPACE_END |