version 1.9, 2001/06/03 17:05:03
|
version 1.42, 2007/07/30 06:50:57
|
|
|
//%///////////////////////////////////////////////////////////////////////////// |
//%2006//////////////////////////////////////////////////////////////////////// |
// |
|
// Copyright (c) 2000 The Open Group, BMC Software, Tivoli Systems, IBM |
|
// | // |
// Permission is hereby granted, free of charge, to any person obtaining a |
// Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development |
// copy of this software and associated documentation files (the "Software"), |
// Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems. |
// to deal in the Software without restriction, including without limitation |
// Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.; |
// the rights to use, copy, modify, merge, publish, distribute, sublicense, |
// IBM Corp.; EMC Corporation, The Open Group. |
// and/or sell copies of the Software, and to permit persons to whom the |
// Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.; |
// Software is furnished to do so, subject to the following conditions: |
// IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group. |
// |
// Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.; |
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
// EMC Corporation; VERITAS Software Corporation; The Open Group. |
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
// Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.; |
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
// EMC Corporation; Symantec Corporation; The Open Group. |
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
// |
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
// Permission is hereby granted, free of charge, to any person obtaining a copy |
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
// of this software and associated documentation files (the "Software"), to |
// DEALINGS IN THE SOFTWARE. |
// deal in the Software without restriction, including without limitation the |
|
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
|
// sell copies of the Software, and to permit persons to whom the Software is |
|
// furnished to do so, subject to the following conditions: |
|
// |
|
// THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN |
|
// ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED |
|
// "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT |
|
// LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR |
|
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT |
|
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
|
// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
|
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
// | // |
//============================================================================== | //============================================================================== |
// | // |
// Author: Mike Brasher (mbrasher@bmc.com) |
|
// |
|
// Modified By: |
|
// |
|
//%///////////////////////////////////////////////////////////////////////////// | //%///////////////////////////////////////////////////////////////////////////// |
| |
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
|
|
// &quot; - full quote | // &quot; - full quote |
// &apos - apostrophe | // &apos - apostrophe |
// | // |
|
// as well as character (numeric) references: |
|
// |
|
// &#49; - decimal reference for character '1' |

// &#x31; - hexadecimal reference for character '1' |
|
// |
// 4. Element names and attribute names take the following form: | // 4. Element names and attribute names take the following form: |
// | // |
// [A-Za-z_][A-Za-z_0-9-.:] | // [A-Za-z_][A-Za-z_0-9-.:] |
|
|
// | // |
// TODO: | // TODO: |
// | // |
// Handle <!DOCTYPE...> sections which are complicated (containing |
// ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is |
|
// work. Handle <!DOCTYPE...> sections which are complicated (containing |
// rules rather than references to files). | // rules rather than references to files). |
// | // |
// Handle reference of this form: "&#913;" |
|
// |
|
// Remove newlines from string literals: | // Remove newlines from string literals: |
// | // |
// Example: <xyz x="hello | // Example: <xyz x="hello |
|
|
// | // |
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
| |
|
#include <Pegasus/Common/Config.h> |
#include <cctype> | #include <cctype> |
#include <cassert> |
|
#include <cstdio> | #include <cstdio> |
#include <cstdlib> | #include <cstdlib> |
#include <cstring> | #include <cstring> |
#include "XmlParser.h" | #include "XmlParser.h" |
#include "Logger.h" | #include "Logger.h" |
|
#include "ExceptionRep.h" |
|
#include "CharSet.h" |
| |
PEGASUS_NAMESPACE_BEGIN | PEGASUS_NAMESPACE_BEGIN |
| |
#define PEGASUS_ARRAY_T XmlEntry |
|
# include "ArrayImpl.h" |
|
#undef PEGASUS_ARRAY_T |
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
// | // |
// Static helper functions | // Static helper functions |
|
|
char replacement; | char replacement; |
}; | }; |
| |
|
// ATTN: Add support for more entity references |
static EntityReference _references[] = | static EntityReference _references[] = |
{ | { |
{ "&", 5, '&' }, | { "&", 5, '&' }, |
|
|
{ "'", 6, '\'' } | { "'", 6, '\'' } |
}; | }; |
| |
static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0])); |
|
|
|
// Remove all redundant spaces from the given string: |
|
|
|
static void _normalize(char* text) |
|
{ |
|
Uint32 length = strlen(text); |
|
char* p = text; |
|
char* end = p + length; |
|
|
|
// Remove leading spaces: |
|
|
|
while (isspace(*p)) |
|
p++; |
|
|
|
if (p != text) |
|
memmove(text, p, end - p + 1); |
|
| |
p = text; |
// Implements a check for a whitespace character, without calling |
|
// isspace( ). The isspace( ) function is locale-sensitive, |
// Look for sequences of more than one space and remove all but one. |
// and incorrectly flags some chars above 0x7f as whitespace. This |
|
// causes the XmlParser to incorrectly parse UTF-8 data. |
for (;;) |
// |
|
// Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml) |
|
// defines white space as: |
|
// S ::= (#x20 | #x9 | #xD | #xA)+ |
|
static inline int _isspace(char c) |
{ | { |
// Advance to the next space: |
return CharSet::isXmlWhiteSpace((Uint8)c); |
|
|
while (*p && !isspace(*p)) |
|
p++; |
|
|
|
if (!*p) |
|
break; |
|
|
|
// Advance to the next non-space: |
|
|
|
char* q = p++; |
|
|
|
while (isspace(*p)) |
|
p++; |
|
|
|
// Discard trailing spaces (if we are at the end): |
|
|
|
if (!*p) |
|
{ |
|
*q = '\0'; |
|
break; |
|
} | } |
| |
// Remove the redundant spaces: |
static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0])); |
|
|
Uint32 n = p - q; |
|
|
|
if (n > 1) |
|
{ |
|
*q++ = ' '; |
|
memmove(q, p, end - p + 1); |
|
p = q; |
|
} |
|
} |
|
} |
|
| |
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
// | // |
|
|
"Semantic error" | "Semantic error" |
}; | }; |
| |
|
// Message-catalog keys, one per XmlException code. _formMessage() and
// _formPartialMessage() index this table with (code - 1), the same index
// they use for _xmlMessages, so the two tables must list their entries in
// the same order.
static const char* _xmlKeys[] = |

{ |

"Common.XmlParser.BAD_START_TAG", |

"Common.XmlParser.BAD_END_TAG", |

"Common.XmlParser.BAD_ATTRIBUTE_NAME", |

"Common.XmlParser.EXPECTED_EQUAL_SIGN", |

"Common.XmlParser.BAD_ATTRIBUTE_VALUE", |

"Common.XmlParser.MINUS_MINUS_IN_COMMENT", |

"Common.XmlParser.UNTERMINATED_COMMENT", |

"Common.XmlParser.UNTERMINATED_CDATA", |

"Common.XmlParser.UNTERMINATED_DOCTYPE", |

"Common.XmlParser.TOO_MANY_ATTRIBUTES", |

"Common.XmlParser.MALFORMED_REFERENCE", |

"Common.XmlParser.EXPECTED_COMMENT_OR_CDATA", |

"Common.XmlParser.START_END_MISMATCH", |

"Common.XmlParser.UNCLOSED_TAGS", |

"Common.XmlParser.MULTIPLE_ROOTS", |

"Common.XmlParser.VALIDATION_ERROR", |

"Common.XmlParser.SEMANTIC_ERROR" |

}; |
|
|
|
// l10n replace _formMessage (comment out the old one) |
|
/* |
static String _formMessage(Uint32 code, Uint32 line, const String& message) | static String _formMessage(Uint32 code, Uint32 line, const String& message) |
{ | { |
String result = _xmlMessages[Uint32(code) - 1]; | String result = _xmlMessages[Uint32(code) - 1]; |
|
|
| |
return result; | return result; |
} | } |
|
*/ |
|
|
|
// Builds the localizable message parameters for an XML error.
//
//   code    - XmlException code; (code - 1) selects the entry in both
//             _xmlMessages (default text) and _xmlKeys (catalog key).
//   line    - line number, substituted for "$0" in the default text.
//   message - optional detail; when non-empty it is prefixed with ": " and
//             substituted for a trailing "$1".
static MessageLoaderParms _formMessage(
    Uint32 code,
    Uint32 line,
    const String& message)
{
    const Uint32 index = Uint32(code) - 1;

    String catalogKey = _xmlKeys[index];
    String defaultText = _xmlMessages[index];
    defaultText.append(": on line $0");

    String detail = message;

    // Only reserve the "$1" slot when there is a detail to put in it.
    if (message.size() != 0)
    {
        detail = ": " + detail;
        defaultText.append("$1");
    }

    return MessageLoaderParms(catalogKey, defaultText, line, detail);
}
|
|
|
// Builds message parameters holding only the "<error>: on line N" part of
// an XML error, with no detail text.
//
//   code - XmlException code; (code - 1) indexes _xmlMessages and _xmlKeys.
//   line - line number, substituted for "$0" in the default text.
static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
{
    const Uint32 index = Uint32(code) - 1;

    String catalogKey = _xmlKeys[index];
    String defaultText = _xmlMessages[index];
    defaultText.append(": on line $0");

    return MessageLoaderParms(catalogKey, defaultText, line);
}
|
|
| |
XmlException::XmlException( | XmlException::XmlException( |
XmlException::Code code, | XmlException::Code code, |
|
|
| |
} | } |
| |
|
|
|
// Constructs an XmlException whose message is the "<error>: on line N"
// text for `code`, followed by the message loaded from msgParms.
//
// Note: msgParms is modified in place. A non-empty default_msg gets a ": "
// prefix before the message is loaded.
XmlException::XmlException(
    XmlException::Code code,
    Uint32 lineNumber,
    MessageLoaderParms& msgParms)
    : Exception(_formPartialMessage(code, lineNumber))
{
    const bool hasDetail = (msgParms.default_msg.size() != 0);

    if (hasDetail)
        msgParms.default_msg = ": " + msgParms.default_msg;

    _rep->message.append(MessageLoader::getMessage(msgParms));
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
// | // |
// XmlValidationError | // XmlValidationError |
|
|
const String& message) | const String& message) |
: XmlException(XmlException::VALIDATION_ERROR, lineNumber, message) | : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message) |
{ | { |
|
} |
|
|
| |
|
XmlValidationError::XmlValidationError( |
|
Uint32 lineNumber, |
|
MessageLoaderParms& msgParms) |
|
: XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms) |
|
{ |
} | } |
| |
|
|
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
// | // |
// XmlSemanticError | // XmlSemanticError |
|
|
const String& message) | const String& message) |
: XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message) | : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message) |
{ | { |
|
} |
| |
|
|
|
XmlSemanticError::XmlSemanticError( |
|
Uint32 lineNumber, |
|
MessageLoaderParms& msgParms) |
|
: XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms) |
|
{ |
} | } |
| |
|
|
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
// | // |
// XmlParser | // XmlParser |
// | // |
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
| |
XmlParser::XmlParser(char* text) : _line(1), _text(text), _current(text), |
XmlParser::XmlParser(char* text) |
_restoreChar('\0'), _foundRoot(false) |
: _line(1), |
|
_current(text), |
|
_restoreChar('\0'), |
|
_foundRoot(false) |
|
{ |
|
} |
|
|
|
// Advances p past any run of whitespace (as classified by _isspace()),
// incrementing `line` once for every '\n' that is skipped. Stops at the
// first non-whitespace character or at the terminating NUL.
inline void _skipWhitespace(Uint32& line, char*& p)
{
    for (; *p != '\0' && _isspace(*p); ++p)
    {
        if (*p == '\n')
            ++line;
    }
}
|
|
|
// Recognizes one of the five predefined XML entity names at p. The caller
// has already consumed the leading '&', so p points at the name.
//
// On a match, p is advanced past the terminating ';' and the character the
// entity stands for is returned. Otherwise p is left untouched and -1 is
// returned.
static int _getEntityRef(char*& p)
{
    struct NamedEntity
    {
        const char* text;   // entity name including the trailing ';'
        size_t length;      // strlen(text)
        int value;          // character the entity expands to
    };

    static const NamedEntity entities[] =
    {
        { "gt;",   3, '>'  },
        { "lt;",   3, '<'  },
        { "apos;", 5, '\'' },
        { "quot;", 5, '"'  },
        { "amp;",  4, '&'  }
    };

    const size_t count = sizeof(entities) / sizeof(entities[0]);

    for (size_t i = 0; i < count; ++i)
    {
        // strncmp() stops at the first mismatch (including an early NUL in
        // p), so this never reads beyond the end of the input.
        if (strncmp(p, entities[i].text, entities[i].length) == 0)
        {
            p += entities[i].length;
            return entities[i].value;
        }
    }

    return -1;
}
|
|
|
// Parses the numeric part of an XML character reference.
//
// The caller has already consumed "&#" (decimal) or "&#x" (hexadecimal), so
// p points at the first digit.
//
//   p   - in: start of the digits; out: one past the terminating ';' on
//         success, unchanged on failure.
//   hex - true to parse hexadecimal digits, false for decimal.
//
// Returns the referenced character code (1..255), or -1 if the reference is
// malformed: no digits, missing ';', more than 4 hex / 5 decimal digits,
// a value above 255, or the value 0.
static inline int _getCharRef(char*& p, bool hex)
{
    // Scan the digit run by hand rather than trusting strtoul()'s end
    // pointer: strtoul() also accepts leading whitespace, a sign and (for
    // base 16) a "0x" prefix, none of which may appear in a character
    // reference.
    const char* digitsEnd = p;

    while (hex ? isxdigit(static_cast<unsigned char>(*digitsEnd))
               : isdigit(static_cast<unsigned char>(*digitsEnd)))
    {
        ++digitsEnd;
    }

    const long digitCount = static_cast<long>(digitsEnd - p);

    if (digitCount == 0 || *digitsEnd != ';')
    {
        return -1;
    }

    if (digitCount > (hex ? 4 : 5))
    {
        return -1;
    }

    // At most five digits, so the conversion cannot overflow.
    const unsigned long ch = strtoul(p, 0, hex ? 16 : 10);

    // Only single-byte values are supported. NUL is rejected because it is
    // not a legal XML character and would truncate the NUL-terminated
    // buffer the result is written into.
    if (ch == 0 || ch > 255)
    {
        return -1;
    }

    p += digitCount + 1;

    return static_cast<int>(ch);
}
|
|
|
static void _normalize(Uint32& line, char*& p, char end_char, char*& start) |
|
{ |
|
// Skip over leading whitespace: |
|
|
|
_skipWhitespace(line, p); |
|
start = p; |
|
|
|
// Process one character at a time: |
|
|
|
char* q = p; |
|
|
|
while (*p && (*p != end_char)) |
|
{ |
|
if (_isspace(*p)) |
|
{ |
|
// Compress sequences of whitespace characters to a single space |
|
// character. Update line number when newlines encountered. |
|
|
|
if (*p++ == '\n') |
|
{ |
|
line++; |
|
} |
|
|
|
*q++ = ' '; |
|
|
|
_skipWhitespace(line, p); |
|
} |
|
else if (*p == '&') |
|
{ |
|
// Process entity characters and entity references: |
|
|
|
p++; |
|
int ch; |
|
|
|
if (*p == '#') |
|
{ |
|
*p++; |
|
|
|
if (*p == 'x') |
|
{ |
|
p++; |
|
ch = _getCharRef(p, true); |
|
} |
|
else |
|
{ |
|
ch = _getCharRef(p, false); |
|
} |
|
} |
|
else |
|
{ |
|
ch = _getEntityRef(p); |
|
} |
|
|
|
if (ch == -1) |
{ | { |
|
throw XmlException(XmlException::MALFORMED_REFERENCE, line); |
|
} |
| |
|
*q++ = ch; |
} | } |
|
else |
|
{ |
|
*q++ = *p++; |
|
} |
|
} |
|
|
|
// We encountered the end_char or a zero-terminator. |
| |
Boolean XmlParser::next(XmlEntry& entry) |
*q = *p; |
|
|
|
// Remove single trailing whitespace (consecutive whitespaces already |
|
// compressed above). Since p >= q, we can tell if we need to strip a |
|
// trailing space from q by looking at the end of p. We must not look at |
|
// the last character of p, though, if p is an empty string. |
|
|
|
if ((p != start) && _isspace(p[-1])) |
|
{ |
|
q--; |
|
} |
|
|
|
// If q got behind p, it is safe and necessary to null-terminate q |
|
|
|
if (q != p) |
|
{ |
|
*q = '\0'; |
|
} |
|
} |
|
|
|
Boolean XmlParser::next(XmlEntry& entry, Boolean includeComment) |
{ | { |
if (!_putBackStack.isEmpty()) | if (!_putBackStack.isEmpty()) |
{ | { |
|
|
_restoreChar = '\0'; | _restoreChar = '\0'; |
} | } |
| |
|
// Loop until we are done with comments if includeComment is false. |
|
do |
|
{ |
// Skip over any whitespace: | // Skip over any whitespace: |
|
_skipWhitespace(_line, _current); |
_skipWhitespace(_current); |
|
| |
if (!*_current) | if (!*_current) |
{ | { |
|
|
| |
_stack.pop(); | _stack.pop(); |
} | } |
|
|
return true; |
|
} | } |
else | else |
{ | { |
|
// Normalize the content: |
|
|
|
char* start; |
|
_normalize(_line, _current, '<', start); |
|
|
|
// Get the content: |
|
|
entry.type = XmlEntry::CONTENT; | entry.type = XmlEntry::CONTENT; |
entry.text = _current; |
entry.text = start; |
_getContent(_current); |
|
|
// Overwrite '<' with a null character (temporarily). |
|
|
_restoreChar = *_current; | _restoreChar = *_current; |
*_current = '\0'; | *_current = '\0'; |
| |
if (nullTerminator) | if (nullTerminator) |
*nullTerminator = '\0'; | *nullTerminator = '\0'; |
|
} |
_substituteReferences((char*)entry.text); |
}while (!includeComment && entry.type == XmlEntry::COMMENT); |
_normalize((char*)entry.text); |
|
| |
return true; | return true; |
} | } |
} |
|
| |
void XmlParser::putBack(XmlEntry& entry) | void XmlParser::putBack(XmlEntry& entry) |
{ | { |
|
|
// Nothing to do! | // Nothing to do! |
} | } |
| |
void XmlParser::_skipWhitespace(char*& p) |
// A-Za-z0-9_-:. |
|
static unsigned char _isInnerElementChar[] = |
{ | { |
while (*p && isspace(*p)) |
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
{ |
0,0,0,0,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1, |
if (*p == '\n') |
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
_line++; |
1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
p++; |
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
} |
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
} |
}; |
| |
Boolean XmlParser::_getElementName(char*& p) | Boolean XmlParser::_getElementName(char*& p) |
{ | { |
if (!isalpha(*p) && *p != '_') |
if (!CharSet::isAlNumUnder(Uint8(*p))) |
throw XmlException(XmlException::BAD_START_TAG, _line); | throw XmlException(XmlException::BAD_START_TAG, _line); |
| |
while (*p && |
p++; |
(isalnum(*p) || *p == '_' || *p == '-' || *p == ':' || *p == '.')) |
|
|
while (*p && _isInnerElementChar[Uint8(*p)]) |
p++; | p++; |
| |
// The next character must be a space: | // The next character must be a space: |
| |
if (isspace(*p)) |
if (_isspace(*p)) |
{ | { |
*p++ = '\0'; | *p++ = '\0'; |
_skipWhitespace(p); |
_skipWhitespace(_line, p); |
} | } |
| |
if (*p == '>') | if (*p == '>') |
|
|
{ | { |
openCloseElement = false; | openCloseElement = false; |
| |
if (!isalpha(*p) && *p != '_') |
if (!CharSet::isAlNumUnder(Uint8(*p))) |
throw XmlException(XmlException::BAD_START_TAG, _line); | throw XmlException(XmlException::BAD_START_TAG, _line); |
| |
while (*p && |
p++; |
(isalnum(*p) || *p == '_' || *p == '-' || *p == ':' || *p == '.')) |
|
|
while (*p && _isInnerElementChar[Uint8(*p)]) |
p++; | p++; |
| |
// The next character must be a space: | // The next character must be a space: |
| |
if (isspace(*p)) |
if (_isspace(*p)) |
{ | { |
*p++ = '\0'; | *p++ = '\0'; |
_skipWhitespace(p); |
_skipWhitespace(_line, p); |
} | } |
| |
if (*p == '>') | if (*p == '>') |
|
|
| |
void XmlParser::_getAttributeNameAndEqual(char*& p) | void XmlParser::_getAttributeNameAndEqual(char*& p) |
{ | { |
if (!isalpha(*p) && *p != '_') |
if (!CharSet::isAlNumUnder((Uint8)*p)) |
throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line); | throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line); |
| |
while (*p && |
p++; |
(isalnum(*p) || *p == '_' || *p == '-' || *p == ':' || *p == '.')) |
|
|
while (*p && _isInnerElementChar[Uint8(*p)]) |
p++; | p++; |
| |
char* term = p; | char* term = p; |
| |
_skipWhitespace(p); |
_skipWhitespace(_line, p); |
| |
if (*p != '=') | if (*p != '=') |
throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line); | throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line); |
| |
p++; | p++; |
| |
_skipWhitespace(p); |
_skipWhitespace(_line, p); |
| |
*term = '\0'; | *term = '\0'; |
} | } |
| |
void XmlParser::_getAttributeValue(char*& p) |
|
{ |
|
// ATTN-B: handle values contained in semiquotes: |
|
|
|
if (*p != '"' && *p != '\'') |
|
throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); |
|
|
|
char startChar = *p++; |
|
|
|
while (*p && *p != startChar) |
|
p++; |
|
|
|
if (*p != startChar) |
|
throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); |
|
|
|
*p++ = '\0'; |
|
} |
|
|
|
void XmlParser::_getComment(char*& p) | void XmlParser::_getComment(char*& p) |
{ | { |
// Now p points to first non-whitespace character beyond "<--" sequence: | // Now p points to first non-whitespace character beyond "<--" sequence: |
|
|
p++; | p++; |
} | } |
| |
void XmlParser::_getContent(char*& p) |
|
{ |
|
while (*p && *p != '<') |
|
{ |
|
if (*p == '\n') |
|
_line++; |
|
|
|
p++; |
|
} |
|
} |
|
|
|
void XmlParser::_substituteReferences(char* text) |
|
{ |
|
Uint32 rem = strlen(text); |
|
|
|
for (char* p = text; *p; p++, rem--) |
|
{ |
|
if (*p == '&') |
|
{ |
|
// Look for predefined entity reference: |
|
|
|
Boolean found = false; |
|
|
|
for (Uint32 i = 0; i < _REFERENCES_SIZE; i++) |
|
{ |
|
Uint32 length = _references[i].length; |
|
const char* match = _references[i].match; |
|
|
|
if (strncmp(p, _references[i].match, length) == 0) |
|
{ |
|
found = true; |
|
*p = _references[i].replacement; |
|
char* q = p + length; |
|
rem = rem - length + 1; |
|
memmove(p + 1, q, rem); |
|
} |
|
} |
|
|
|
// If not found, then at least make sure it is well formed: |
|
|
|
if (!found) |
|
{ |
|
char* start = p; |
|
p++; |
|
|
|
XmlException::Code code = XmlException::MALFORMED_REFERENCE; |
|
|
|
if (isalpha(*p) || *p == '_') |
|
{ |
|
for (p++; *p && *p != ';'; p++) |
|
{ |
|
if (!isalnum(*p) && *p != '_') |
|
throw XmlException(code, _line); |
|
} |
|
} |
|
else if (*p == '#') |
|
{ |
|
for (p++ ; *p && *p != ';'; p++) |
|
{ |
|
if (!isdigit(*p)) |
|
throw XmlException(code, _line); |
|
} |
|
} |
|
|
|
if (*p != ';') |
|
throw XmlException(code, _line); |
|
|
|
rem -= p - start; |
|
} |
|
} |
|
} |
|
} |
|
|
|
static const char _EMPTY_STRING[] = ""; |
|
|
|
void XmlParser::_getElement(char*& p, XmlEntry& entry) | void XmlParser::_getElement(char*& p, XmlEntry& entry) |
{ | { |
entry.attributeCount = 0; | entry.attributeCount = 0; |
|
|
else if (memcmp(p, "DOCTYPE", 7) == 0) | else if (memcmp(p, "DOCTYPE", 7) == 0) |
{ | { |
entry.type = XmlEntry::DOCTYPE; | entry.type = XmlEntry::DOCTYPE; |
entry.text = _EMPTY_STRING; |
entry.text = ""; |
_getDocType(p); | _getDocType(p); |
return; | return; |
} | } |
|
|
| |
return; | return; |
} | } |
else if (isalpha(*p) || *p == '_') |
else if ((((*p >= 'A') && (*p <= 'Z')) || |
|
((*p >= 'a') && (*p <= 'z')) || |
|
(*p == '_'))) |
{ | { |
entry.type = XmlEntry::START_TAG; | entry.type = XmlEntry::START_TAG; |
entry.text = p; | entry.text = p; |
|
|
attr.name = p; | attr.name = p; |
_getAttributeNameAndEqual(p); | _getAttributeNameAndEqual(p); |
| |
if (*p != '"' && *p != '\'') |
// Get the attribute value (e.g., "some value") |
|
{ |
|
if ((*p != '"') && (*p != '\'')) |
|
{ |
|
throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); |
|
} |
|
|
|
char quote = *p++; |
|
|
|
char* start; |
|
_normalize(_line, p, quote, start); |
|
attr.value = start; |
|
|
|
if (*p != quote) |
|
{ |
throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); | throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); |
|
} |
| |
attr.value = p + 1; |
// Overwrite the closing quote with a null-terminator: |
_getAttributeValue(p); |
|
|
*p++ = '\0'; |
|
} |
| |
if (entry.type == XmlEntry::XML_DECLARATION) | if (entry.type == XmlEntry::XML_DECLARATION) |
{ | { |
// The next thing must a space or a "?>": | // The next thing must a space or a "?>": |
| |
if (!(p[0] == '?' && p[1] == '>') && !isspace(*p)) |
if (!(p[0] == '?' && p[1] == '>') && !_isspace(*p)) |
{ | { |
throw XmlException( | throw XmlException( |
XmlException::BAD_ATTRIBUTE_VALUE, _line); | XmlException::BAD_ATTRIBUTE_VALUE, _line); |
} | } |
} | } |
else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || isspace(*p))) |
else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || _isspace(*p))) |
{ | { |
// The next thing must be a space or a '>': | // The next thing must be a space or a '>': |
| |
throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); | throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line); |
} | } |
| |
_skipWhitespace(p); |
_skipWhitespace(_line, p); |
| |
if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES) | if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES) |
throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line); | throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line); |
| |
_substituteReferences((char*)attr.value); |
|
entry.attributes[entry.attributeCount++] = attr; | entry.attributes[entry.attributeCount++] = attr; |
} | } |
} | } |
|
|
{ | { |
first = str; | first = str; |
| |
while (isspace(*first)) |
while (_isspace(*first)) |
first++; | first++; |
| |
if (!*first) | if (!*first) |
|
|
| |
last = first + strlen(first); | last = first + strlen(first); |
| |
while (last != first && isspace(last[-1])) |
while (last != first && _isspace(last[-1])) |
last--; | last--; |
} | } |
| |
|
|
if (!end || end != last) | if (!end || end != last) |
return false; | return false; |
| |
value = Uint32(tmp); |
value = static_cast<Real32>(tmp); |
return true; | return true; |
} | } |
| |
|
|
if (!getAttributeValue(name, tmp)) | if (!getAttributeValue(name, tmp)) |
return false; | return false; |
| |
value = tmp; |
value = String(tmp); |
return true; | return true; |
} | } |
| |
void XmlAppendCString(Array<Sint8>& out, const char* str) |
void XmlAppendCString(Buffer& out, const char* str) |
{ | { |
out.append(str, strlen(str)); |
out.append(str, static_cast<Uint32>(strlen(str))); |
} | } |
| |
PEGASUS_NAMESPACE_END | PEGASUS_NAMESPACE_END |