version 1.2, 2001/03/23 21:33:44
|
version 1.21, 2003/07/29 20:50:11
|
|
|
//BEGIN_LICENSE |
//%///////////////////////////////////////////////////////////////////////////// |
// | // |
// Copyright (c) 2000 The Open Group, BMC Software, Tivoli Systems, IBM |
// Copyright (c) 2000, 2001, 2002 BMC Software, Hewlett-Packard Company, IBM, |
|
// The Open Group, Tivoli Systems |
// | // |
// Permission is hereby granted, free of charge, to any person obtaining a |
// Permission is hereby granted, free of charge, to any person obtaining a copy |
// copy of this software and associated documentation files (the "Software"), |
// of this software and associated documentation files (the "Software"), to |
// to deal in the Software without restriction, including without limitation |
// deal in the Software without restriction, including without limitation the |
// the rights to use, copy, modify, merge, publish, distribute, sublicense, |
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
// and/or sell copies of the Software, and to permit persons to whom the |
// sell copies of the Software, and to permit persons to whom the Software is |
// Software is furnished to do so, subject to the following conditions: |
// furnished to do so, subject to the following conditions: |
|
// |
|
// THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN |
|
// ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED |
|
// "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT |
|
// LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR |
|
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT |
|
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
|
// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
|
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
// | // |
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
//============================================================================== |
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
|
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
|
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
|
// DEALINGS IN THE SOFTWARE. |
|
// | // |
//END_LICENSE |
// Author: Mike Brasher (mbrasher@bmc.com) |
//BEGIN_HISTORY |
|
// | // |
// Author: |
// Modified By: |
// | // |
// $Log$ |
//%///////////////////////////////////////////////////////////////////////////// |
// Revision 1.2 2001/03/23 21:33:44 mike |
|
// Removed debugging output |
|
// |
|
// Revision 1.1.1.1 2001/01/14 19:53:23 mike |
|
// Pegasus import |
|
// |
|
// |
|
//END_HISTORY |
|
| |
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
// | // |
|
|
// &quot; - full quote | // &quot; - full quote |
// &apos; - apostrophe | // &apos; - apostrophe |
// | // |
|
// as well as character (numeric) references: |
|
|
|
// &#49; - decimal reference for character '1'

// &#x31; - hexadecimal reference for character '1'
|
// |
// 4. Element names and attribute names take the following form: | // 4. Element names and attribute names take the following form: |
// | // |
// [A-Za-z_][A-Za-z_0-9-.:] | // [A-Za-z_][A-Za-z_0-9-.:] |
|
|
// | // |
// TODO: | // TODO: |
// | // |
|
// ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is work. |
// Handle <!DOCTYPE...> sections which are complicated (containing | // Handle <!DOCTYPE...> sections which are complicated (containing |
// rules rather than references to files). | // rules rather than references to files). |
// | // |
// Handle reference of this form: "&#913;"
|
// |
|
// Remove newlines from string literals: | // Remove newlines from string literals: |
// | // |
// Example: <xyz x="hello | // Example: <xyz x="hello |
|
|
// | // |
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
| |
|
#include <Pegasus/Common/Config.h> |
#include <cctype> | #include <cctype> |
#include <cassert> |
|
#include <cstdio> | #include <cstdio> |
#include <cstdlib> | #include <cstdlib> |
#include <cstring> | #include <cstring> |
#include "XmlParser.h" | #include "XmlParser.h" |
|
#include "Logger.h" |
|
#include "ExceptionRep.h" |
| |
PEGASUS_NAMESPACE_BEGIN | PEGASUS_NAMESPACE_BEGIN |
| |
|
#define PEGASUS_ARRAY_T XmlEntry |
|
# include "ArrayImpl.h" |
|
#undef PEGASUS_ARRAY_T |
|
|
|
|
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
// | // |
// Static helper functions | // Static helper functions |
|
|
for (; *p; p++) | for (; *p; p++) |
{ | { |
if (*p == '\n') | if (*p == '\n') |
std::cout << "\\n"; |
PEGASUS_STD(cout) << "\\n"; |
else if (*p == '\r') | else if (*p == '\r') |
std::cout << "\\r"; |
PEGASUS_STD(cout) << "\\r"; |
else if (*p == '\t') | else if (*p == '\t') |
std::cout << "\\t"; |
PEGASUS_STD(cout) << "\\t"; |
else | else |
std::cout << *p; |
PEGASUS_STD(cout) << *p; |
} | } |
} | } |
| |
|
|
char replacement; | char replacement; |
}; | }; |
| |
|
// ATTN: Add support for more entity references |
static EntityReference _references[] = | static EntityReference _references[] = |
{ | { |
{ "&", 5, '&' }, | { "&", 5, '&' }, |
|
|
// | // |
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
| |
static char* _xmlMessages[] = |
static const char* _xmlMessages[] = |
{ | { |
"Bad opening element", | "Bad opening element", |
"Bad closing element", | "Bad closing element", |
|
|
"Semantic error" | "Semantic error" |
}; | }; |
| |
|
// Message-catalog keys, one per XmlException code.
//
// The table is indexed with (code - 1), exactly like _xmlMessages, so the
// two arrays must keep the same number of entries in the same order.
static const char* _xmlKeys[] =
{
    "Common.XmlParser.BAD_START_TAG",             //  1
    "Common.XmlParser.BAD_END_TAG",               //  2
    "Common.XmlParser.BAD_ATTRIBUTE_NAME",        //  3
    "Common.XmlParser.EXPECTED_EQUAL_SIGN",       //  4
    "Common.XmlParser.BAD_ATTRIBUTE_VALUE",       //  5
    "Common.XmlParser.MINUS_MINUS_IN_COMMENT",    //  6
    "Common.XmlParser.UNTERMINATED_COMMENT",      //  7
    "Common.XmlParser.UNTERMINATED_CDATA",        //  8
    "Common.XmlParser.UNTERMINATED_DOCTYPE",      //  9
    "Common.XmlParser.TOO_MANY_ATTRIBUTES",       // 10
    "Common.XmlParser.MALFORMED_REFERENCE",       // 11
    "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA", // 12
    "Common.XmlParser.START_END_MISMATCH",        // 13
    "Common.XmlParser.UNCLOSED_TAGS",             // 14
    "Common.XmlParser.MULTIPLE_ROOTS",            // 15
    "Common.XmlParser.VALIDATION_ERROR",          // 16
    "Common.XmlParser.SEMANTIC_ERROR"             // 17
};
|
|
|
// l10n TODO replace _formMessage with the commented one and uncomment |
|
// the new constructors |
|
/* |
static String _formMessage(Uint32 code, Uint32 line, const String& message) | static String _formMessage(Uint32 code, Uint32 line, const String& message) |
{ | { |
String result = _xmlMessages[Uint32(code) - 1]; | String result = _xmlMessages[Uint32(code) - 1]; |
|
|
result.append(": on line "); | result.append(": on line "); |
result.append(buffer); | result.append(buffer); |
| |
if (message.getLength()) |
if (message.size()) |
{ | { |
result.append(": "); | result.append(": "); |
result.append(message); | result.append(message); |
|
|
| |
return result; | return result; |
} | } |
|
*/ |
|
|
|
// Builds the message-loader parameters for an XML error that carries an
// additional caller-supplied message.
//
//   code    - XmlException code; (code - 1) indexes _xmlKeys / _xmlMessages.
//   line    - line number, passed as the first argument after the default text.
//   message - optional detail text; when non-empty it is prefixed with ": "
//             and passed as the second argument.
//
// The "$0" / "$1" markers appended to the default text presumably refer to
// those two arguments in order (confirm against MessageLoaderParms).
static MessageLoaderParms _formMessage(Uint32 code, Uint32 line, const String& message)
{
    const Uint32 index = Uint32(code) - 1;

    String key = _xmlKeys[index];
    String dftMsg = _xmlMessages[index];
    dftMsg.append(": on line $0");

    String msg = message;

    if (message.size())
    {
        // Only reference the second argument when there is detail text.
        dftMsg.append("$1");
        msg = ": " + msg;
    }

    return MessageLoaderParms(key, dftMsg, line, msg);
}
|
|
|
// Builds the message-loader parameters for the fixed part of an XML error
// ("<description>: on line $0"), without any caller-supplied detail text.
//
//   code - XmlException code; (code - 1) indexes _xmlKeys / _xmlMessages.
//   line - line number, passed as the only argument after the default text.
static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
{
    const Uint32 index = Uint32(code) - 1;

    String key = _xmlKeys[index];
    String dftMsg = _xmlMessages[index];
    dftMsg.append(": on line $0");

    return MessageLoaderParms(key, dftMsg, line);
}
|
|
| |
XmlException::XmlException( | XmlException::XmlException( |
XmlException::Code code, | XmlException::Code code, |
|
|
| |
} | } |
| |
|
|
|
// Constructs an XmlException whose detail text comes from message-loader
// parameters.
//
// The base Exception is initialised with the partial message for `code` and
// `lineNumber`; the text obtained from MessageLoader::getMessage(msgParms)
// is then appended to it.
//
// NOTE: msgParms is modified in place - a non-empty default_msg gets a ": "
// prefix before the message is loaded - so the caller's object is changed.
XmlException::XmlException(
    XmlException::Code code,
    Uint32 lineNumber,
    MessageLoaderParms& msgParms)
    : Exception(_formPartialMessage(code, lineNumber))
{
    String& detail = msgParms.default_msg;

    if (detail.size())
        detail = ": " + detail;

    _rep->message.append(MessageLoader::getMessage(msgParms));
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
// | // |
// XmlValidationError | // XmlValidationError |
|
|
| |
} | } |
| |
|
|
|
// Constructs a validation error from message-loader parameters by forwarding
// XmlException::VALIDATION_ERROR, the line number and msgParms to the
// XmlException constructor. The body itself does nothing.
XmlValidationError::XmlValidationError(
    Uint32 lineNumber,
    MessageLoaderParms& msgParms)
    : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
{
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
// | // |
// XmlSemanticError | // XmlSemanticError |
|
|
// Constructs a semantic error with a plain-text detail message.
//
// The exception is tagged with XmlException::SEMANTIC_ERROR. The stale
// initializer that passed VALIDATION_ERROR has been dropped, so a semantic
// error is no longer reported under the validation-error code.
XmlSemanticError::XmlSemanticError(
    Uint32 lineNumber,
    const String& message)
    : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
{
}
| |
|
|
|
// Constructs a semantic error from message-loader parameters by forwarding
// XmlException::SEMANTIC_ERROR, the line number and msgParms to the
// XmlException constructor. The body itself does nothing.
XmlSemanticError::XmlSemanticError(
    Uint32 lineNumber,
    MessageLoaderParms& msgParms)
    : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
{
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
// | // |
// XmlParser | // XmlParser |
|
|
{ | { |
if (*p == '&') | if (*p == '&') |
{ | { |
// Look for predefined entity reference: |
// Process character or entity reference |
| |
Boolean found = false; |
Uint16 referenceChar = 0; |
|
Uint32 referenceLength = 0; |
|
XmlException::Code code = XmlException::MALFORMED_REFERENCE; |
| |
for (Uint32 i = 0; i < _REFERENCES_SIZE; i++) |
if (*(p+1) == '#') |
{ | { |
Uint32 length = _references[i].length; |
// Found a character (numeric) reference |
const char* match = _references[i].match; |
// Determine whether it is decimal or hex |
|
if (*(p+2) == 'x') |
|
{ |
|
// Decode a hexadecimal character reference |
|
char* q = p+3; |
| |
if (strncmp(p, _references[i].match, length) == 0) |
// At most four digits are allowed, plus trailing ';' |
|
Uint32 numDigits; |
|
for (numDigits = 0; numDigits < 5; numDigits++, q++) |
{ | { |
found = true; |
if (isdigit(*q)) |
*p = _references[i].replacement; |
{ |
char* q = p + length; |
referenceChar = (referenceChar << 4); |
rem = rem - length + 1; |
referenceChar += (*q - '0'); |
memmove(p + 1, q, rem); |
} |
|
else if ((*q >= 'A') && (*q <= 'F')) |
|
{ |
|
referenceChar = (referenceChar << 4); |
|
referenceChar += (*q - 'A' + 10); |
|
} |
|
else if ((*q >= 'a') && (*q <= 'f')) |
|
{ |
|
referenceChar = (referenceChar << 4); |
|
referenceChar += (*q - 'a' + 10); |
|
} |
|
else if (*q == ';') |
|
{ |
|
break; |
|
} |
|
else |
|
{ |
|
throw XmlException(code, _line); |
} | } |
} | } |
| |
// If not found, then at least make sure it is well formed: |
// Hex number must be 1 - 4 digits |
|
if ((numDigits == 0) || (numDigits > 4)) |
|
{ |
|
throw XmlException(code, _line); |
|
} |
| |
if (!found) |
// ATTN: Currently do not support 16-bit characters |
|
if (referenceChar > 0xff) |
{ | { |
char* start = p; |
// ATTN: Is there a good way to say "unsupported"? |
p++; |
throw XmlException(code, _line); |
|
} |
| |
XmlException::Code code = XmlException::MALFORMED_REFERENCE; |
referenceLength = numDigits + 4; |
|
} |
|
else |
|
{ |
|
// Decode a decimal character reference |
|
Uint32 newChar = 0; |
|
char* q = p+2; |
| |
if (isalpha(*p) || *p == '_') |
// At most five digits are allowed, plus trailing ';' |
|
Uint32 numDigits; |
|
for (numDigits = 0; numDigits < 6; numDigits++, q++) |
{ | { |
for (p++; *p && *p != ';'; p++) |
if (isdigit(*q)) |
|
{ |
|
newChar = (newChar * 10); |
|
newChar += (*q - '0'); |
|
} |
|
else if (*q == ';') |
|
{ |
|
break; |
|
} |
|
else |
{ | { |
if (!isalnum(*p) && *p != '_') |
|
throw XmlException(code, _line); | throw XmlException(code, _line); |
} | } |
} | } |
else if (*p == '#') |
|
|
// Decimal number must be 1 - 5 digits and fit in 16 bits |
|
if ((numDigits == 0) || (numDigits > 5) || |
|
(newChar > 0xffff)) |
{ | { |
for (p++ ; *p && *p != ';'; p++) |
throw XmlException(code, _line); |
|
} |
|
|
|
// ATTN: Currently do not support 16-bit characters |
|
if (newChar > 0xff) |
{ | { |
if (!isdigit(*p)) |
// ATTN: Is there a good way to say "unsupported"? |
throw XmlException(code, _line); | throw XmlException(code, _line); |
} | } |
|
|
|
referenceChar = Uint16(newChar); |
|
referenceLength = numDigits + 3; |
|
} |
} | } |
|
else |
|
{ |
|
// Check for entity reference |
|
// ATTN: Inefficient if many entity references are supported |
|
Uint32 i; |
|
for (i = 0; i < _REFERENCES_SIZE; i++) |
|
{ |
|
Uint32 length = _references[i].length; |
|
const char* match = _references[i].match; |
| |
if (*p != ';') |
if (strncmp(p, _references[i].match, length) == 0) |
throw XmlException(code, _line); |
{ |
|
referenceChar = _references[i].replacement; |
|
referenceLength = length; |
|
break; |
|
} |
|
} |
| |
rem -= p - start; |
if (i == _REFERENCES_SIZE) |
|
{ |
|
// Didn't recognize the entity reference |
|
// ATTN: Is there a good way to say "unsupported"? |
|
throw XmlException(code, _line); |
} | } |
} | } |
|
|
|
// Replace the reference with the correct character |
|
*p = (char)referenceChar; |
|
char* q = p + referenceLength; |
|
rem = rem - referenceLength + 1; |
|
memmove(p + 1, q, rem); |
|
} |
} | } |
} | } |
| |
|
|
| |
void XmlEntry::print() const | void XmlEntry::print() const |
{ | { |
std::cout << "=== " << _typeStrings[type] << " "; |
PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " "; |
| |
Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT; | Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT; |
| |
if (needQuotes) | if (needQuotes) |
std::cout << "\""; |
PEGASUS_STD(cout) << "\""; |
| |
_printValue(text); | _printValue(text); |
| |
if (needQuotes) | if (needQuotes) |
std::cout << "\""; |
PEGASUS_STD(cout) << "\""; |
| |
std::cout << '\n'; |
PEGASUS_STD(cout) << '\n'; |
| |
for (Uint32 i = 0; i < attributeCount; i++) | for (Uint32 i = 0; i < attributeCount; i++) |
{ | { |
std::cout << " " << attributes[i].name << "=\""; |
PEGASUS_STD(cout) << " " << attributes[i].name << "=\""; |
_printValue(attributes[i].value); | _printValue(attributes[i].value); |
std::cout << "\"" << std::endl; |
PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl); |
} | } |
} | } |
| |
|
|
return true; | return true; |
} | } |
| |
|
// Appends the characters of the NUL-terminated string `str` (without the
// terminator) to `out`. `str` is passed to strlen, so it must not be null.
void XmlAppendCString(Array<Sint8>& out, const char* str)
{
    const size_t length = strlen(str);
    out.append(str, length);
}
|
|
PEGASUS_NAMESPACE_END | PEGASUS_NAMESPACE_END |