version 1.10, 2001/06/16 17:30:38
|
version 1.18.6.3, 2003/08/14 11:55:42
|
|
|
//%///////////////////////////////////////////////////////////////////////////// | //%///////////////////////////////////////////////////////////////////////////// |
// | // |
// Copyright (c) 2000, 2001 The Open group, BMC Software, Tivoli Systems, IBM |
// Copyright (c) 2000, 2001, 2002 BMC Software, Hewlett-Packard Company, IBM, |
|
// The Open Group, Tivoli Systems |
// | // |
// Permission is hereby granted, free of charge, to any person obtaining a copy | // Permission is hereby granted, free of charge, to any person obtaining a copy |
// of this software and associated documentation files (the "Software"), to | // of this software and associated documentation files (the "Software"), to |
|
|
// &quot; - full quote | // &quot; - full quote |
// &apos; - apostrophe | // &apos; - apostrophe |
// | // |
|
// as well as character (numeric) references: |
|
|
|
// &#49; - decimal reference for character '1' |
|
// &#x31; - hexadecimal reference for character '1' |
|
// |
// 4. Element names and attribute names take the following form: | // 4. Element names and attribute names take the following form: |
// | // |
// [A-Za-z_][A-Za-z_0-9-.:] | // [A-Za-z_][A-Za-z_0-9-.:] |
|
|
// | // |
// TODO: | // TODO: |
// | // |
|
// ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is work. |
// Handle <!DOCTYPE...> sections which are complicated (containing | // Handle <!DOCTYPE...> sections which are complicated (containing |
// rules rather than references to files). | // rules rather than references to files). |
// | // |
// Handle reference of this form: "&#913;" |
|
// |
|
// Remove newlines from string literals: | // Remove newlines from string literals: |
// | // |
// Example: <xyz x="hello | // Example: <xyz x="hello |
|
|
// | // |
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
| |
|
#include <Pegasus/Common/Config.h> |
#include <cctype> | #include <cctype> |
#include <cassert> |
|
#include <cstdio> | #include <cstdio> |
#include <cstdlib> | #include <cstdlib> |
#include <cstring> | #include <cstring> |
#include "XmlParser.h" | #include "XmlParser.h" |
#include "Logger.h" | #include "Logger.h" |
|
#include "ExceptionRep.h" |
| |
PEGASUS_NAMESPACE_BEGIN | PEGASUS_NAMESPACE_BEGIN |
| |
|
|
char replacement; | char replacement; |
}; | }; |
| |
|
// ATTN: Add support for more entity references |
static EntityReference _references[] = | static EntityReference _references[] = |
{ | { |
{ "&", 5, '&' }, | { "&", 5, '&' }, |
|
|
"Semantic error" | "Semantic error" |
}; | }; |
| |
|
{ |
|
"Common.XmlParser.BAD_START_TAG", |
|
"Common.XmlParser.BAD_END_TAG", |
|
"Common.XmlParser.BAD_ATTRIBUTE_NAME", |
|
"Common.XmlParser.EXPECTED_EQUAL_SIGN", |
|
"Common.XmlParser.BAD_ATTRIBUTE_VALUE", |
|
"Common.XmlParser.MINUS_MINUS_IN_COMMENT", |
|
"Common.XmlParser.UNTERMINATED_COMMENT", |
|
"Common.XmlParser.UNTERMINATED_CDATA", |
|
"Common.XmlParser.UNTERMINATED_DOCTYPE", |
|
"Common.XmlParser.TOO_MANY_ATTRIBUTES", |
|
"Common.XmlParser.MALFORMED_REFERENCE", |
|
"Common.XmlParser.EXPECTED_COMMENT_OR_CDATA", |
|
"Common.XmlParser.START_END_MISMATCH", |
|
"Common.XmlParser.UNCLOSED_TAGS", |
|
"Common.XmlParser.MULTIPLE_ROOTS", |
|
"Common.XmlParser.VALIDATION_ERROR", |
|
"Common.XmlParser.SEMANTIC_ERROR" |
|
}; |
|
|
|
// l10n replace _formMessage (comment out the old one) |
|
/* |
static String _formMessage(Uint32 code, Uint32 line, const String& message) | static String _formMessage(Uint32 code, Uint32 line, const String& message) |
{ | { |
String result = _xmlMessages[Uint32(code) - 1]; | String result = _xmlMessages[Uint32(code) - 1]; |
|
|
| |
return result; | return result; |
} | } |
|
*/ |
|
|
|
// Builds the message-loader parameters describing an XML error.
//
//   code    - error code; (code - 1) is used as the index into both
//             _xmlMessages (default text) and _xmlKeys (message key).
//   line    - line number, passed as the first substitution argument;
//             the default text refers to it as "$0".
//   message - optional detail text. When non-empty it is prefixed with
//             ": ", passed as the second substitution argument, and a
//             "$1" placeholder is appended to the default text.
//
// Returns a MessageLoaderParms built from the key, the default text and
// the two arguments. No range check is made on code.
static MessageLoaderParms _formMessage(Uint32 code, Uint32 line, const String& message)
{
    const Uint32 index = Uint32(code) - 1;

    String pattern = _xmlMessages[index];
    String msgKey = _xmlKeys[index];
    String detail;

    pattern.append(": on line $0");

    // Only reference the detail argument when there is detail to show.
    if (message.size())
    {
        detail = ": " + message;
        pattern.append("$1");
    }

    return MessageLoaderParms(msgKey, pattern, line, detail);
}
|
|
|
// Builds the message-loader parameters for the leading part of an XML
// error message: the default text for the code followed by
// ": on line $0", with line passed as the only substitution argument.
//
//   code - error code; (code - 1) is used as the index into both
//          _xmlMessages and _xmlKeys. No range check is made.
//   line - line number passed as the substitution argument.
static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
{
    const Uint32 index = Uint32(code) - 1;

    String msgKey = _xmlKeys[index];
    String pattern = _xmlMessages[index];
    pattern.append(": on line $0");

    return MessageLoaderParms(msgKey, pattern, line);
}
|
|
| |
XmlException::XmlException( | XmlException::XmlException( |
XmlException::Code code, | XmlException::Code code, |
|
|
| |
} | } |
| |
|
|
|
// Constructs an XmlException whose message is the text produced by
// _formPartialMessage(code, lineNumber) followed by the message obtained
// from MessageLoader::getMessage(msgParms).
//
// NOTE: msgParms is modified in place. When its default_msg is non-empty
// it is prefixed with ": " before the message is loaded, so the caller's
// object is changed by this call.
XmlException::XmlException(
    XmlException::Code code,
    Uint32 lineNumber,
    MessageLoaderParms& msgParms)
    : Exception(_formPartialMessage(code, lineNumber))
{
    if (msgParms.default_msg.size() != 0)
        msgParms.default_msg = ": " + msgParms.default_msg;

    _rep->message.append(MessageLoader::getMessage(msgParms));
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
// | // |
// XmlValidationError | // XmlValidationError |
|
|
| |
} | } |
| |
|
|
|
// Constructs a validation error for the given line by delegating to
// XmlException with the VALIDATION_ERROR code; msgParms is forwarded
// unchanged to that constructor.
XmlValidationError::XmlValidationError(
    Uint32 lineNumber,
    MessageLoaderParms& msgParms)
    : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
{
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
// | // |
// XmlSemanticError | // XmlSemanticError |
|
|
| |
} | } |
| |
|
|
|
// Constructs a semantic error for the given line by delegating to
// XmlException with the SEMANTIC_ERROR code; msgParms is forwarded
// unchanged to that constructor.
XmlSemanticError::XmlSemanticError(
    Uint32 lineNumber,
    MessageLoaderParms& msgParms)
    : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
{
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
// | // |
// XmlParser | // XmlParser |
|
|
| |
Boolean XmlParser::_getElementName(char*& p) | Boolean XmlParser::_getElementName(char*& p) |
{ | { |
if (!isalpha(*p) && *p != '_') |
if (!String::isUTF8(p) || |
|
!(((*p >= 'A') && (*p <= 'Z')) || |
|
((*p >= 'a') && (*p <= 'z')) || |
|
(*p == '_'))) |
throw XmlException(XmlException::BAD_START_TAG, _line); | throw XmlException(XmlException::BAD_START_TAG, _line); |
|
p++; |
| |
while (*p && |
while ((*p) && |
(isalnum(*p) || *p == '_' || *p == '-' || *p == ':' || *p == '.')) |
(((*p >= 'A') && (*p <= 'Z')) || |
|
((*p >= 'a') && (*p <= 'z')) || |
|
((*p >= '0') && (*p <= '9')) || |
|
*p == '_' || *p == '-' || *p == ':' || *p == '.')) |
p++; | p++; |
| |
// The next character must be a space: | // The next character must be a space: |
|
|
{ | { |
openCloseElement = false; | openCloseElement = false; |
| |
if (!isalpha(*p) && *p != '_') |
if (!String::isUTF8(p) || |
|
!(((*p >= 'A') && (*p <= 'Z')) || |
|
((*p >= 'a') && (*p <= 'z')) || |
|
(*p == '_'))) |
throw XmlException(XmlException::BAD_START_TAG, _line); | throw XmlException(XmlException::BAD_START_TAG, _line); |
|
p++; |
| |
while (*p && |
while ((*p) && |
(isalnum(*p) || *p == '_' || *p == '-' || *p == ':' || *p == '.')) |
(((*p >= 'A') && (*p <= 'Z')) || |
|
((*p >= 'a') && (*p <= 'z')) || |
|
((*p >= '0') && (*p <= '9')) || |
|
*p == '_' || *p == '-' || *p == ':' || *p == '.')) |
p++; | p++; |
| |
// The next character must be a space: | // The next character must be a space: |
|
|
| |
void XmlParser::_getAttributeNameAndEqual(char*& p) | void XmlParser::_getAttributeNameAndEqual(char*& p) |
{ | { |
if (!isalpha(*p) && *p != '_') |
if (!String::isUTF8(p) || |
|
!(((*p >= 'A') && (*p <= 'Z')) || |
|
((*p >= 'a') && (*p <= 'z')) || |
|
(*p == '_'))) |
throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line); | throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line); |
|
p++; |
| |
while (*p && |
while ((*p) && |
(isalnum(*p) || *p == '_' || *p == '-' || *p == ':' || *p == '.')) |
(((*p >= 'A') && (*p <= 'Z')) || |
|
((*p >= 'a') && (*p <= 'z')) || |
|
((*p >= '0') && (*p <= '9')) || |
|
*p == '_' || *p == '-' || *p == ':' || *p == '.')) |
p++; | p++; |
| |
char* term = p; | char* term = p; |
|
|
{ | { |
if (*p == '&') | if (*p == '&') |
{ | { |
// Look for predefined entity reference: |
// Process character or entity reference |
| |
Boolean found = false; |
Uint16 referenceChar = 0; |
|
Uint32 referenceLength = 0; |
|
XmlException::Code code = XmlException::MALFORMED_REFERENCE; |
| |
for (Uint32 i = 0; i < _REFERENCES_SIZE; i++) |
if (*(p+1) == '#') |
{ | { |
Uint32 length = _references[i].length; |
// Found a character (numeric) reference |
const char* match = _references[i].match; |
// Determine whether it is decimal or hex |
|
if (*(p+2) == 'x') |
|
{ |
|
// Decode a hexadecimal character reference |
|
char* q = p+3; |
| |
if (strncmp(p, _references[i].match, length) == 0) |
// At most four digits are allowed, plus trailing ';' |
|
Uint32 numDigits; |
|
for (numDigits = 0; numDigits < 5; numDigits++, q++) |
{ | { |
found = true; |
if (isdigit(*q)) |
*p = _references[i].replacement; |
{ |
char* q = p + length; |
referenceChar = (referenceChar << 4); |
rem = rem - length + 1; |
referenceChar += (*q - '0'); |
memmove(p + 1, q, rem); |
} |
|
else if ((*q >= 'A') && (*q <= 'F')) |
|
{ |
|
referenceChar = (referenceChar << 4); |
|
referenceChar += (*q - 'A' + 10); |
|
} |
|
else if ((*q >= 'a') && (*q <= 'f')) |
|
{ |
|
referenceChar = (referenceChar << 4); |
|
referenceChar += (*q - 'a' + 10); |
|
} |
|
else if (*q == ';') |
|
{ |
|
break; |
|
} |
|
else |
|
{ |
|
throw XmlException(code, _line); |
} | } |
} | } |
| |
// If not found, then at least make sure it is well formed: |
// Hex number must be 1 - 4 digits |
|
if ((numDigits == 0) || (numDigits > 4)) |
|
{ |
|
throw XmlException(code, _line); |
|
} |
| |
if (!found) |
// ATTN: Currently do not support 16-bit characters |
|
if (referenceChar > 0xff) |
{ | { |
char* start = p; |
// ATTN: Is there a good way to say "unsupported"? |
p++; |
throw XmlException(code, _line); |
|
} |
| |
XmlException::Code code = XmlException::MALFORMED_REFERENCE; |
referenceLength = numDigits + 4; |
|
} |
|
else |
|
{ |
|
// Decode a decimal character reference |
|
Uint32 newChar = 0; |
|
char* q = p+2; |
| |
if (isalpha(*p) || *p == '_') |
// At most five digits are allowed, plus trailing ';' |
|
Uint32 numDigits; |
|
for (numDigits = 0; numDigits < 6; numDigits++, q++) |
|
{ |
|
if (isdigit(*q)) |
|
{ |
|
newChar = (newChar * 10); |
|
newChar += (*q - '0'); |
|
} |
|
else if (*q == ';') |
{ | { |
for (p++; *p && *p != ';'; p++) |
break; |
|
} |
|
else |
{ | { |
if (!isalnum(*p) && *p != '_') |
|
throw XmlException(code, _line); | throw XmlException(code, _line); |
} | } |
} | } |
else if (*p == '#') |
|
|
// Decimal number must be 1 - 5 digits and fit in 16 bits |
|
if ((numDigits == 0) || (numDigits > 5) || |
|
(newChar > 0xffff)) |
{ | { |
for (p++ ; *p && *p != ';'; p++) |
throw XmlException(code, _line); |
|
} |
|
|
|
// ATTN: Currently do not support 16-bit characters |
|
if (newChar > 0xff) |
{ | { |
if (!isdigit(*p)) |
// ATTN: Is there a good way to say "unsupported"? |
throw XmlException(code, _line); | throw XmlException(code, _line); |
} | } |
|
|
|
referenceChar = Uint16(newChar); |
|
referenceLength = numDigits + 3; |
|
} |
|
} |
|
else |
|
{ |
|
// Check for entity reference |
|
// ATTN: Inefficient if many entity references are supported |
|
Uint32 i; |
|
for (i = 0; i < _REFERENCES_SIZE; i++) |
|
{ |
|
Uint32 length = _references[i].length; |
|
const char* match = _references[i].match; |
|
|
|
if (strncmp(p, _references[i].match, length) == 0) |
|
{ |
|
referenceChar = _references[i].replacement; |
|
referenceLength = length; |
|
break; |
|
} |
} | } |
| |
if (*p != ';') |
if (i == _REFERENCES_SIZE) |
|
{ |
|
// Didn't recognize the entity reference |
|
// ATTN: Is there a good way to say "unsupported"? |
throw XmlException(code, _line); | throw XmlException(code, _line); |
|
|
rem -= p - start; |
|
} | } |
} | } |
|
|
|
// Replace the reference with the correct character |
|
*p = (char)referenceChar; |
|
char* q = p + referenceLength; |
|
rem = rem - referenceLength + 1; |
|
memmove(p + 1, q, rem); |
|
} |
} | } |
} | } |
| |
|
|
| |
return; | return; |
} | } |
else if (isalpha(*p) || *p == '_') |
else if (String::isUTF8(p) && |
|
(((*p >= 'A') && (*p <= 'Z')) || |
|
((*p >= 'a') && (*p <= 'z')) || |
|
(*p == '_'))) |
{ | { |
entry.type = XmlEntry::START_TAG; | entry.type = XmlEntry::START_TAG; |
entry.text = p; | entry.text = p; |
|
|
if (!getAttributeValue(name, tmp)) | if (!getAttributeValue(name, tmp)) |
return false; | return false; |
| |
value = tmp; |
value = String(tmp,STRING_FLAG_UTF8); |
return true; | return true; |
} | } |
| |