version 1.10, 2001/06/16 17:30:38
|
version 1.17.8.1, 2002/12/05 01:47:13
|
|
|
//%///////////////////////////////////////////////////////////////////////////// | //%///////////////////////////////////////////////////////////////////////////// |
// | // |
// Copyright (c) 2000, 2001 The Open group, BMC Software, Tivoli Systems, IBM |
// Copyright (c) 2000, 2001, 2002 BMC Software, Hewlett-Packard Company, IBM, |
|
// The Open Group, Tivoli Systems |
// | // |
// Permission is hereby granted, free of charge, to any person obtaining a copy | // Permission is hereby granted, free of charge, to any person obtaining a copy |
// of this software and associated documentation files (the "Software"), to | // of this software and associated documentation files (the "Software"), to |
|
|
// &quot - full quote | // &quot - full quote |
// &apos - apostrophe | // &apos - apostrophe |
// | // |
|
// as well as character (numeric) references: |
|
|
|
// &#49; - decimal reference for character '1' |
|
// &#x31; - hexadecimal reference for character '1' |
|
// |
// 4. Element names and attribute names take the following form: | // 4. Element names and attribute names take the following form: |
// | // |
// [A-Za-z_][A-Za-z_0-9-.:] | // [A-Za-z_][A-Za-z_0-9-.:] |
|
|
// | // |
// TODO: | // TODO: |
// | // |
|
// ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is work. |
// Handle <!DOCTYPE...> sections which are complicated (containing | // Handle <!DOCTYPE...> sections which are complicated (containing |
// rules rather than references to files). | // rules rather than references to files). |
// | // |
// Handle reference of this form: "&#913;" |
|
// |
|
// Remove newlines from string literals: | // Remove newlines from string literals: |
// | // |
// Example: <xyz x="hello | // Example: <xyz x="hello |
|
|
// | // |
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// |
| |
|
#include <Pegasus/Common/Config.h> |
#include <cctype> | #include <cctype> |
#include <cassert> |
|
#include <cstdio> | #include <cstdio> |
#include <cstdlib> | #include <cstdlib> |
#include <cstring> | #include <cstring> |
|
|
char replacement; | char replacement; |
}; | }; |
| |
|
// ATTN: Add support for more entity references |
static EntityReference _references[] = | static EntityReference _references[] = |
{ | { |
{ "&", 5, '&' }, | { "&", 5, '&' }, |
|
|
{ | { |
if (*p == '&') | if (*p == '&') |
{ | { |
// Look for predefined entity reference: |
// Process character or entity reference |
| |
Boolean found = false; |
Uint16 referenceChar = 0; |
|
Uint32 referenceLength = 0; |
|
XmlException::Code code = XmlException::MALFORMED_REFERENCE; |
| |
for (Uint32 i = 0; i < _REFERENCES_SIZE; i++) |
if (*(p+1) == '#') |
{ | { |
Uint32 length = _references[i].length; |
// Found a character (numeric) reference |
const char* match = _references[i].match; |
// Determine whether it is decimal or hex |
|
if (*(p+2) == 'x') |
|
{ |
|
// Decode a hexadecimal character reference |
|
char* q = p+3; |
| |
if (strncmp(p, _references[i].match, length) == 0) |
// At most four digits are allowed, plus trailing ';' |
|
Uint32 numDigits; |
|
for (numDigits = 0; numDigits < 5; numDigits++, q++) |
{ | { |
found = true; |
if (isdigit(*q)) |
*p = _references[i].replacement; |
{ |
char* q = p + length; |
referenceChar = (referenceChar << 4); |
rem = rem - length + 1; |
referenceChar += (*q - '0'); |
memmove(p + 1, q, rem); |
} |
|
else if ((*q >= 'A') && (*q <= 'F')) |
|
{ |
|
referenceChar = (referenceChar << 4); |
|
referenceChar += (*q - 'A' + 10); |
|
} |
|
else if ((*q >= 'a') && (*q <= 'f')) |
|
{ |
|
referenceChar = (referenceChar << 4); |
|
referenceChar += (*q - 'a' + 10); |
|
} |
|
else if (*q == ';') |
|
{ |
|
break; |
|
} |
|
else |
|
{ |
|
throw XmlException(code, _line); |
} | } |
} | } |
| |
// If not found, then at least make sure it is well formed: |
// Hex number must be 1 - 4 digits |
|
if ((numDigits == 0) || (numDigits > 4)) |
|
{ |
|
throw XmlException(code, _line); |
|
} |
| |
if (!found) |
// ATTN: Currently do not support 16-bit characters |
|
if (referenceChar > 0xff) |
{ | { |
char* start = p; |
// ATTN: Is there a good way to say "unsupported"? |
p++; |
throw XmlException(code, _line); |
|
} |
| |
XmlException::Code code = XmlException::MALFORMED_REFERENCE; |
referenceLength = numDigits + 4; |
|
} |
|
else |
|
{ |
|
// Decode a decimal character reference |
|
Uint32 newChar = 0; |
|
char* q = p+2; |
| |
if (isalpha(*p) || *p == '_') |
// At most five digits are allowed, plus trailing ';' |
|
Uint32 numDigits; |
|
for (numDigits = 0; numDigits < 6; numDigits++, q++) |
|
{ |
|
if (isdigit(*q)) |
|
{ |
|
newChar = (newChar * 10); |
|
newChar += (*q - '0'); |
|
} |
|
else if (*q == ';') |
{ | { |
for (p++; *p && *p != ';'; p++) |
break; |
|
} |
|
else |
{ | { |
if (!isalnum(*p) && *p != '_') |
|
throw XmlException(code, _line); | throw XmlException(code, _line); |
} | } |
} | } |
else if (*p == '#') |
|
|
// Decimal number must be 1 - 5 digits and fit in 16 bits |
|
if ((numDigits == 0) || (numDigits > 5) || |
|
(newChar > 0xffff)) |
{ | { |
for (p++ ; *p && *p != ';'; p++) |
throw XmlException(code, _line); |
|
} |
|
|
|
// ATTN: Currently do not support 16-bit characters |
|
if (newChar > 0xff) |
{ | { |
if (!isdigit(*p)) |
// ATTN: Is there a good way to say "unsupported"? |
throw XmlException(code, _line); | throw XmlException(code, _line); |
} | } |
|
|
|
referenceChar = Uint16(newChar); |
|
referenceLength = numDigits + 4; |
} | } |
|
} |
|
else |
|
{ |
|
// Check for entity reference |
|
// ATTN: Inefficient if many entity references are supported |
|
Uint32 i; |
|
for (i = 0; i < _REFERENCES_SIZE; i++) |
|
{ |
|
Uint32 length = _references[i].length; |
|
const char* match = _references[i].match; |
| |
if (*p != ';') |
if (strncmp(p, _references[i].match, length) == 0) |
throw XmlException(code, _line); |
{ |
|
referenceChar = _references[i].replacement; |
|
referenceLength = length; |
|
break; |
|
} |
|
} |
| |
rem -= p - start; |
if (i == _REFERENCES_SIZE) |
|
{ |
|
// Didn't recognize the entity reference |
|
// ATTN: Is there a good way to say "unsupported"? |
|
throw XmlException(code, _line); |
} | } |
} | } |
|
|
|
// Replace the reference with the correct character |
|
*p = (char)referenceChar; |
|
char* q = p + referenceLength; |
|
rem = rem - referenceLength + 1; |
|
memmove(p + 1, q, rem); |
|
} |
} | } |
} | } |
| |