version 1.3, 2015/04/20 18:10:36
|
version 1.4, 2015/04/20 18:20:37
|
|
|
#include <stdlib.h> | #include <stdlib.h> |
#include <stdarg.h> | #include <stdarg.h> |
#include <ctype.h> | #include <ctype.h> |
#include <base/io.h> |
#include <pal/format.h> |
| |
#if defined(_MSC_VER) | #if defined(_MSC_VER) |
/* PreFast - reviewed and believed to be false-positive*/ | /* PreFast - reviewed and believed to be false-positive*/ |
|
|
| |
#endif /* _MSC_VER */ | #endif /* _MSC_VER */ |
| |
|
#if defined(CONFIG_ENABLE_WCHAR) |
|
# define T(STR) L##STR |
|
# define XML_strtoul wcstoul |
|
# define XML_strcmp wcscmp |
|
# define XML_strlen wcslen |
|
#else |
|
# define T(STR) STR |
|
# define T(STR) STR |
|
# define XML_strtoul strtoul |
|
# define XML_strcmp strcmp |
|
# define XML_strlen strlen |
|
#endif |
|
|
/* | /* |
**============================================================================== | **============================================================================== |
** | ** |
|
|
* _spaceChar['\t'] => 2 | * _spaceChar['\t'] => 2 |
* _spaceChar[' '] => 2 | * _spaceChar[' '] => 2 |
*/ | */ |
static unsigned char _spaceChar[256] = |
static const unsigned char _spaceChar[256] = |
{ | { |
0,0,0,0,0,0,0,0,0,2,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | 0,0,0,0,0,0,0,0,0,2,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
}; | }; |
| |
INLINE int _IsSpace(char c) |
INLINE int _IsSpace(XML_Char c) |
{ | { |
return _spaceChar[(unsigned int)c]; |
#if defined(CONFIG_ENABLE_WCHAR) |
|
if (c >= 0 && c < 256) |
|
return _spaceChar[(unsigned char)c]; |
|
else |
|
return 0; |
|
#else |
|
return _spaceChar[(unsigned char)c]; |
|
#endif |
} | } |
| |
/* Matches XML name characters of the form: [A-Za-z_][A-Za-z0-9_-.:]* | /* Matches XML name characters of the form: [A-Za-z_][A-Za-z0-9_-.:]* |
* _nameChar[A-Za-z_] => 2 (first character) | * _nameChar[A-Za-z_] => 2 (first character) |
* _nameChar[A-Za-z0-9_-.:] => 1 or 2 (inner character) | * _nameChar[A-Za-z0-9_-.:] => 1 or 2 (inner character) |
*/ | */ |
static unsigned char _nameChar[256] = |
static const unsigned char _nameChar[256] = |
{ | { |
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, | 0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, |
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
}; | }; |
| |
INLINE int _IsFirst(char c) |
/* |
|
Skips N characters while checking that end of the string has not been hit |
|
*/ |
|
INLINE XML_Char * _SkipChars(_In_z_ XML_Char* p, size_t count) |
|
{ |
|
size_t i; |
|
for (i = 0; i < count; ++i) |
|
{ |
|
if (*p == 0) |
|
{ |
|
return NULL; |
|
} |
|
|
|
p++; |
|
} |
|
|
|
return p; |
|
} |
|
|
|
INLINE int _IsFirst(XML_Char c) |
{ | { |
return _nameChar[(unsigned int)c] & 2; |
#if defined(CONFIG_ENABLE_WCHAR) |
|
if (c >= 0 && c < 256) |
|
return _nameChar[(unsigned char)c] & 2; |
|
else |
|
return 0; |
|
#else |
|
return _nameChar[(unsigned char)c] & 2; |
|
#endif |
} | } |
| |
INLINE int _IsInner(char c) |
INLINE int _IsInner(XML_Char c) |
{ | { |
|
#if defined(CONFIG_ENABLE_WCHAR) |
|
if (c >= 0 && c < 256) |
|
return _nameChar[(unsigned char)c]; |
|
else |
|
return 0; |
|
#else |
return _nameChar[(unsigned char)c]; | return _nameChar[(unsigned char)c]; |
|
#endif |
} | } |
| |
INLINE char* _SkipInner(char* p) |
INLINE XML_Char* _SkipInner(_In_z_ XML_Char* p) |
{ | { |
while (_IsInner(*p)) |
while (*p && _IsInner(*p)) |
p++; | p++; |
| |
return p; | return p; |
} | } |
| |
static char* _SkipSpacesAux(XML* self, char* p) |
static XML_Char* _SkipSpacesAux(_Inout_ XML* self, _In_z_ XML_Char* p) |
{ | { |
unsigned char x; |
XML_UChar x; |
size_t n = 0; | size_t n = 0; |
| |
while ((x = (unsigned char)_IsSpace(*p)) != 0) |
while (*p && (x = (XML_UChar)_IsSpace(*p)) != 0) |
{ | { |
n += 0x01 & x; | n += 0x01 & x; |
p++; | p++; |
|
|
return p; | return p; |
} | } |
| |
INLINE char* _SkipSpaces(XML* self, char* p) |
INLINE XML_Char* _SkipSpaces(_Inout_ XML* self, _In_z_ XML_Char* p) |
{ | { |
if (!_IsSpace(p[0])) |
if (!p[0] || !_IsSpace(p[0])) |
return p; | return p; |
| |
if (p[0] == '\n') | if (p[0] == '\n') |
self->line++; | self->line++; |
| |
if (!_IsSpace(p[1])) |
if (!p[1] || !_IsSpace(p[1])) |
return &p[1]; | return &p[1]; |
| |
if (p[1] == '\n') | if (p[1] == '\n') |
self->line++; | self->line++; |
| |
if (!_IsSpace(p[2])) |
if (!p[2] || !_IsSpace(p[2])) |
return &p[2]; | return &p[2]; |
| |
if (p[2] == '\n') | if (p[2] == '\n') |
|
|
return _SkipSpacesAux(self, &p[3]); | return _SkipSpacesAux(self, &p[3]); |
} | } |
| |
INLINE char* _ToEntityRef(XML* self, char* p, char* ch) |
INLINE XML_Char* _ToEntityRef(_Inout_ XML* self, _In_z_ XML_Char* p, _Out_ XML_Char* ch) |
{ | { |
/* Note: we collected the following statistics on the frequency of | /* Note: we collected the following statistics on the frequency of |
* each entity reference in a large body of XML documents: | * each entity reference in a large body of XML documents: |
|
|
return p + 4; | return p + 4; |
} | } |
| |
XML_Raise(self, "bad entity reference"); |
*ch = 0; |
|
XML_Raise(self, XML_ERROR_BAD_ENTITY_REFERENCE); |
return p; | return p; |
} | } |
| |
INLINE char* _ToCharRef(XML* self, char* p, char* ch) |
INLINE XML_Char* _ToCharRef(_Inout_ XML* self, _In_z_ XML_Char* p, _Out_ XML_Char* ch) |
{ | { |
char* end; |
XML_Char* end = NULL; |
unsigned long x; | unsigned long x; |
| |
if (*p == 'x') | if (*p == 'x') |
{ | { |
p++; | p++; |
x = strtoul(p, &end, 16); |
x = XML_strtoul(p, &end, 16); |
} | } |
else | else |
{ | { |
x = strtoul(p, &end, 10); |
x = XML_strtoul(p, &end, 10); |
} | } |
| |
if (end == p || *end != ';' || x > 255) | if (end == p || *end != ';' || x > 255) |
{ | { |
*ch = '\0'; | *ch = '\0'; |
XML_Raise(self, "bad character reference"); |
XML_Raise(self, XML_ERROR_BAD_CHARACTER_REFERENCE); |
return p; | return p; |
} | } |
| |
*ch = (char)x; |
*ch = (XML_Char)x; |
| |
return end + 1; | return end + 1; |
} | } |
| |
INLINE char* _ToRef(XML* self, char* p, char* ch) |
INLINE XML_Char* _ToRef(_Inout_ XML* self, _In_z_ XML_Char* p, _Out_ XML_Char* ch) |
{ | { |
/* Examples: | /* Examples: |
* @ | * @ |
|
|
return _ToEntityRef(self, p, ch); | return _ToEntityRef(self, p, ch); |
} | } |
| |
/* Reduce entity references and remove leading and trailing whitespace */ |
|
static char* _ReduceAttrValue(XML* self, char** pInOut, char eos) |
|
{ |
|
/* Matches all but '\0', '\'', '"', and '&'. All matching charcters | /* Matches all but '\0', '\'', '"', and '&'. All matching charcters |
* yeild 2, except for '\n', which yields 1 | * yeild 2, except for '\n', which yields 1 |
*/ | */ |
static unsigned char _match[256] = |
static const unsigned char _ReduceAttrValueMatchChars[256] = |
{ | { |
0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | 0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | 1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
}; | }; |
char* p = *pInOut; |
|
char* end = p; |
INLINE int _ReduceAttrValueMatch(XML_Char c) |
|
{ |
|
#if defined(CONFIG_ENABLE_WCHAR) |
|
if (c >= 0 && c < 256) |
|
return _ReduceAttrValueMatchChars[(unsigned char)c]; |
|
else |
|
return 1; |
|
#else |
|
return _ReduceAttrValueMatchChars[(unsigned char)c]; |
|
#endif |
|
} |
|
|
|
/* Reduce entity references and remove leading and trailing whitespace */ |
|
static XML_Char* _ReduceAttrValue(_Inout_ XML* self, _Inout_ XMLCharPtr* pInOut, XML_Char eos) |
|
{ |
|
_Null_terminated_ XML_Char* p = *pInOut; |
|
_Null_terminated_ XML_Char* end; |
size_t n = 0; | size_t n = 0; |
| |
/* Skip uninteresting characters */ |
if (!p) |
for (;;) |
return NULL; |
|
|
|
while (*p) |
|
{ |
|
if (_ReduceAttrValueMatch(*p)) |
{ | { |
while (_match[(unsigned char)*p]) |
|
p++; | p++; |
|
} |
if (*p != '\n') |
else |
|
{ |
|
if (*p == '\0' || *p != '\n') |
break; | break; |
| |
self->line++; | self->line++; |
p++; | p++; |
} | } |
|
} |
| |
end = p; | end = p; |
| |
|
|
{ | { |
if (*p == '&') | if (*p == '&') |
{ | { |
char c = '\0'; |
XML_Char c = '\0'; |
char* tmp; |
_Null_terminated_ XML_Char* tmp; |
| |
p++; | p++; |
tmp = _ToRef(self, p, &c); | tmp = _ToRef(self, p, &c); |
|
|
return end; | return end; |
} | } |
| |
/* Reduce character data, advance p, and return pointer to end */ |
|
static char* _ReduceCharData(XML* self, char** pInOut) |
|
{ |
|
/* Match all but these: '\0', '<', '&', '\n' */ | /* Match all but these: '\0', '<', '&', '\n' */ |
static unsigned char _match[256] = |
static const unsigned char _ReduceCharDataMatchChars[256] = |
{ | { |
0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | 0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1, | 1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1, |
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
}; | }; |
char* p = *pInOut; |
|
char* end = p; |
INLINE int _ReduceCharDataMatch(XML_Char c) |
|
{ |
|
#if defined(CONFIG_ENABLE_WCHAR) |
|
if (c >= 0 && c < 256) |
|
return _ReduceCharDataMatchChars[(unsigned char)c]; |
|
else |
|
return 1; |
|
#else |
|
return _ReduceCharDataMatchChars[(unsigned char)c]; |
|
#endif |
|
} |
|
|
|
/* Reduce character data, advance p, and return pointer to end */ |
|
static XML_Char* _ReduceCharData(_Inout_ XML* self, _Inout_ XMLCharPtr* pInOut) |
|
{ |
|
XML_Char* p = *pInOut; |
|
XML_Char* end; |
size_t n = 0; | size_t n = 0; |
| |
for (;;) |
if (!p) |
|
return NULL; |
|
|
|
while (*p) |
|
{ |
|
if (_ReduceCharDataMatch(*p)) |
{ | { |
while ((_match[(unsigned char)*p])) |
|
p++; | p++; |
|
} |
if (*p != '\n') |
else |
|
{ |
|
if (!*p || *p != '\n') |
break; | break; |
| |
p++; |
|
self->line++; | self->line++; |
|
p++; |
|
} |
} | } |
| |
end = p; | end = p; |
|
|
} | } |
| |
/* Seek next tag start */ | /* Seek next tag start */ |
|
#ifdef _PREFAST_ |
|
#pragma prefast (push) |
|
#pragma prefast (disable: 26018) |
|
/* OACR does not like loops inside loops that modify buffer pointers, the logic however is correct */ |
|
#endif |
while (*p && *p != '<') | while (*p && *p != '<') |
|
#ifdef _PREFAST_ |
|
#pragma prefast (pop) |
|
#endif |
{ | { |
if (*p == '&') | if (*p == '&') |
{ | { |
char c = '\0'; |
XML_Char c = '\0'; |
char* tmp; |
XML_Char* tmp; |
| |
p++; | p++; |
tmp = _ToRef(self, p, &c); | tmp = _ToRef(self, p, &c); |
|
|
{ | { |
for (;;) | for (;;) |
{ | { |
while (_match[(unsigned char)*p]) |
while ((_ReduceCharDataMatch(*p))) |
*end++ = *p++; | *end++ = *p++; |
| |
if (*p != '\n') | if (*p != '\n') |
|
|
return end; | return end; |
} | } |
| |
/* Calculate a fast hash code for a non-zero-length strings */ |
/* Calculate a fast hash code for a strings */ |
INLINE unsigned int _HashCode(const char* s, size_t n) |
INLINE unsigned int _HashCode(_In_reads_z_(n) const XML_Char* s, size_t n) |
{ | { |
/* This hash algorithm excludes the first character since for many strings | /* This hash algorithm excludes the first character since for many strings |
* (e.g., URIs) the first character is not unique. Instead the hash | * (e.g., URIs) the first character is not unique. Instead the hash |
|
|
* (1) The length | * (1) The length |
* (3) The last chacter | * (3) The last chacter |
*/ | */ |
return (int)(n ^ s[n-1]); |
return n ? (int)(n ^ s[n-1]) : 0; |
} | } |
| |
/* Map a URI to a single character namespace identifier */ | /* Map a URI to a single character namespace identifier */ |
static char _FindNamespaceID( |
static XML_Char _FindNamespaceID( |
XML* self, |
_Inout_ XML* self, |
const char* uri, |
_In_reads_z_(uriSize) const XML_Char* uri, |
size_t uriSize) | size_t uriSize) |
{ | { |
size_t i; | size_t i; |
|
|
{ | { |
XML_RegisteredNameSpace* rns = &self->registeredNameSpaces[i]; | XML_RegisteredNameSpace* rns = &self->registeredNameSpaces[i]; |
| |
if (rns->uriCode == code && strcmp(rns->uri, uri) == 0) |
if (rns->uriCode == code && XML_strcmp(rns->uri, uri) == 0) |
return rns->id; | return rns->id; |
} | } |
| |
|
|
return '\0'; | return '\0'; |
} | } |
| |
/* Translate the namespace name used in the document to a single-character |
#define XML_NS "http://www.w3.org/XML/1998/namespace" |
* namespace name specified by the client in the XML_RegisterNameSpace() call. |
#define T_XML_NS T("http://www.w3.org/XML/1998/namespace") |
* For example: "wsman:OptimizeEnumeration" => "w:OptimizeEnumeration". |
#define XML_NS_LEN (sizeof(XML_NS) - 1) |
*/ |
|
static char* _TranslateName(XML* self, char* name, char* colon) |
#define XMLNS_NS "http://www.w3.org/2000/xmlns/" |
{ |
#define T_XMLNS_NS T("http://www.w3.org/2000/xmlns/") |
unsigned int code; |
#define XMLNS_NS_LEN (sizeof(XMLNS_NS) - 1) |
|
|
|
/* Map a prefix to its XML namespace |
|
* A non-empty prefix that is unmapped results in an error */ |
|
static const XML_NameSpace* _FindNamespace(_Inout_ XML* self, _In_z_ const XML_Char* prefix) |
|
{ |
|
static const XML_NameSpace s_empty = {T(""), 0, T(""), 0, '\0', 0}; |
|
static const XML_NameSpace s_xml = {T("xml"), 3 ^ 'l', T_XML_NS, XML_NS_LEN, '\0', 0}; |
|
static const XML_NameSpace s_xmlns = {T("xmlns"), 5 ^ 's', T_XMLNS_NS, XMLNS_NS_LEN, '\0', 0}; |
|
unsigned int code = _HashCode(prefix, XML_strlen(prefix)); |
size_t i; | size_t i; |
| |
/* Temporarily zero-out the ':' character */ |
/* Special case: the 'xml' and 'xmlns' namespaces are fixed. */ |
*colon = '\0'; |
if (prefix[0] == 'x' && |
|
prefix[1] == 'm' && |
/* Calculate hash code */ |
prefix[2] == 'l') |
code = _HashCode(name, colon - name); |
{ |
|
if (prefix[3] == '\0') |
|
return &s_xml; |
|
else if (prefix[3] == 'n' && |
|
prefix[4] == 's' && |
|
prefix[5] == '\0') |
|
return &s_xmlns; |
|
} |
| |
/* First check single entry cache */ | /* First check single entry cache */ |
if (self->nameSpacesCacheIndex != (size_t)-1) | if (self->nameSpacesCacheIndex != (size_t)-1) |
{ | { |
XML_NameSpace* ns = &self->nameSpaces[self->nameSpacesCacheIndex]; |
XML_NameSpace* ns; |
|
_Analysis_assume_(self->nameSpacesCacheIndex < XML_MAX_NAMESPACES); |
|
ns = &self->nameSpaces[self->nameSpacesCacheIndex]; |
| |
if (ns->nameCode == code && strcmp(ns->name, name) == 0) |
if (ns->nameCode == code && XML_strcmp(ns->name, prefix) == 0) |
{ |
|
if (ns->id) |
|
{ |
|
colon[-1] = ns->id; |
|
*colon = ':'; |
|
return colon - 1; |
|
} |
|
else |
|
{ | { |
*colon = ':'; |
return ns; |
return name; |
|
} |
|
} | } |
} | } |
| |
/* Translate name to the one found in the nameSpaces[] array */ | /* Translate name to the one found in the nameSpaces[] array */ |
for (i = self->nameSpacesSize; i--; ) | for (i = self->nameSpacesSize; i--; ) |
{ | { |
XML_NameSpace* ns = &self->nameSpaces[i]; |
const XML_NameSpace* ns = &self->nameSpaces[i]; |
| |
if (ns->nameCode == code && strcmp(ns->name, name) == 0) |
if (ns && ns->nameCode == code && XML_strcmp(ns->name, prefix) == 0) |
{ | { |
/* Cache */ | /* Cache */ |
self->nameSpacesCacheIndex = i; | self->nameSpacesCacheIndex = i; |
| |
if (ns->id) |
return ns; |
{ |
|
colon[-1] = ns->id; |
|
*colon = ':'; |
|
return colon - 1; |
|
} |
|
else |
|
{ |
|
*colon = ':'; |
|
return name; |
|
} | } |
} | } |
|
|
|
/* No match |
|
* For the empty prefix this means there is no namespace |
|
* Otherwise, this is an error */ |
|
if (prefix[0]) |
|
{ |
|
XML_Raise(self, XML_ERROR_UNDEFINED_NAMESPACE_PREFIX, tcs(prefix)); |
|
return NULL; |
} | } |
| |
/* Restore the ':' character */ |
return &s_empty; |
*colon = ':'; |
|
return name; |
|
} | } |
| |
static void _ParseAttr( | static void _ParseAttr( |
XML* self, |
_Inout_ XML* self, |
XML_Elem* elem, |
_Inout_ XML_Elem* elem, |
char** pInOut) |
_Inout_ XMLCharPtr* pInOut) |
{ |
{ |
char* p = *pInOut; |
_Null_terminated_ XML_Char* p = *pInOut; |
char* name; |
XML_Char* name; |
char* nameEnd; |
XML_Char* nameEnd; |
char* value; |
XML_Char* value; |
char* valueEnd; |
XML_Char* valueEnd; |
char* colon = NULL; |
XML_Char* colon = NULL; |
|
XML_Char* tag = p; |
|
XML_Char* prefix = T(""); |
|
|
|
if (!p) |
|
{ |
|
XML_Raise(self, XML_ERROR_EXPECTED_ATTRIBUTE_NAME); |
|
return; |
|
} |
| |
/* Parse the attribute name */ | /* Parse the attribute name */ |
{ | { |
name = p; | name = p; |
| |
if (!_IsFirst(*p)) |
if (!*p || !_IsFirst(*p)) |
{ | { |
XML_Raise(self, "expected attribute name"); |
XML_Raise(self, XML_ERROR_EXPECTED_ATTRIBUTE_NAME); |
return; | return; |
} | } |
| |
p++; | p++; |
| |
|
if (*p) |
p = _SkipInner(p); | p = _SkipInner(p); |
| |
if (*p == ':') | if (*p == ':') |
|
|
/* Expect a '=' character */ | /* Expect a '=' character */ |
if (*p++ != '=') | if (*p++ != '=') |
{ | { |
XML_Raise(self, "expected '=' character"); |
*nameEnd = '\0'; |
|
XML_Raise(self, XML_ERROR_EXPECTED_ATTRIBUTE_EQUALS, tcs(name)); |
return; | return; |
} | } |
} | } |
|
|
| |
/* Parse the value */ | /* Parse the value */ |
{ | { |
char quote; |
XML_Char quote; |
| |
/* Expect opening quote */ | /* Expect opening quote */ |
if (*p != '"' && *p != '\'') | if (*p != '"' && *p != '\'') |
{ | { |
XML_Raise(self, "expected opening quote"); |
XML_Raise(self, XML_ERROR_EXPECTED_ATTRIBUTE_OPENING_QUOTES, tcs(name)); |
return; | return; |
} | } |
| |
|
|
/* Expect closing quote */ | /* Expect closing quote */ |
if (*p++ != quote) | if (*p++ != quote) |
{ | { |
XML_Raise(self, "expected closing quote"); |
XML_Raise(self, XML_ERROR_EXPECTED_ATTRIBUTE_CLOSING_QUOTES, tcs(name)); |
return; | return; |
} | } |
| |
|
|
/* Skip spaces */ | /* Skip spaces */ |
p = _SkipSpaces(self, p); | p = _SkipSpaces(self, p); |
| |
/* If attribute name is "xmlns", extract namespace */ |
/* Separate the prefix and tag */ |
|
if (colon) |
|
{ |
|
*colon = '\0'; |
|
tag = colon + 1; |
|
prefix = name; |
|
} |
|
/* else prefix is "" and name is the tag */ |
|
|
|
/* If the first/only token is "xmlns", extract namespace */ |
if (name[0] == 'x' && | if (name[0] == 'x' && |
name[1] == 'm' && | name[1] == 'm' && |
name[2] == 'l' && | name[2] == 'l' && |
name[3] == 'n' && | name[3] == 'n' && |
name[4] == 's') |
name[4] == 's' && |
|
name[5] == '\0') |
{ | { |
/* ATTN: implement default namespaces */ |
unsigned int tagCode = 0; |
if (name[5] != ':') |
|
|
/* The namespace of the xmlns:x attribute is fixed */ |
|
prefix = T("xmlns"); |
|
|
|
if (colon) |
{ | { |
XML_Raise(self, "default namespaces not supported: xmlns"); |
/* For non-default namespaces */ |
return; |
tagCode = _HashCode(tag, nameEnd - tag); |
} | } |
| |
/* Add new namespace entry */ | /* Add new namespace entry */ |
|
|
/* Check for stack overflow */ | /* Check for stack overflow */ |
if (self->nameSpacesSize == XML_MAX_NAMESPACES) | if (self->nameSpacesSize == XML_MAX_NAMESPACES) |
{ | { |
XML_Raise(self, "too many namespaces (>%u)", |
XML_Raise(self, XML_ERROR_TOO_MANY_NAMESPACES, |
(int)XML_MAX_NAMESPACES); | (int)XML_MAX_NAMESPACES); |
return; | return; |
} | } |
{ | { |
XML_NameSpace* ns = &self->nameSpaces[self->nameSpacesSize++]; |
XML_NameSpace* newNs = &self->nameSpaces[self->nameSpacesSize++]; |
ns->name = &name[6]; |
newNs->name = colon ? tag : T(""); |
ns->nameCode = _HashCode(ns->name, nameEnd - ns->name); |
newNs->nameCode = tagCode; |
ns->id = _FindNamespaceID(self, value, valueEnd - value); |
newNs->id = _FindNamespaceID(self, value, valueEnd - value); |
ns->uri = value; |
newNs->uri = value; |
ns->depth = self->stackSize; |
newNs->uriSize = valueEnd - value; |
} |
newNs->depth = self->stackSize; |
} | } |
} | } |
else |
|
{ |
|
/* Translate the name (possibly replacing namespace with single char) */ |
|
if (colon) |
|
name = _TranslateName(self, name, colon); |
|
} | } |
| |
/* Append attribute to element */ | /* Append attribute to element */ |
|
|
/* Check for attribute array overflow */ | /* Check for attribute array overflow */ |
if (elem->attrsSize == XML_MAX_ATTRIBUTES) | if (elem->attrsSize == XML_MAX_ATTRIBUTES) |
{ | { |
XML_Raise(self, "too many attributes (>%u)", XML_MAX_ATTRIBUTES); |
elem->data.data[elem->data.size] = 0; //May not have been null termated yet |
|
XML_Raise(self, XML_ERROR_TOO_MANY_ATTRIBUTES, tcs(elem->data.data), (int)XML_MAX_ATTRIBUTES); |
return; | return; |
} | } |
| |
attr = &elem->attrs[elem->attrsSize++]; | attr = &elem->attrs[elem->attrsSize++]; |
attr->name = name; |
attr->name.data = tag; |
|
attr->name.size = nameEnd - tag; |
attr->value = value; | attr->value = value; |
|
attr->valueSize = valueEnd - value; |
|
|
|
/* Save the namespace prefix, which will be translated by the caller */ |
|
attr->name.namespaceUri = prefix; |
} | } |
| |
*pInOut = p; | *pInOut = p; |
} | } |
| |
static void _ParseProcessingInstruction( | static void _ParseProcessingInstruction( |
XML* self, |
_Inout_ XML* self, |
XML_Elem* elem, |
_Inout_ XML_Elem* elem, |
char* p) |
_In_z_ XML_Char* p) |
{ | { |
/* <?xml version="1.0" encoding="UTF-8" standalone="yes"?> */ | /* <?xml version="1.0" encoding="UTF-8" standalone="yes"?> */ |
char* start; |
XML_Char* start; |
char* end; |
XML_Char* end; |
| |
/* Advance past '?' character */ | /* Advance past '?' character */ |
|
if (*p) |
|
{ |
p++; | p++; |
|
} |
| |
/* Get tag identifier */ | /* Get tag identifier */ |
{ | { |
start = p; | start = p; |
| |
|
if (*p) |
|
{ |
p = _SkipInner(p); | p = _SkipInner(p); |
|
} |
| |
if (*p == ':') | if (*p == ':') |
{ | { |
p++; | p++; |
|
if (*p) |
|
{ |
p = _SkipInner(p); | p = _SkipInner(p); |
} | } |
|
} |
| |
/* If input exhuasted */ |
/* If input exhausted */ |
if (*p == '\0') | if (*p == '\0') |
{ | { |
XML_Raise(self, "premature end of input"); |
XML_Raise(self, XML_ERROR_END_OF_XML_INSTRUCTION); |
return; | return; |
} | } |
| |
|
|
} | } |
| |
/* Skip spaces */ | /* Skip spaces */ |
|
if (*p) |
|
{ |
p = _SkipSpaces(self, p); | p = _SkipSpaces(self, p); |
|
} |
|
|
|
elem->type = XML_INSTRUCTION; |
|
elem->data.data = start; |
|
elem->data.size = end - start; |
| |
/* Process attributes */ | /* Process attributes */ |
| |
|
|
} | } |
} | } |
| |
|
if (*p) |
|
{ |
p++; | p++; |
|
} |
| |
/* Skip spaces */ | /* Skip spaces */ |
|
if (*p) |
|
{ |
p = _SkipSpaces(self, p); | p = _SkipSpaces(self, p); |
|
} |
| |
/* Expect '>' */ | /* Expect '>' */ |
if (*p++ != '>') | if (*p++ != '>') |
{ | { |
XML_Raise(self, "expected closing angle bracket"); |
XML_Raise(self, XML_ERROR_END_OF_INSTRUCTION_MISSING); |
return; | return; |
} | } |
| |
/* Return element object */ | /* Return element object */ |
elem->type = XML_INSTRUCTION; |
elem->data.namespaceUri = T(""); |
elem->data = start; |
elem->data.namespaceUriSize = 0; |
elem->size = end - start; |
elem->data.namespaceId = '\0'; |
*end = '\0'; | *end = '\0'; |
| |
self->ptr = p; | self->ptr = p; |
|
|
} | } |
| |
static void _ParseStartTag( | static void _ParseStartTag( |
XML* self, |
_Inout_ XML* self, |
XML_Elem* elem, |
_Inout_ XML_Elem* elem, |
char* p) |
_In_z_ XML_Char* p) |
{ |
{ |
char* name; |
XML_Char* name; |
char* nameEnd; |
XML_Char* nameEnd; |
char* colon = NULL; |
XML_Char* colon = NULL; |
|
XML_Char* prefix = T(""); |
|
const XML_NameSpace* ns; |
|
size_t attr; |
| |
/* Found the root */ | /* Found the root */ |
self->foundRoot = 1; | self->foundRoot = 1; |
|
|
{ | { |
name = p; | name = p; |
| |
if (!_IsFirst(*p++)) |
if (!*p || !_IsFirst(*p++)) |
{ | { |
XML_Raise(self, "expected element name"); |
XML_Raise(self, XML_ERROR_ELEMENT_NAME_EXPECTED); |
return; | return; |
} | } |
| |
|
if (*p) |
|
{ |
p = _SkipInner(p); | p = _SkipInner(p); |
|
} |
| |
if (*p == ':') | if (*p == ':') |
{ | { |
colon = p++; | colon = p++; |
|
if (*p) |
|
{ |
p = _SkipInner(p); | p = _SkipInner(p); |
} | } |
|
} |
| |
/* If input exhuasted */ | /* If input exhuasted */ |
if (*p == '\0') | if (*p == '\0') |
{ | { |
XML_Raise(self, "premature end of input"); |
XML_Raise(self, XML_ERROR_ELEMENT_NAME_PREMATURE_END); |
return; | return; |
} | } |
| |
nameEnd = p; | nameEnd = p; |
} | } |
| |
|
if (colon) |
|
{ |
|
*colon = '\0'; |
|
prefix = name; |
|
name = colon + 1; |
|
} |
|
|
/* Skip spaces */ | /* Skip spaces */ |
p = _SkipSpaces(self, p); | p = _SkipSpaces(self, p); |
| |
|
elem->type = XML_START; |
|
elem->data.data = name; |
|
elem->data.size = nameEnd - name; |
|
|
/* Process attributes */ | /* Process attributes */ |
while (*p && *p != '/' && *p != '>') | while (*p && *p != '/' && *p != '>') |
{ | { |
|
|
return; | return; |
} | } |
| |
|
/* Translate the namespace after parsing xmlns attributes */ |
|
ns = _FindNamespace(self, prefix); |
|
|
|
if (self->status) |
|
return; |
|
|
|
/* Now translate the attribute namespaces */ |
|
/* Unprefixed attributes get a empty namespace */ |
|
for (attr = 0; attr < elem->attrsSize; attr++) |
|
{ |
|
static const XML_NameSpace s_empty = {T(""), 0, T(""), 0, '\0', 0}; |
|
XML_Attr* item = elem->attrs + attr; |
|
const XML_NameSpace* itemNS = &s_empty; |
|
|
|
if (item->name.namespaceUri[0] != '\0') |
|
{ |
|
/* The namespaceUri field contains the prefix */ |
|
itemNS = _FindNamespace(self, item->name.namespaceUri); |
|
|
|
if (self->status) |
|
return; |
|
} |
|
|
|
item->name.namespaceUri = itemNS->uri; |
|
item->name.namespaceUriSize = itemNS->uriSize; |
|
item->name.namespaceId = itemNS->id; |
|
} |
|
|
|
/* Create the element */ |
|
elem->type = XML_START; |
|
elem->data.data = name; |
|
elem->data.size = nameEnd - name; |
|
elem->data.namespaceUri = ns->uri; |
|
elem->data.namespaceUriSize = ns->uriSize; |
|
elem->data.namespaceId = ns->id; |
|
|
/* Check for empty tag */ | /* Check for empty tag */ |
if (*p == '/') | if (*p == '/') |
{ | { |
|
|
/* Null-terminate the tag */ | /* Null-terminate the tag */ |
*nameEnd = '\0'; | *nameEnd = '\0'; |
| |
/* Translate tag name (possibly replacing namespace with single char */ |
|
if (colon) |
|
name = _TranslateName(self, name, colon); |
|
|
|
/* Create the element */ |
|
elem->type = XML_START; |
|
elem->data = name; |
|
elem->size = nameEnd - name; |
|
|
|
/* Inject an empty tag onto element stack */ | /* Inject an empty tag onto element stack */ |
{ | { |
/* Check for stack overflow */ | /* Check for stack overflow */ |
if (self->elemStackSize == XML_MAX_NESTED) | if (self->elemStackSize == XML_MAX_NESTED) |
{ | { |
XML_Raise(self, "element stack overflow (>%u)", XML_MAX_NESTED); |
XML_Raise(self, XML_ERROR_ELEMENT_DEPTH_OVERFLOW, |
|
(int)XML_MAX_NESTED); |
return; | return; |
} | } |
| |
|
|
} | } |
| |
/* Skip space */ | /* Skip space */ |
|
if (*p) |
|
{ |
p = _SkipSpaces(self, p); | p = _SkipSpaces(self, p); |
|
} |
| |
/* Expect '>' */ | /* Expect '>' */ |
if (*p++ != '>') | if (*p++ != '>') |
{ | { |
XML_Raise(self, "expected closing angle bracket"); |
*nameEnd = '\0'; |
|
XML_Raise(self, XML_ERROR_ELEMENT_NAME_NOT_CLOSED, tcs(elem->data.data)); |
return; | return; |
} | } |
| |
|
|
/* Expect '>' */ | /* Expect '>' */ |
if (*p++ != '>') | if (*p++ != '>') |
{ | { |
XML_Raise(self, "expected closing angle bracket"); |
*nameEnd = '\0'; |
|
XML_Raise(self, XML_ERROR_ELEMENT_NAME_NOT_CLOSED, tcs(elem->data.data)); |
return; | return; |
} | } |
| |
/* Zero-terminate the name tag */ | /* Zero-terminate the name tag */ |
*nameEnd = '\0'; | *nameEnd = '\0'; |
| |
/* Translate the namespace prefix */ |
|
if (colon) |
|
name = _TranslateName(self, name, colon); |
|
|
|
/* Push opening tag */ | /* Push opening tag */ |
{ | { |
if (self->stackSize == XML_MAX_NESTED) | if (self->stackSize == XML_MAX_NESTED) |
{ | { |
XML_Raise(self, "element stack overflow (>%u)", XML_MAX_NESTED); |
XML_Raise(self, XML_ERROR_ELEMENT_DEPTH_OVERFLOW, |
|
(int)XML_MAX_NESTED); |
return; | return; |
} | } |
| |
self->stack[self->stackSize].data = name; |
self->stack[self->stackSize] = elem->data; |
self->stack[self->stackSize].size = nameEnd - name; |
|
self->stackSize++; | self->stackSize++; |
self->nesting++; | self->nesting++; |
} | } |
| |
/* Return element object */ |
|
elem->type = XML_START; |
|
elem->data = name; |
|
elem->size = nameEnd - name; |
|
|
|
self->ptr = p; | self->ptr = p; |
| |
if (self->foundRoot) | if (self->foundRoot) |
|
|
} | } |
| |
static void _ParseEndTag( | static void _ParseEndTag( |
XML* self, |
_Inout_ XML* self, |
XML_Elem* elem, |
_Inout_ XML_Elem* elem, |
char* p) |
_In_z_ XML_Char* p) |
{ | { |
/* Closing element: </name> */ | /* Closing element: </name> */ |
char* name; |
XML_Char* name; |
char* nameEnd; |
XML_Char* nameEnd; |
char* colon = NULL; |
XML_Char* colon = NULL; |
|
XML_Char* prefix = T(""); |
|
const XML_NameSpace *ns; |
| |
|
if (*p) |
|
{ |
p++; | p++; |
|
} |
| |
/* Skip space */ | /* Skip space */ |
|
if (*p) |
|
{ |
p = _SkipSpaces(self, p); | p = _SkipSpaces(self, p); |
|
} |
| |
name = p; | name = p; |
| |
/* Skip name */ | /* Skip name */ |
{ | { |
if (!_IsFirst(*p++)) |
if (!*p || !_IsFirst(*p++)) |
{ | { |
XML_Raise(self, "expected element name"); |
XML_Raise(self, XML_ERROR_ELEMENT_NAME_EXPECTED_ELEM_END); |
return; | return; |
} | } |
| |
|
if (*p) |
|
{ |
p = _SkipInner(p); | p = _SkipInner(p); |
|
} |
| |
if (*p == ':') | if (*p == ':') |
{ | { |
colon = p++; | colon = p++; |
|
if (*p) |
|
{ |
p = _SkipInner(p); | p = _SkipInner(p); |
} | } |
} | } |
|
} |
| |
/* If input exhuasted */ | /* If input exhuasted */ |
if (*p == '\0') | if (*p == '\0') |
{ | { |
XML_Raise(self, "premature end of input"); |
XML_Raise(self, XML_ERROR_ELEMENT_NAME_PREMATURE_END_ELEM_END); |
return; | return; |
} | } |
| |
nameEnd = p; | nameEnd = p; |
| |
|
if (colon) |
|
{ |
|
*colon = '\0'; |
|
prefix = name; |
|
name = colon + 1; |
|
} |
|
|
/* Skip spaces */ | /* Skip spaces */ |
p = _SkipSpaces(self, p); | p = _SkipSpaces(self, p); |
| |
/* Expect '>' */ | /* Expect '>' */ |
if (*p++ != '>') | if (*p++ != '>') |
{ | { |
XML_Raise(self, "expected closing angle bracket"); |
XML_Raise(self,XML_ERROR_ELEMENT_NAME_NOT_CLOSED_ELEM_END, tcs(name)); |
return; | return; |
} | } |
| |
/* Null terminate name */ | /* Null terminate name */ |
*nameEnd = '\0'; | *nameEnd = '\0'; |
| |
/* Tranlate the namespace part of the name */ |
ns = _FindNamespace(self, prefix); |
if (colon) |
|
name = _TranslateName(self, name, colon); |
if (self->status) |
|
return; |
| |
/* Return element object */ | /* Return element object */ |
elem->type = XML_END; | elem->type = XML_END; |
elem->data = name; |
elem->data.data = name; |
elem->size = nameEnd - name; |
elem->data.size = nameEnd - name; |
|
elem->data.namespaceUri = ns->uri; |
|
elem->data.namespaceUriSize = ns->uriSize; |
|
elem->data.namespaceId = ns->id; |
| |
/* Match opening name */ | /* Match opening name */ |
{ | { |
/* Check for stack underflow */ | /* Check for stack underflow */ |
if (self->stackSize-- == 0) | if (self->stackSize-- == 0) |
{ | { |
XML_Raise(self, "too many closing tags: %s", name); |
XML_Raise(self, XML_ERROR_ELEMENT_TOO_MANY_ENDS, tcs(name)); |
return; | return; |
} | } |
| |
|
|
{ | { |
XML_Name* xn = &self->stack[self->stackSize]; | XML_Name* xn = &self->stack[self->stackSize]; |
| |
if (memcmp(xn->data, name, xn->size) != 0) |
if (XML_strcmp(xn->data, name) != 0 || |
|
xn->namespaceId != ns->id || |
|
(ns->id == 0 && XML_strcmp(xn->namespaceUri, ns->uri) != 0)) |
{ | { |
XML_Raise(self, "open/close tag mismatch: %s/%s", |
XML_Raise(self, XML_ERROR_ELEMENT_END_ELEMENT_TAG_NOT_MATCH_START_TAG, |
self->stack[self->stackSize].data, name); |
tcs(self->stack[self->stackSize].data), tcs(name)); |
return; | return; |
} | } |
} | } |
|
|
} | } |
| |
static void _ParseComment( | static void _ParseComment( |
XML* self, |
_Inout_ XML* self, |
XML_Elem* elem, |
_Inout_ XML_Elem* elem, |
char* p) |
_In_z_ XML_Char* p) |
{ | { |
/* Comment: <!-- blah blah blah --> */ | /* Comment: <!-- blah blah blah --> */ |
char* start; |
XML_Char* start; |
char* end; |
XML_Char* end; |
| |
p += 2; |
p = _SkipChars(p, 2); |
|
if (!*p) |
|
{ |
|
XML_Raise(self, XML_ERROR_COMMENT_PREMATURE_END); |
|
return; |
|
} |
start = p; | start = p; |
| |
while (*p) | while (*p) |
|
|
{ | { |
if (p[2] != '>') | if (p[2] != '>') |
{ | { |
XML_Raise(self, "expected closing comment"); |
XML_Raise(self, XML_ERROR_COMMENT_END_EXPECTED); |
return; | return; |
} | } |
| |
|
|
| |
/* Prepare element */ | /* Prepare element */ |
elem->type = XML_COMMENT; | elem->type = XML_COMMENT; |
elem->data = start; |
elem->data.data = start; |
elem->size = end - start; |
elem->data.size = end - start; |
|
elem->data.namespaceUri = T(""); |
|
elem->data.namespaceUriSize = 0; |
|
elem->data.namespaceId = '\0'; |
*end = '\0'; | *end = '\0'; |
| |
/* Set next state */ | /* Set next state */ |
|
|
p++; | p++; |
} | } |
| |
XML_Raise(self, "malformed comment"); |
XML_Raise(self, XML_ERROR_COMMENT_PREMATURE_END); |
} | } |
| |
static void _ParseCDATA( | static void _ParseCDATA( |
XML* self, |
_Inout_ XML* self, |
XML_Elem* elem, |
_Inout_ XML_Elem* elem, |
char* p) |
_In_z_ XML_Char* p) |
{ | { |
/* <![CDATA[...]]> */ | /* <![CDATA[...]]> */ |
char* start; |
XML_Char* start; |
char* end; |
XML_Char* end; |
| |
p += 7; |
p = _SkipChars(p, 7); |
|
if (!*p) |
|
{ |
|
XML_Raise(self, XML_ERROR_CDATA_PREMATURE_END); |
|
return; |
|
} |
start = p; | start = p; |
| |
while (*p) | while (*p) |
|
|
| |
/* Prepare element */ | /* Prepare element */ |
elem->type = XML_CHARS; | elem->type = XML_CHARS; |
elem->data = start; |
elem->data.data = start; |
elem->size = end - start; |
elem->data.size = end - start; |
|
elem->data.namespaceUri = T(""); |
|
elem->data.namespaceUriSize = 0; |
|
elem->data.namespaceId = '\0'; |
*end = '\0'; | *end = '\0'; |
| |
/* Set next state */ | /* Set next state */ |
|
|
p++; | p++; |
} | } |
| |
XML_Raise(self, "unterminated CDATA section"); |
XML_Raise(self, XML_ERROR_CDATA_PREMATURE_END); |
return; | return; |
} | } |
| |
static void _ParseDOCTYPE( | static void _ParseDOCTYPE( |
XML* self, |
_Inout_ XML* self, |
XML_Elem* elem, |
_Inout_ XML_Elem* elem, |
char* p) |
_In_z_ XML_Char* p) |
{ | { |
MI_UNUSED(elem); | MI_UNUSED(elem); |
| |
/* Recognize <!DOCTYPE ...> */ | /* Recognize <!DOCTYPE ...> */ |
p += 7; |
p = _SkipChars(p, 7); |
|
if (!*p) |
|
{ |
|
XML_Raise(self, XML_ERROR_DOCTYPE_PREMATURE_END); |
|
return; |
|
} |
| |
while (*p && *p != '>') | while (*p && *p != '>') |
{ | { |
|
|
| |
if (*p++ != '>') | if (*p++ != '>') |
{ | { |
XML_Raise(self, "unterminated DOCTYPE element"); |
XML_Raise(self, XML_ERROR_DOCTYPE_PREMATURE_END); |
return; | return; |
} | } |
| |
|
|
} | } |
| |
static int _ParseCharData( | static int _ParseCharData( |
XML* self, |
_Inout_ XML* self, |
XML_Elem* elem, |
_Inout_ XML_Elem* elem, |
char* p) |
_In_z_ XML_Char* p) |
{ | { |
char* start; |
XML_Char* start; |
char* end; |
XML_Char* end; |
|
|
/* Skip leading spaces */ |
|
p = _SkipSpaces(self, p); |
|
| |
/* Reject input if it does not appear inside tags */ | /* Reject input if it does not appear inside tags */ |
if (self->stackSize == 0) | if (self->stackSize == 0) |
{ | { |
if (*p == '\0') |
/* Finished parsing document */ |
{ |
|
/* Proper end of input so set status to 1 */ |
|
self->status = 1; | self->status = 1; |
|
self->ptr = p; |
return 0; | return 0; |
} | } |
| |
XML_Raise(self, "markup outside root element"); |
|
return 0; |
|
} |
|
|
|
/* Remove leading spaces */ |
|
p = _SkipSpaces(self, p); |
|
|
|
if (*p == '<') | if (*p == '<') |
{ | { |
self->ptr = p + 1; | self->ptr = p + 1; |
|
|
/* Process character data */ | /* Process character data */ |
if (*p != '<') | if (*p != '<') |
{ | { |
XML_Raise(self, "expected opening angle bracket"); |
XML_Raise(self, XML_ERROR_CHARDATA_EXPECTED_ELEMENT_END_TAG); |
return 0; | return 0; |
} | } |
| |
/* Remove trailing spaces (the newlines have already been counted) */ |
|
{ |
|
/* Remove trailing spaces from the character data */ |
|
start[-1] = '\0'; |
|
|
|
while (_IsSpace(end[-1])) |
|
end--; |
|
} |
|
|
|
/* Set next state */ | /* Set next state */ |
self->ptr = p + 1; | self->ptr = p + 1; |
self->state = STATE_TAG; | self->state = STATE_TAG; |
|
|
/* Prepare element */ | /* Prepare element */ |
*end = '\0'; | *end = '\0'; |
elem->type = XML_CHARS; | elem->type = XML_CHARS; |
elem->data = start; |
elem->data.data = start; |
elem->size = end - start; |
elem->data.size = end - start; |
|
elem->data.namespaceUri = T(""); |
|
elem->data.namespaceUriSize = 0; |
|
elem->data.namespaceId = '\0'; |
| |
/* Return 1 to indicate non-empty element */ | /* Return 1 to indicate non-empty element */ |
return 1; | return 1; |
|
|
**============================================================================== | **============================================================================== |
*/ | */ |
| |
const char* XML_Elem_GetAttr( |
const XML_Char* XML_Elem_GetAttr( |
XML_Elem* self, |
_Inout_ XML_Elem* self, |
const char* name) |
XML_Char nsId, |
|
_In_z_ const XML_Char* name) |
{ | { |
size_t i; | size_t i; |
| |
for (i = 0; i < self->attrsSize; i++) | for (i = 0; i < self->attrsSize; i++) |
{ | { |
if (strcmp(name, self->attrs[i].name) == 0) |
if (nsId == self->attrs[i].name.namespaceId && |
|
XML_strcmp(name, self->attrs[i].name.data) == 0) |
return self->attrs[i].value; | return self->attrs[i].value; |
} | } |
| |
|
|
} | } |
| |
void XML_Init( | void XML_Init( |
XML* self) |
_Out_ XML* self) |
{ | { |
memset(self, 0, sizeof(XML)); | memset(self, 0, sizeof(XML)); |
| |
|
|
} | } |
| |
void XML_SetText( | void XML_SetText( |
XML* self, |
_Inout_ XML* self, |
char* text) |
_In_z_ XML_Char* text) |
{ | { |
self->text = text; | self->text = text; |
self->ptr = text; | self->ptr = text; |
|
|
} | } |
| |
int XML_Next( | int XML_Next( |
XML* self, |
_Inout_ XML* self, |
XML_Elem* elem) |
_Out_ XML_Elem* elem) |
{ | { |
if (self->elemStackSize) | if (self->elemStackSize) |
{ | { |
|
|
{ | { |
case STATE_START: | case STATE_START: |
{ | { |
char* p = self->ptr; |
XML_Char* p = self->ptr; |
| |
/* Skip spaces */ | /* Skip spaces */ |
p = _SkipSpaces(self, p); | p = _SkipSpaces(self, p); |
|
|
/* Expect '<' */ | /* Expect '<' */ |
if (*p != '<') | if (*p != '<') |
{ | { |
XML_Raise(self, "expected opening angle bracket"); |
XML_Raise(self, XML_ERROR_OPEN_ANGLE_BRACKET_EXPECTED); |
return -1; | return -1; |
} | } |
| |
|
|
} | } |
case STATE_TAG: | case STATE_TAG: |
{ | { |
char* p = self->ptr; |
XML_Char* p = self->ptr; |
| |
/* Skip spaces */ | /* Skip spaces */ |
p = _SkipSpaces(self, p); | p = _SkipSpaces(self, p); |
|
|
_ParseComment(self, elem, p); | _ParseComment(self, elem, p); |
return self->status; | return self->status; |
} | } |
else if (memcmp(p, "[CDATA[", 7) == 0) |
else if ( |
|
memcmp(p, T("[CDATA["), 7 * sizeof(XML_Char)) == 0) |
{ | { |
_ParseCDATA(self, elem, p); | _ParseCDATA(self, elem, p); |
return self->status; | return self->status; |
} | } |
else if (memcmp(p, "DOCTYPE", 7) == 0) |
else if ( |
|
memcmp(p, T("DOCTYPE"), 7 * sizeof(XML_Char)) == 0) |
{ | { |
_ParseDOCTYPE(self, elem, p); | _ParseDOCTYPE(self, elem, p); |
| |
|
|
} | } |
else | else |
{ | { |
XML_Raise(self, "expected comment, CDATA, or DOCTYPE"); |
XML_Raise(self, XML_ERROR_COMMENT_CDATA_DOCTYPE_EXPECTED); |
return -1; | return -1; |
} | } |
} | } |
else | else |
{ | { |
XML_Raise(self, "expected element"); |
XML_Raise(self, XML_ERROR_ELEMENT_EXPECTED); |
return-1; | return-1; |
} | } |
break; | break; |
} | } |
case STATE_CHARS: | case STATE_CHARS: |
{ | { |
char* p = self->ptr; |
XML_Char* p = self->ptr; |
| |
if (_ParseCharData(self, elem, p) == 1) | if (_ParseCharData(self, elem, p) == 1) |
{ | { |
|
|
} | } |
default: | default: |
{ | { |
XML_Raise(self, "unexpected state"); |
XML_Raise(self, XML_ERROR_UNEXPECTED_STATE); |
return -1; | return -1; |
} | } |
} | } |
|
|
} | } |
| |
int XML_Expect( | int XML_Expect( |
XML* self, |
_Inout_ XML* self, |
XML_Elem* elem, |
_Out_ XML_Elem* elem, |
XML_Type type, | XML_Type type, |
const char* name) |
XML_Char nsId, |
|
_In_z_ const XML_Char* name) |
{ | { |
if (XML_Next(self, elem) == 0 && | if (XML_Next(self, elem) == 0 && |
elem->type == type && | elem->type == type && |
(!name || strcmp(elem->data, name) == 0)) |
nsId == elem->data.namespaceId && |
|
(!name || XML_strcmp(elem->data.data, name) == 0)) |
{ | { |
return 0; | return 0; |
} | } |
| |
#if 0 |
|
XML_Elem_Dump(elem); |
|
#endif |
|
|
|
if (type == XML_START) | if (type == XML_START) |
XML_Raise(self, "expected element: <%s>: %s", name, elem->data); |
XML_Raise(self, XML_ERROR_ELEMENT_NAME_EXPECTED, |
|
tcs(name), tcs(elem->data.data)); |
else if (type == XML_END) | else if (type == XML_END) |
XML_Raise(self, "expected element: </%s>: %s", name, elem->data); |
XML_Raise(self, XML_ERROR_SPECIFIC_END_ELEMENT_EXPECTED, |
|
tcs(name), tcs(elem->data.data)); |
else if (type == XML_CHARS) | else if (type == XML_CHARS) |
XML_Raise(self, "expected character data"); |
XML_Raise(self, XML_ERROR_CHARACTER_DATA_EXPECTED); |
| |
return -1; | return -1; |
} | } |
| |
int XML_Skip( | int XML_Skip( |
XML* self) |
_Inout_ XML* self) |
{ | { |
XML_Elem tmp; | XML_Elem tmp; |
size_t nesting = self->nesting; | size_t nesting = self->nesting; |
|
|
} | } |
| |
int XML_RegisterNameSpace( | int XML_RegisterNameSpace( |
XML* self, |
_Inout_ XML* self, |
char id, |
XML_Char id, |
const char* uri) |
_In_z_ const XML_Char* uri) |
{ | { |
XML_RegisteredNameSpace rns; | XML_RegisteredNameSpace rns; |
/* ATTN: we do not check for duplicates */ | /* ATTN: we do not check for duplicates */ |
|
|
if (self->registeredNameSpacesSize == XML_MAX_REGISTERED_NAMESPACES) | if (self->registeredNameSpacesSize == XML_MAX_REGISTERED_NAMESPACES) |
return -1; | return -1; |
| |
/* Reject zero-length URIs */ |
|
if (uri[0] == '\0') |
|
return -1; |
|
|
|
rns.id = id; | rns.id = id; |
rns.uri = uri; | rns.uri = uri; |
rns.uriCode = _HashCode(uri, strlen(uri)); |
rns.uriCode = _HashCode(uri, XML_strlen(uri)); |
| |
self->registeredNameSpaces[self->registeredNameSpacesSize++] = rns; | self->registeredNameSpaces[self->registeredNameSpacesSize++] = rns; |
| |
|
|
} | } |
| |
int XML_PutBack( | int XML_PutBack( |
XML* self, |
_Inout_ XML* self, |
const XML_Elem* elem) |
_In_ const XML_Elem* elem) |
{ | { |
/* Check for stack overflow */ | /* Check for stack overflow */ |
if (self->elemStackSize == XML_MAX_NESTED) | if (self->elemStackSize == XML_MAX_NESTED) |
{ | { |
XML_Raise(self, "element stack overflow (>%u)", XML_MAX_NESTED); |
XML_Raise(self, XML_ERROR_ELEMENT_DEPTH_OVERFLOW, |
|
(int)XML_MAX_NESTED); |
return -1; | return -1; |
} | } |
| |
|
|
return 0; | return 0; |
} | } |
| |
void XML_PutError(XML* self) |
#if defined(_MSC_VER) |
|
void XML_Raise(_Inout_ XML* self, unsigned formatStringId, ...) |
{ | { |
if (self->status == -1) |
HMODULE hModule; |
fprintf(stderr, "line %u: %s\n", (int)self->line, self->message); |
XML_Char formatMsg[MAX_PATH]; |
|
va_list ap; |
|
memset(&ap, 0, sizeof(ap)); |
|
|
|
self->status = -1; |
|
self->message[0] = '\0'; |
|
|
|
if (GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, (LPCTSTR ) XML_Raise, &hModule) == 0) |
|
{ |
|
memcpy(self->message, T("Failed to parse XML."), sizeof(T("Failed to parse XML."))); |
|
} |
|
else |
|
{ |
|
if(LoadStringW(hModule, formatStringId, formatMsg, MAX_PATH)) |
|
{ |
|
va_start(ap, formatStringId); |
|
if (FormatMessageW(FORMAT_MESSAGE_FROM_STRING, formatMsg, 0, 0, self->message, sizeof(self->message)/sizeof(self->message[0]), &ap) == 0) |
|
{ |
|
memcpy(self->message, T("Failed to parse XML."), sizeof(T("Failed to parse XML."))); |
|
} |
|
va_end(ap); |
} | } |
|
else |
|
{ |
|
memcpy(self->message, T("Failed to parse XML."), sizeof(T("Failed to parse XML."))); |
|
} |
|
} |
|
} |
|
#else |
| |
void XML_Raise(XML* self, const char* format, ...) |
void XML_Raise(XML* self, _In_z_ const XML_Char* format, ...) |
{ | { |
int n; |
|
va_list ap; | va_list ap; |
memset(&ap, 0, sizeof(ap)); |
|
| |
self->status = -1; | self->status = -1; |
self->message[0] = '\0'; | self->message[0] = '\0'; |
| |
|
memset(&ap, 0, sizeof(ap)); |
va_start(ap, format); | va_start(ap, format); |
n = Vsnprintf(self->message, sizeof(self->message), format, ap); |
Vstprintf(self->message, MI_COUNT(self->message), format, ap); |
va_end(ap); | va_end(ap); |
} | } |
|
#endif |
| |
void XML_FormatError(XML* self, char* format, size_t size) |
void XML_FormatError(_Inout_ XML* self, _Out_writes_z_(size) XML_Char* buffer, size_t size) |
{ | { |
*format = '\0'; |
*buffer = '\0'; |
| |
if (self->status == -1) | if (self->status == -1) |
Snprintf(format, size, "%u: error: %s", (unsigned int)self->line, self->message); |
{ |
|
Stprintf( |
|
buffer, |
|
size, |
|
PAL_T("%u: error: %T"), |
|
(unsigned int)self->line, |
|
tcs(self->message)); |
|
} |
|
} |
|
|
|
|
|
int XML_StripWhitespace( |
|
_Inout_ XML_Elem* elem) |
|
{ |
|
if (elem->type != XML_CHARS) |
|
{ |
|
return -1; |
|
} |
|
|
|
//Strip leading white space |
|
while (elem->data.size && _IsSpace(*elem->data.data)) |
|
{ |
|
elem->data.data++; |
|
elem->data.size--; |
|
} |
|
//Strip trailing white space |
|
while(elem->data.size && _IsSpace(elem->data.data[elem->data.size-1])) |
|
{ |
|
elem->data.data[elem->data.size-1] = T('\0'); |
|
elem->data.size--; |
|
} |
|
return 0; |
} | } |