version 1.88.6.2, 2003/08/13 19:39:51
|
version 1.91, 2003/08/06 14:53:21
|
|
|
#include "XmlParser.h" | #include "XmlParser.h" |
#include "Tracer.h" | #include "Tracer.h" |
#include <Pegasus/Common/StatisticalData.h> | #include <Pegasus/Common/StatisticalData.h> |
#include "CommonUTF.h" |
|
| |
PEGASUS_NAMESPACE_BEGIN | PEGASUS_NAMESPACE_BEGIN |
| |
|
|
| |
// Appends the UTF-16 code unit c to out. The visible conversion path encodes
// c as UTF-8 through UTF16toUTF8 into a zero-filled 6-byte scratch buffer and
// then appends trailingBytesForUTF8[first byte] + 1 bytes of that buffer.
//
// NOTE(review): this text is a two-column comparison of two revisions. The
// single-byte "out.append(Sint8(c));" line below is interleaved with the
// UTF-8 conversion path and presumably belongs to the other revision --
// confirm which revision is intended before building.
inline void _appendChar(Array<Sint8>& out, const Char16& c) | inline void _appendChar(Array<Sint8>& out, const Char16& c) |
{ | { |
// We need to convert the Char16 to UTF8 then append the UTF8 |
out.append(Sint8(c)); |
// character into the array. |
|
// NOTE: The UTF8 character could be several bytes long. |
|
// WARNING: This function will put in replacement character for |
|
// all characters that have surrogate pairs. |
|
|
|
char str[6]; |
|
memset(str,0x00,sizeof(str)); |
|
char* charIN = (char *)&c; |
|
|
|
const Uint16 *strsrc = (Uint16 *)charIN; |
|
// NOTE(review): charIN is a char*, so &charIN[1] is one byte past the start
// of c, not one Uint16 past it -- confirm the intended end of the source.
Uint16 *endsrc = (Uint16 *)&charIN[1]; |
|
|
|
Uint8 *strtgt = (Uint8 *)str; |
|
// The target end is &str[5], so the last byte of str is never offered to the
// converter and keeps the zero written by memset.
Uint8 *endtgt = (Uint8 *)&str[5]; |
|
|
|
UTF16toUTF8(&strsrc, |
|
endsrc, |
|
&strtgt, |
|
endtgt); |
|
|
|
// NOTE(review): Uint32(str[0]) converts a plain char; where char is signed, a
// first byte >= 0x80 converts to a very large index -- confirm, and consider
// converting through Uint8 first.
out.append((Sint8 *)str,trailingBytesForUTF8[Uint32(str[0])]+1); |
|
} | } |
| |
// Appends c to out, substituting a replacement string for special characters.
//
//  - When c is below 0x20 or equals 0x7f, a decimal character reference of
//    the form "&#N;" is formatted into charref and appended. charref has
//    room for the longest such value, "&#127;", plus the terminator.
//  - When c is '&', '<', '>', '"' or '\'', a fixed string with an explicit
//    length (5, 4, 4, 6, 6) is appended.
//  - Any other c is converted with UTF16toUTF8 into a zero-filled 6-byte
//    scratch buffer, and trailingBytesForUTF8[first byte] + 1 bytes of it are
//    appended.
//
// NOTE(review): this text is a two-column comparison of two revisions; the
// "ATTN-B" comment after the first condition and the stray
// "if ( (c < Char16(0x20)) || (c == Char16(0x7f)) )" under case '<' appear to
// be lines of the other revision interleaved here -- confirm before building.
//
// NOTE(review): the fixed strings in the switch are shorter than the lengths
// passed with them (for example a one-character string with length 5), and
// the one for '"' is not a well-formed literal. Presumably the original
// literals were the XML entity names and were decoded during extraction --
// restore them from the real file rather than from this text.
//
// NOTE(review): in the default branch, &charIN[1] is one byte (not one
// Uint16) past the start of c, and Uint32(str[0]) converts a plain char,
// which where char is signed turns a first byte >= 0x80 into a very large
// table index -- confirm both.
inline void _appendSpecialChar(Array<Sint8>& out, const Char16& c) | inline void _appendSpecialChar(Array<Sint8>& out, const Char16& c) |
{ | { |
if ( ((c < Char16(0x20)) && (c >= Char16(0x00))) || (c == Char16(0x7f)) ) |
// ATTN-B: Only UTF-8 handled for now. |
{ |
|
char charref[7]; |
|
sprintf(charref, "&#%u;", (Uint16)c); |
|
out.append(charref, strlen(charref)); |
|
} |
|
else |
|
{ |
|
switch (c) |
|
{ |
|
case '&': |
|
out.append("&", 5); |
|
break; |
|
| |
case '<': |
if ( (c < Char16(0x20)) || (c == Char16(0x7f)) ) |
out.append("<", 4); |
|
break; |
|
|
|
case '>': |
|
out.append(">", 4); |
|
break; |
|
|
|
case '"': |
|
out.append(""", 6); |
|
break; |
|
|
|
case '\'': |
|
out.append("'", 6); |
|
break; |
|
|
|
default: |
|
{ |
|
// We need to convert the Char16 to UTF8 then append the UTF8 |
|
// character into the array. |
|
// NOTE: The UTF8 character could be several bytes long. |
|
// WARNING: This function will put in replacement character for |
|
// all characters that have surogate pairs. |
|
char str[6]; |
|
memset(str,0x00,sizeof(str)); |
|
char* charIN = (char *)&c; |
|
|
|
const Uint16 *strsrc = (Uint16 *)charIN; |
|
Uint16 *endsrc = (Uint16 *)&charIN[1]; |
|
|
|
Uint8 *strtgt = (Uint8 *)str; |
|
Uint8 *endtgt = (Uint8 *)&str[5]; |
|
|
|
UTF16toUTF8(&strsrc, |
|
endsrc, |
|
&strtgt, |
|
endtgt); |
|
|
|
Uint32 number1 = trailingBytesForUTF8[Uint32(str[0])]+1; |
|
out.append((Sint8 *)str,number1); |
|
} |
|
} |
|
} |
|
} |
|
|
|
inline void _appendSpecialChar(Array<Sint8>& out, char c) |
|
{ |
|
if ( ((c < Char16(0x20)) && (c >= Char16(0x00))) || (c == Char16(0x7f)) ) |
|
{ | { |
char charref[7]; | char charref[7]; |
sprintf(charref, "&#%u;", (Uint8)c); |
sprintf(charref, "&#%u;", (Uint16)c); |
out.append(charref, strlen(charref)); | out.append(charref, strlen(charref)); |
} | } |
else | else |
|
|
} | } |
} | } |
| |
|
|
static inline void _appendSpecialChar(PEGASUS_STD(ostream)& os, char c) | static inline void _appendSpecialChar(PEGASUS_STD(ostream)& os, char c) |
{ | { |
if ( (c < Char16(0x20)) || (c == Char16(0x7f)) ) | if ( (c < Char16(0x20)) || (c == Char16(0x7f)) ) |
{ | { |
char charref[7]; | char charref[7]; |
sprintf(charref, "&#%u;", (Uint8)c); |
sprintf(charref, "&#%u;", (Uint16)c); |
os << charref; | os << charref; |
} | } |
else | else |
|
|
} | } |
} | } |
| |
// Appends the UTF-8 encoding of the UTF-16 surrogate pair (high, low) to out.
//
//   out   - byte array that receives the encoded sequence.
//   high  - first UTF-16 code unit of the pair.
//   low   - second UTF-16 code unit of the pair.
//
// The conversion itself is delegated to UTF16toUTF8. The number of bytes
// appended is trailingBytesForUTF8[first encoded byte] + 1.
void _appendSurrogatePair(Array<Sint8>& out, Uint16 high, Uint16 low)
{
    // Hold the two code units in a real Uint16 array: the converter then
    // reads correctly aligned storage, and the end pointer below is exactly
    // one past the second unit instead of pointing into the middle of it.
    Uint16 pair[2];
    pair[0] = high;
    pair[1] = low;

    // Zero-filled so that str[0] is well defined even if the converter
    // writes nothing. Unsigned so that it can index the table directly.
    Uint8 str[6];
    memset(str, 0x00, sizeof(str));

    const Uint16* strsrc = pair;
    Uint16* endsrc = pair + 2;

    // The last byte of str is deliberately not offered to the converter, so
    // the buffer always ends in a zero byte.
    Uint8* strtgt = str;
    Uint8* endtgt = &str[5];

    UTF16toUTF8(&strsrc,
                endsrc,
                &strtgt,
                endtgt);

    // Index with the unsigned first byte. Going through a plain (possibly
    // signed) char would turn a first byte >= 0x80 -- which is every first
    // byte a surrogate pair produces -- into an out-of-range index.
    Uint32 number1 = trailingBytesForUTF8[str[0]] + 1;
    out.append((Sint8*)str, number1);
}
|
|
|
static inline void _appendSpecial(PEGASUS_STD(ostream)& os, const char* str) | static inline void _appendSpecial(PEGASUS_STD(ostream)& os, const char* str) |
{ | { |
while (*str) | while (*str) |
|
|
| |
// Appends the single character x to out by forwarding to _appendSpecialChar.
//
// NOTE(review): this text is a two-column comparison of two revisions. The
// two calls below (one passing x as a char, one passing Char16(x)) look like
// the two revisions' variants of the same line rather than two intended
// calls -- confirm that only one is kept, otherwise x is appended twice.
void XmlWriter::appendSpecial(Array<Sint8>& out, char x) | void XmlWriter::appendSpecial(Array<Sint8>& out, char x) |
{ | { |
_appendSpecialChar(out, x); |
_appendSpecialChar(out, Char16(x)); |
} | } |
| |
void XmlWriter::appendSpecial(Array<Sint8>& out, const char* str) | void XmlWriter::appendSpecial(Array<Sint8>& out, const char* str) |
|
|
{ | { |
for (Uint32 i = 0; i < str.size(); i++) | for (Uint32 i = 0; i < str.size(); i++) |
{ | { |
if(((str[i] >= FIRST_HIGH_SURROGATE) && (str[i] <= LAST_HIGH_SURROGATE)) || |
|
((str[i] >= FIRST_LOW_SURROGATE) && (str[i] <= LAST_LOW_SURROGATE))) |
|
{ |
|
Char16 highSurrogate = str[i]; |
|
Char16 lowSurrogate = str[++i]; |
|
|
|
_appendSurrogatePair(out, Uint16(highSurrogate),Uint16(lowSurrogate)); |
|
} |
|
else |
|
{ |
|
_appendSpecialChar(out, str[i]); | _appendSpecialChar(out, str[i]); |
} | } |
} | } |
} |
|
| |
// See http://www.ietf.org/rfc/rfc2396.txt section 2 | // See http://www.ietf.org/rfc/rfc2396.txt section 2 |
// Reserved characters = ';' '/' '?' ':' '@' '&' '=' '+' '$' ',' | // Reserved characters = ';' '/' '?' ':' '@' '&' '=' '+' '$' ',' |
|
|
// Unwise = '{' '}' '|' '\\' '^' '[' ']' '`' | // Unwise = '{' '}' '|' '\\' '^' '[' ']' '`' |
inline void _encodeURIChar(String& outString, Char16 char16) | inline void _encodeURIChar(String& outString, Char16 char16) |
{ | { |
// We need to convert the Char16 to UTF8 then append the UTF8 |
// ATTN: Handle non-UTF-8 character sets |
// character into the array. |
char c = char16 & 0x007f; |
// NOTE: The UTF8 character could be several bytes long. |
|
// WARNING: This function will put in replacement character for |
|
// all characters that have surrogate pairs. |
|
|
|
char* str = new char[6]; |
|
Uint16* charIN = (Uint16 *)&char16; |
|
|
|
const Uint16 *strsrc = (Uint16*)charIN; |
|
Uint16 *endsrc = &charIN[1]; |
|
|
|
Uint8 *strtgt = (Uint8 *)str; |
|
Uint8 *endtgt = (Uint8 *)&str[6]; |
|
|
|
UTF16toUTF8(&strsrc, |
|
endsrc, |
|
&strtgt, |
|
endtgt); |
|
|
|
// Since multi-byte UTF8 characters fall above the 7F |
|
// range we only need to check the first byte. |
|
char c = str[0]; |
|
| |
#ifndef PEGASUS_DO_NOT_IMPLEMENT_URI_ENCODING | #ifndef PEGASUS_DO_NOT_IMPLEMENT_URI_ENCODING |
if ( ((c <= 0x20) && (c >= 0x00)) || // Control characters + space char |
if ( (c <= 0x20) || // Control characters + space char |
( (c >= 0x22) && (c <= 0x26) ) || // '"' '#' '$' '%' '&' | ( (c >= 0x22) && (c <= 0x26) ) || // '"' '#' '$' '%' '&' |
(c == 0x2b) || // '+' | (c == 0x2b) || // '+' |
(c == 0x2c) || // ',' | (c == 0x2c) || // ',' |
|
|
else | else |
#endif | #endif |
{ | { |
outString.append(char16); |
outString.append(c); |
} | } |
} | } |
| |
|
|
{ | { |
out << "<LOCALNAMESPACEPATH>\n"; | out << "<LOCALNAMESPACEPATH>\n"; |
| |
char* nameSpaceCopy = strdup(nameSpace.getString().getCStringUTF8()); |
char* nameSpaceCopy = strdup(nameSpace.getString().getCString()); |
#ifdef PEGASUS_PLATFORM_SOLARIS_SPARC_CC | #ifdef PEGASUS_PLATFORM_SOLARIS_SPARC_CC |
char *last; | char *last; |
for (const char* p = strtok_r(nameSpaceCopy, "/", &last); p; | for (const char* p = strtok_r(nameSpaceCopy, "/", &last); p; |