version 1.109, 2005/05/18 20:34:36
|
version 1.111.2.6, 2005/09/29 15:10:55
|
|
|
// | // |
// Author: Mike Brasher (mbrasher@bmc.com) | // Author: Mike Brasher (mbrasher@bmc.com) |
// | // |
// Modified By: Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com) |
|
// Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297 |
|
// |
|
//%///////////////////////////////////////////////////////////////////////////// | //%///////////////////////////////////////////////////////////////////////////// |
| |
|
#define PEGASUS_USE_INTERNAL_INLINES |
#include <cctype> |
|
#include <cstring> |
|
#include "String.h" | #include "String.h" |
#include "Array.h" |
#include <cassert> |
#include "AutoPtr.h" |
|
#include "InternalException.h" | #include "InternalException.h" |
#include <iostream> |
|
#include <fstream> |
|
#include <Pegasus/Common/CommonUTF.h> |
|
|
|
#include "CommonUTF.h" | #include "CommonUTF.h" |
|
#include "CharSet.h" |
| |
#ifdef PEGASUS_HAS_ICU |
#ifdef PEGASUS_STRING_ENABLE_ICU |
#include <unicode/ustring.h> | #include <unicode/ustring.h> |
#include <unicode/uchar.h> | #include <unicode/uchar.h> |
#endif | #endif |
| |
PEGASUS_USING_STD; |
|
|
|
PEGASUS_NAMESPACE_BEGIN | PEGASUS_NAMESPACE_BEGIN |
| |
/////////////////////////////////////////////////////////////////////////////// |
//============================================================================== |
|
// |
|
// Compile-time switches (defined macros). |
|
// |
|
// PEGASUS_STRING_ENABLE_ICU -- enables use of ICU package. |
|
// |
|
// PEGASUS_STRING_NO_THROW -- suppresses throwing of exceptions |
// | // |
// CString |
// PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code. |
// | // |
/////////////////////////////////////////////////////////////////////////////// |
// PEGASUS_USE_INTERNAL_INLINES -- enables internal inlining feature. |
|
// |
|
//============================================================================== |
|
|
|
//============================================================================== |
|
// |
|
// File-scope definitions: |
|
// |
|
//============================================================================== |
| |
CString::CString() |
// Converts 16-bit characters to upper case. |
: _rep(0) |
inline Uint16 _to_upper(Uint16 x) |
{ | { |
|
return (x & 0xFF00) ? x : CharSet::to_upper(x); |
} | } |
| |
CString::CString(const CString& cstr) |
// Converts 16-bit characters to lower case. |
|
inline Uint16 _to_lower(Uint16 x) |
{ | { |
_rep = 0; |
return (x & 0xFF00) ? x : CharSet::to_lower(x); |
|
} |
| |
if (cstr._rep) |
// Rounds x to the next power of two (or just returns 8 if x < 8). |
|
static Uint32 _next_pow_2(Uint32 x) |
{ | { |
_rep = (void*)new char[strlen((char*)cstr._rep)+1]; |
if (x < 8) |
strcpy((char*)_rep, (char*)cstr._rep); |
return 8; |
|
|
|
x--; |
|
x |= (x >> 1); |
|
x |= (x >> 2); |
|
x |= (x >> 4); |
|
x |= (x >> 8); |
|
x |= (x >> 16); |
|
x++; |
|
|
|
return x; |
} | } |
|
|
|
template<class P, class Q> |
|
static void _copy(P* p, const Q* q, size_t n) |
|
{ |
|
// Use loop unrolling. |
|
|
|
while (n >= 8) |
|
{ |
|
p[0] = q[0]; |
|
p[1] = q[1]; |
|
p[2] = q[2]; |
|
p[3] = q[3]; |
|
p[4] = q[4]; |
|
p[5] = q[5]; |
|
p[6] = q[6]; |
|
p[7] = q[7]; |
|
p += 8; |
|
q += 8; |
|
n -= 8; |
} | } |
| |
CString::CString(char* cstr) |
while (n >= 4) |
: _rep(cstr) |
|
{ | { |
|
p[0] = q[0]; |
|
p[1] = q[1]; |
|
p[2] = q[2]; |
|
p[3] = q[3]; |
|
p += 4; |
|
q += 4; |
|
n -= 4; |
} | } |
| |
CString::~CString() |
while (n--) |
|
*p++ = *q++; |
|
} |
|
|
|
static Uint16* _find(const Uint16* s, size_t n, Uint16 c) |
{ | { |
if (_rep) |
while (n >= 4) |
{ | { |
delete [] (char*)_rep; |
if (s[0] == c) |
|
return (Uint16*)s; |
|
if (s[1] == c) |
|
return (Uint16*)&s[1]; |
|
if (s[2] == c) |
|
return (Uint16*)&s[2]; |
|
if (s[3] == c) |
|
return (Uint16*)&s[3]; |
|
|
|
n -= 4; |
|
s += 4; |
} | } |
|
|
|
if (n) |
|
{ |
|
if (*s == c) |
|
return (Uint16*)s; |
|
s++; |
|
n--; |
} | } |
| |
CString& CString::operator=(const CString& cstr) |
if (n) |
{ | { |
if (&cstr != this) |
if (*s == c) |
|
return (Uint16*)s; |
|
s++; |
|
n--; |
|
} |
|
|
|
if (n && *s == c) |
|
return (Uint16*)s; |
|
|
|
// Not found! |
|
return 0; |
|
} |
|
|
|
static int _compare(const Uint16* s1, const Uint16* s2) |
{ | { |
if (_rep) |
while (*s1 && *s2) |
{ | { |
delete [] (char*)_rep; |
int r = *s1++ - *s2++; |
_rep = 0; |
|
|
if (r) |
|
return r; |
} | } |
if (cstr._rep) |
|
{ |
if (*s2) |
_rep = (char*)new char[strlen((char*)cstr._rep)+1]; |
return -1; |
strcpy((char*)_rep, (char*)cstr._rep); |
else if (*s1) |
|
return 1; |
|
|
|
return 0; |
} | } |
|
|
|
static int _compare_no_utf8(const Uint16* s1, const char* s2) |
|
{ |
|
Uint16 c1; |
|
Uint16 c2; |
|
|
|
do |
|
{ |
|
c1 = *s1++; |
|
c2 = *s2++; |
|
|
|
if (c1 == 0) |
|
return c1 - c2; |
} | } |
return *this; |
while (c1 == c2); |
|
|
|
return c1 - c2; |
} | } |
| |
CString::operator const char*() const |
static int _compare(const Uint16* s1, const Uint16* s2, size_t n) |
{ | { |
return (char*)_rep; |
// This should only be called when s1 and s2 have the same length. |
|
|
|
while (n-- && (*s1++ - *s2++) == 0) |
|
; |
|
|
|
return s1[-1] - s2[-1]; |
} | } |
| |
/////////////////////////////////////////////////////////////////////////////// |
static inline void _copy(Uint16* s1, const Uint16* s2, size_t n) |
// |
{ |
// String |
memcpy(s1, s2, n * sizeof(Uint16)); |
// |
} |
/////////////////////////////////////////////////////////////////////////////// |
|
| |
const String String::EMPTY = String(); |
void String_throw_out_of_bounds() |
|
{ |
|
throw IndexOutOfBoundsException(); |
|
} |
| |
inline Uint32 _StrLen(const Char16* str) |
#ifdef PEGASUS_STRING_NO_THROW |
|
# define _check_null_pointer(ARG) /* empty */ |
|
#else |
|
template<class T> |
|
inline void _check_null_pointer(const T* ptr) |
{ | { |
if (!str) |
if (!ptr) |
throw NullPointer(); | throw NullPointer(); |
|
} |
|
#endif |
| |
Uint32 n = 0; |
static size_t _copy_from_utf8(Uint16* dest, const char* src, size_t n) |
|
{ |
|
Uint16* p = dest; |
|
const Uint8* q = (const Uint8*)src; |
| |
while (*str++) |
// Process leading 7-bit ASCII characters (to avoid UTF8 overhead below |
n++; |
// this loop). Use factor-four loop-unrolling. |
| |
return n; |
while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128) |
|
{ |
|
p[0] = q[0]; |
|
p[1] = q[1]; |
|
p[2] = q[2]; |
|
p[3] = q[3]; |
|
p += 4; |
|
q += 4; |
|
n -= 4; |
} | } |
| |
// |
switch (n) |
// Converts a utf-8 char buffer to utf-16 and appends the utf-16 to the Array. |
|
// n is the length of the input char *, if stopAtTerm is 0 |
|
// A terminator character is appended to the end. |
|
// Note that each input char is converted individually, which gives |
|
// the fastest performance. |
|
// |
|
void _convertAndAppend(const char* str, Array<Char16>& c16a, Uint32 n, Uint8 stopAtTerm) |
|
{ | { |
Uint32 i = 0; |
case 0: |
while ((stopAtTerm && *str) || (!stopAtTerm && i < n)) |
return p - dest; |
|
case 1: |
|
if (q[0] < 128) |
{ | { |
if (*(Uint8*)str <= 0x7f) |
p[0] = q[0]; |
|
return p + 1 - dest; |
|
} |
|
break; |
|
case 2: |
|
if (q[0] < 128 && q[1] < 128) |
{ | { |
// Current byte sequence is in the us-ascii range. |
p[0] = q[0]; |
c16a.append(Uint8(*str++)); |
p[1] = q[1]; |
|
return p + 2 - dest; |
} | } |
else |
break; |
|
case 3: |
|
if (q[0] < 128 && q[1] < 128 && q[2] < 128) |
{ | { |
// |
p[0] = q[0]; |
// Current byte sequence is not in the us-ascii range. |
p[1] = q[1]; |
// |
p[2] = q[2]; |
|
return p + 3 - dest; |
|
} |
|
break; |
|
} |
|
|
|
// Process remaining characters. |
|
|
|
while (n) |
|
{ |
|
// Optimize for 7-bit ASCII case. |
| |
// Check if the byte sequence is valid utf-8, and if so, |
if (*q < 128) |
// call the converter to utf-16 |
{ |
Uint16 tgt[3]; |
*p++ = *q++; |
tgt[1] = 0; |
n--; |
Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*str); |
|
if ( (!stopAtTerm && i + c >= n) || |
|
(!isValid_U8((const Uint8 *)str, c+1)) ) |
|
{ |
|
// Note about error conditions. |
|
// It is possible that the last utf-8 char before the |
|
// end of input string extends past the end of the input string. |
|
// This is caught in both cases - |
|
// If counting up to n, then the test above catches it. |
|
// If converting until terminator found, then a terminator |
|
// in the middle of a multi-byte utf-8 char is invalid. |
|
MessageLoaderParms parms("Common.String.BAD_UTF8", |
|
"The byte sequence starting at index $0 is not valid UTF-8 encoding.", |
|
i); |
|
throw Exception(parms); |
|
} | } |
else | else |
{ | { |
// str is incremented by this call to the start of the next char |
Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1; |
Uint16 * tgtBuf = tgt; |
|
UTF8toUTF16((const Uint8 **)&str, (Uint8 *)&str[c+1], &tgtBuf, &tgtBuf[2]); |
if (c > n || !isValid_U8(q, c) || |
c16a.append(tgt[0]); |
UTF8toUTF16(&q, q + c, &p, p + n) != 0) |
if (tgt[1]) |
|
{ | { |
// Its a utf-16 surrogate pair (uses 2 Char16's) |
throw Exception("Bad UTF8 encoding"); |
c16a.append(tgt[1]); |
|
} | } |
| |
// bump by the trailing byte count |
n -= c; |
i += c; |
|
} | } |
} | } |
| |
i++; |
return p - dest; |
} // end while |
} |
| |
c16a.append('\0'); |
// Note: dest must be at least three times src (plus an extra byte for |
|
// terminator). |
|
static inline size_t _copy_to_utf8(char* dest, const Uint16* src, size_t n) |
|
{ |
|
const Uint16* q = src; |
|
Uint8* p = (Uint8*)dest; |
|
|
|
while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128) |
|
{ |
|
p[0] = q[0]; |
|
p[1] = q[1]; |
|
p[2] = q[2]; |
|
p[3] = q[3]; |
|
p += 4; |
|
q += 4; |
|
n -= 4; |
} | } |
| |
class StringRep |
switch (n) |
|
{ |
|
case 0: |
|
return p - (Uint8*)dest; |
|
case 1: |
|
if (q[0] < 128) |
|
{ |
|
p[0] = q[0]; |
|
return p + 1 - (Uint8*)dest; |
|
} |
|
break; |
|
case 2: |
|
if (q[0] < 128 && q[1] < 128) |
{ | { |
public: |
p[0] = q[0]; |
StringRep() |
p[1] = q[1]; |
{} |
return p + 2 - (Uint8*)dest; |
StringRep(const StringRep& r) |
} |
: c16a(r.c16a) |
break; |
{} |
case 3: |
StringRep(const Char16* str) |
if (q[0] < 128 && q[1] < 128 && q[2] < 128) |
: c16a(str, _StrLen(str) + 1) |
{ |
{} |
p[0] = q[0]; |
|
p[1] = q[1]; |
|
p[2] = q[2]; |
|
return p + 3 - (Uint8*)dest; |
|
} |
|
break; |
|
} |
| |
Array<Char16> c16a; |
// If this line was reached, there must be characters greater than 128. |
}; |
|
|
UTF16toUTF8(&q, q + n, &p, p + 3 * n); |
| |
String::String() |
return p - (Uint8*)dest; |
|
} |
|
|
|
static inline size_t _convert(Uint16* p, const char* q, size_t n) |
{ | { |
_rep = new StringRep; |
#ifdef PEGASUS_STRING_NO_UTF8 |
_rep->c16a.append('\0'); |
_copy(p, q, n); |
|
return n; |
|
#else |
|
return _copy_from_utf8(p, q, n); |
|
#endif |
} | } |
| |
String::String(const String& str) |
//============================================================================== |
|
// |
|
// class CString |
|
// |
|
//============================================================================== |
|
|
|
CString::CString(const CString& cstr) : _rep(0) |
{ | { |
if (str._rep != NULL) |
if (cstr._rep) |
{ | { |
_rep = new StringRep(*str._rep); |
size_t n = strlen(cstr._rep) + 1; |
|
_rep = (char*)operator new(n); |
|
memcpy(_rep, cstr._rep, n); |
} | } |
else |
} |
|
|
|
CString& CString::operator=(const CString& cstr) |
{ | { |
_rep = new StringRep(); |
if (&cstr != this) |
|
{ |
|
if (_rep) |
|
{ |
|
operator delete(_rep); |
|
_rep = 0; |
|
} |
|
|
|
if (cstr._rep) |
|
{ |
|
size_t n = strlen(cstr._rep) + 1; |
|
_rep = (char*)operator new(n); |
|
memcpy(_rep, cstr._rep, n); |
} | } |
} | } |
| |
|
return *this; |
|
} |
| |
String::String(const String& str, Uint32 n) |
//============================================================================== |
|
// |
|
// class StringRep |
|
// |
|
//============================================================================== |
|
|
|
StringRep StringRep::_empty_rep; |
|
|
|
inline StringRep* StringRep::alloc(size_t cap) |
{ | { |
_rep = new StringRep; |
StringRep* rep = (StringRep*)::operator new( |
assign(str.getChar16Data(), n); |
sizeof(StringRep) + cap * sizeof(Uint16)); |
|
rep->cap = cap; |
|
Atomic_create(&rep->refs, 1); |
|
|
|
return rep; |
} | } |
| |
String::String(const Char16* str) |
static inline void _reserve(StringRep*& rep, Uint32 cap) |
{ | { |
if ( str == 0 ) |
if (cap > rep->cap || Atomic_get(&rep->refs) != 1) |
{ | { |
throw NullPointer(); |
size_t n = _next_pow_2(cap); |
|
StringRep* new_rep = StringRep::alloc(n); |
|
new_rep->size = rep->size; |
|
_copy(new_rep->data, rep->data, rep->size + 1); |
|
StringRep::unref(rep); |
|
rep = new_rep; |
|
} |
} | } |
| |
_rep = new StringRep(str); |
StringRep* StringRep::create(const Uint16* data, size_t size) |
|
{ |
|
StringRep* rep = StringRep::alloc(size); |
|
rep->size = size; |
|
_copy(rep->data, data, size); |
|
rep->data[size] = '\0'; |
|
return rep; |
} | } |
| |
String::String(const Char16* str, Uint32 n) |
StringRep* StringRep::copy_on_write(StringRep* rep) |
{ | { |
if ( str == 0 ) |
// Return a new copy of rep. Release rep. |
|
|
|
StringRep* new_rep = StringRep::alloc(rep->size); |
|
new_rep->size = rep->size; |
|
_copy(new_rep->data, rep->data, rep->size); |
|
new_rep->data[new_rep->size] = '\0'; |
|
StringRep::unref(rep); |
|
return new_rep; |
|
} |
|
|
|
StringRep* StringRep::create(const char* data, size_t size) |
{ | { |
throw NullPointer(); |
StringRep* rep = StringRep::alloc(size); |
|
rep->size = _convert((Uint16*)rep->data, data, size); |
|
rep->data[rep->size] = '\0'; |
|
|
|
return rep; |
} | } |
| |
_rep = new StringRep; |
StringRep* StringRep::createASCII7(const char* data, size_t size) |
assign(str, n); |
{ |
|
StringRep* rep = StringRep::alloc(size); |
|
_copy((Uint16*)rep->data, data, size); |
|
rep->data[rep->size = size] = '\0'; |
|
return rep; |
} | } |
| |
String::String(const char* str) |
Uint32 StringRep::length(const Uint16* str) |
{ | { |
if ( str == 0 ) |
// Note: We could unroll this but it is rarely called. |
|
|
|
const Uint16* end = (Uint16*)str; |
|
|
|
while (*end++) |
|
; |
|
|
|
return end - str - 1; |
|
} |
|
|
|
//============================================================================== |
|
// |
|
// class String |
|
// |
|
//============================================================================== |
|
|
|
const String String::EMPTY; |
|
|
|
String::String(const String& str, Uint32 n) |
{ | { |
throw NullPointer(); |
_check_bounds(n, str._rep->size); |
|
_rep = StringRep::create(str._rep->data, n); |
} | } |
| |
_rep = new StringRep; |
String::String(const Char16* str) |
AutoPtr<StringRep> tempRep(_rep); |
{ |
// An exception can be thrown, so use a temp AutoPtr. |
_check_null_pointer(str); |
_convertAndAppend(str, _rep->c16a, 0, 1); |
_rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str)); |
tempRep.release(); |
|
} | } |
| |
String::String(const char* str, Uint32 n) |
String::String(const Char16* str, Uint32 n) |
{ | { |
if ( str == 0 ) |
_check_null_pointer(str); |
|
_rep = StringRep::create((Uint16*)str, n); |
|
} |
|
|
|
String::String(const char* str) |
{ | { |
throw NullPointer(); |
_check_null_pointer(str); |
|
_rep = StringRep::create(str, strlen(str)); |
} | } |
| |
_rep = new StringRep; |
String::String(const char* str, String::ASCII7Tag tag) |
AutoPtr<StringRep> tempRep(_rep); |
{ |
// An exception can be thrown, so use a temp AutoPtr. |
_check_null_pointer(str); |
_convertAndAppend(str, _rep->c16a, n, 0); |
_rep = StringRep::createASCII7(str, strlen(str)); |
tempRep.release(); |
|
} | } |
| |
String::~String() |
String::String(const char* str, Uint32 n) |
{ | { |
delete _rep; |
_check_null_pointer(str); |
|
_rep = StringRep::create(str, n); |
} | } |
| |
String& String::operator=(const String& str) |
String::String(const char* str, size_t n, String::ASCII7Tag tag) |
{ | { |
if (&str != this) |
_check_null_pointer(str); |
|
_rep = StringRep::createASCII7(str, n); |
|
} |
|
|
|
String::String(const String& s1, const String& s2) |
{ | { |
assign(str); |
size_t n1 = s1._rep->size; |
|
size_t n2 = s2._rep->size; |
|
size_t n = n1 + n2; |
|
_rep = StringRep::alloc(n); |
|
_copy(_rep->data, s1._rep->data, n1); |
|
_copy(_rep->data + n1, s2._rep->data, n2); |
|
_rep->size = n; |
|
_rep->data[n] = '\0'; |
} | } |
return *this; |
|
|
String::String(const String& s1, const char* s2) |
|
{ |
|
_check_null_pointer(s2); |
|
size_t n1 = s1._rep->size; |
|
size_t n2 = strlen(s2); |
|
_rep = StringRep::alloc(n1 + n2); |
|
_copy(_rep->data, s1._rep->data, n1); |
|
_rep->size = n1 + _convert((Uint16*)_rep->data + n1, s2, n2); |
|
_rep->data[_rep->size] = '\0'; |
} | } |
| |
String& String::assign(const String& str) |
String::String(const char* s1, const String& s2) |
{ | { |
_rep->c16a = str._rep->c16a; |
_check_null_pointer(s1); |
return *this; |
size_t n1 = strlen(s1); |
|
size_t n2 = s2._rep->size; |
|
_rep = StringRep::alloc(n1 + n2); |
|
_rep->size = n2 + _convert((Uint16*)_rep->data, s1, n1); |
|
_copy(_rep->data + n1, s2._rep->data, n2); |
|
_rep->data[_rep->size] = '\0'; |
} | } |
| |
String& String::assign(const Char16* str) |
String& String::assign(const String& str) |
{ | { |
if ( str == 0 ) |
if (_rep != str._rep) |
{ | { |
throw NullPointer(); |
StringRep::unref(_rep); |
|
StringRep::ref(_rep = str._rep); |
} | } |
| |
_rep->c16a.clear(); |
|
_rep->c16a.append(str, _StrLen(str) + 1); |
|
return *this; | return *this; |
} | } |
| |
String& String::assign(const Char16* str, Uint32 n) | String& String::assign(const Char16* str, Uint32 n) |
{ | { |
if ( str == 0 ) |
_check_null_pointer(str); |
|
|
|
if (n > _rep->cap || Atomic_get(&_rep->refs) != 1) |
{ | { |
throw NullPointer(); |
StringRep::unref(_rep); |
|
_rep = StringRep::alloc(n); |
} | } |
| |
_rep->c16a.clear(); |
_rep->size = n; |
_rep->c16a.append(str, n); |
_copy(_rep->data, (Uint16*)str, n); |
_rep->c16a.append('\0'); |
_rep->data[n] = '\0'; |
|
|
return *this; | return *this; |
} | } |
| |
String& String::assign(const char* str) |
String& String::assign(const char* str, Uint32 n) |
{ | { |
if ( str == 0 ) |
_check_null_pointer(str); |
|
|
|
if (n > _rep->cap || Atomic_get(&_rep->refs) != 1) |
{ | { |
throw NullPointer(); |
StringRep::unref(_rep); |
|
_rep = StringRep::alloc(n); |
} | } |
| |
_rep->c16a.clear(); |
_rep->size = _convert(_rep->data, str, n); |
_convertAndAppend(str, _rep->c16a, 0, 1); |
_rep->data[_rep->size] = 0; |
|
|
return *this; | return *this; |
} | } |
| |
String& String::assign(const char* str, Uint32 n) |
String& String::assignASCII7(const char* str, Uint32 n) |
{ | { |
if ( str == 0 ) |
_check_null_pointer(str); |
|
|
|
if (n > _rep->cap || Atomic_get(&_rep->refs) != 1) |
{ | { |
throw NullPointer(); |
StringRep::unref(_rep); |
|
_rep = StringRep::alloc(n); |
} | } |
| |
_rep->c16a.clear(); |
_copy(_rep->data, str, n); |
_convertAndAppend(str, _rep->c16a, n, 0); |
_rep->data[_rep->size = n] = 0; |
|
|
return *this; | return *this; |
} | } |
| |
void String::clear() | void String::clear() |
{ | { |
_rep->c16a.clear(); |
if (_rep->size) |
_rep->c16a.append('\0'); |
|
} |
|
|
|
void String::reserveCapacity(Uint32 capacity) |
|
{ | { |
_rep->c16a.reserveCapacity(capacity + 1); |
if (Atomic_get(&_rep->refs) == 1) |
} |
_rep->size = 0; |
|
else |
Uint32 String::size() const |
|
{ | { |
//#if defined (PEGASUS_OS_VMS) |
StringRep::unref(_rep); |
// |
_rep = &StringRep::_empty_rep; |
// This prevents returning a minus number. |
} |
// |
} |
// Seems as though the first time through |
|
// the XML parser something doesn't get |
|
// initialized and there is no check for |
|
// a negative number in the parser! |
|
// |
|
// Uint32 foo; |
|
// foo = _rep->c16a.size(); |
|
// if (foo == 0) |
|
// { |
|
// return 0; |
|
// } |
|
// else |
|
// { |
|
// return (foo -1); |
|
// } |
|
//#else |
|
return _rep->c16a.size() - 1; |
|
//#endif |
|
} | } |
| |
const Char16* String::getChar16Data() const |
void String::reserveCapacity(Uint32 cap) |
{ | { |
return _rep->c16a.getData(); |
_reserve(_rep, cap); |
} | } |
| |
Char16& String::operator[](Uint32 index) |
CString String::getCString() const |
{ | { |
if (index > size()) |
#ifdef PEGASUS_STRING_NO_UTF8 |
throw IndexOutOfBoundsException(); |
char* str = (char*)operator new(_rep->size + 1); |
|
_copy(str, _rep->data, _rep->size); |
return _rep->c16a[index]; |
str[_rep->size] = '\0'; |
|
return CString(str); |
|
#else |
|
Uint32 n = 3 * _rep->size; |
|
char* str = (char*)operator new(n + 1); |
|
size_t size = _copy_to_utf8(str, _rep->data, _rep->size); |
|
str[size] = '\0'; |
|
return CString(str); |
|
#endif |
} | } |
| |
const Char16 String::operator[](Uint32 index) const |
String& String::append(const Char16* str, Uint32 n) |
{ | { |
if (index > size()) |
_check_null_pointer(str); |
throw IndexOutOfBoundsException(); |
|
| |
return _rep->c16a[index]; |
size_t old_size = _rep->size; |
} |
size_t new_size = old_size + n; |
|
_reserve(_rep, new_size); |
|
_copy(_rep->data + old_size, (Uint16*)str, n); |
|
_rep->size = new_size; |
|
_rep->data[new_size] = '\0'; |
| |
String& String::append(const Char16& c) |
|
{ |
|
_rep->c16a.insert(_rep->c16a.size() - 1, c); |
|
return *this; | return *this; |
} | } |
| |
String& String::append(const Char16* str, Uint32 n) |
String& String::append(const String& str) |
{ |
|
if (str == 0) |
|
{ | { |
throw NullPointer(); |
return append((Char16*)str._rep->data, str._rep->size); |
} | } |
| |
_rep->c16a.reserveCapacity(_rep->c16a.size() + n); |
String& String::append(const char* str, Uint32 size) |
_rep->c16a.remove(_rep->c16a.size() - 1); |
{ |
_rep->c16a.append(str, n); |
_check_null_pointer(str); |
_rep->c16a.append('\0'); |
|
|
size_t old_size = _rep->size; |
|
size_t cap = old_size + size; |
|
|
|
_reserve(_rep, cap); |
|
_rep->size += _convert((Uint16*)_rep->data + old_size, str, size); |
|
_rep->data[_rep->size] = '\0'; |
|
|
return *this; | return *this; |
} | } |
| |
String& String::append(const String& str) |
void String::remove(Uint32 index, Uint32 n) |
{ | { |
return append(str.getChar16Data(), str.size()); |
if (n == PEG_NOT_FOUND) |
} |
n = _rep->size - index; |
| |
void String::remove(Uint32 index, Uint32 size) |
_check_bounds(index + n, _rep->size); |
{ |
|
if (size == PEG_NOT_FOUND) |
|
size = this->size() - index; |
|
| |
if (index + size > this->size()) |
if (Atomic_get(&_rep->refs) != 1) |
throw IndexOutOfBoundsException(); |
_rep = StringRep::copy_on_write(_rep); |
|
|
|
assert(index + n <= _rep->size); |
| |
if (size) |
size_t rem = _rep->size - (index + n); |
_rep->c16a.remove(index, size); |
Uint16* data = _rep->data; |
|
|
|
if (rem) |
|
memmove(data + index, data + index + n, rem * sizeof(Uint16)); |
|
|
|
_rep->size -= n; |
|
data[_rep->size] = '\0'; |
} | } |
| |
String String::subString(Uint32 index, Uint32 length) const |
String String::subString(Uint32 index, Uint32 n) const |
{ | { |
if (index < size()) |
// Note: this implementation is very permissive but used for |
|
// backwards compatibility. |
|
|
|
if (index < _rep->size) |
{ | { |
if ((length == PEG_NOT_FOUND) || (length > size() - index)) |
if (n == PEG_NOT_FOUND || n > _rep->size - index) |
length = size() - index; |
n = _rep->size - index; |
| |
return String(getChar16Data() + index, length); |
return String((Char16*)_rep->data + index, n); |
} | } |
| |
return String(); | return String(); |
|
|
| |
Uint32 String::find(Char16 c) const | Uint32 String::find(Char16 c) const |
{ | { |
const Char16* first = getChar16Data(); |
Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c); |
| |
for (const Char16* p = first; *p; p++) |
if (p) |
{ |
return p - _rep->data; |
if (*p == c) |
|
return p - first; |
|
} |
|
| |
return PEG_NOT_FOUND; | return PEG_NOT_FOUND; |
} | } |
| |
Uint32 String::find(Uint32 index, Char16 c) const | Uint32 String::find(Uint32 index, Char16 c) const |
{ | { |
const Char16* data = getChar16Data(); |
_check_bounds(index, _rep->size); |
| |
for (Uint32 i = index, n = size(); i < n; i++) |
if (index >= _rep->size) |
{ |
return PEG_NOT_FOUND; |
if (data[i] == c) |
|
return i; |
Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c); |
} |
|
|
if (p) |
|
return p - _rep->data; |
| |
return PEG_NOT_FOUND; | return PEG_NOT_FOUND; |
} | } |
| |
Uint32 String::find(const String& s) const |
Uint32 String::_find_aux(const Char16* s, Uint32 n) const |
{ | { |
const Char16* pSubStr = s.getChar16Data(); |
_check_null_pointer(s); |
const Char16* pStr = getChar16Data(); |
|
Uint32 subStrLen = s.size(); |
|
Uint32 strLen = size(); |
|
| |
if (subStrLen > strLen) |
const Uint16* data = _rep->data; |
|
size_t rem = _rep->size; |
|
|
|
while (n <= rem) |
{ | { |
return PEG_NOT_FOUND; |
Uint16* p = (Uint16*)_find(data, rem, s[0]); |
} |
|
| |
// loop to find first char match |
if (!p) |
Uint32 loc = 0; |
break; |
for( ; loc <= (strLen-subStrLen); loc++) |
|
{ |
if (memcmp(p, s, n * sizeof(Uint16)) == 0) |
if (*pStr++ == *pSubStr) // match first char |
return p - _rep->data; |
{ |
|
// point to substr 2nd char |
p++; |
const Char16* p = pSubStr + 1; |
rem -= p - data; |
|
data = p; |
// Test remaining chars for equal |
|
Uint32 i = 1; |
|
for (; i < subStrLen; i++) |
|
if (*pStr++ != *p++ ) |
|
{pStr-=i; break;} // break from loop |
|
if (i == subStrLen) |
|
return loc; |
|
} |
|
} | } |
|
|
return PEG_NOT_FOUND; | return PEG_NOT_FOUND; |
} | } |
| |
|
Uint32 String::find(const char* s) const |
|
{ |
|
_check_null_pointer(s); |
|
|
|
// Note: could optimize away creation of temporary, but this is rarely |
|
// called. |
|
return find(String(s)); |
|
} |
|
|
Uint32 String::reverseFind(Char16 c) const | Uint32 String::reverseFind(Char16 c) const |
{ | { |
const Char16* first = getChar16Data(); |
Uint16 x = c; |
const Char16* last = getChar16Data() + size(); |
Uint16* p = _rep->data; |
|
Uint16* q = _rep->data + _rep->size; |
| |
while (last != first) |
while (q != p) |
{ | { |
if (*--last == c) |
if (*--q == x) |
return last - first; |
return q - p; |
} | } |
| |
return PEG_NOT_FOUND; | return PEG_NOT_FOUND; |
|
|
| |
void String::toLower() | void String::toLower() |
{ | { |
#ifdef PEGASUS_HAS_ICU |
#ifdef PEGASUS_STRING_ENABLE_ICU |
|
|
if (InitializeICU::initICUSuccessful()) | if (InitializeICU::initICUSuccessful()) |
{ | { |
// This will do a locale-insensitive, but context-sensitive convert. |
//// First calculate size of resulting string. u_strToLower() returns |
// Context-sensitive prevents any optimizations that try to |
//// only the size when zero is passed as the destination size argument. |
// convert just the ascii before calling ICU. |
|
// The string may shrink or expand after the convert. |
|
|
|
int32_t sz = size(); |
|
UChar* destbuf = new UChar[sz + 1]; |
|
const UChar* srcbuf = (const UChar *)getChar16Data(); |
|
UErrorCode err = U_ZERO_ERROR; | UErrorCode err = U_ZERO_ERROR; |
| |
int32_t needed = u_strToLower(destbuf, sz + 1 , srcbuf, sz, NULL, &err); |
int32_t new_size = u_strToLower( |
if (err == U_BUFFER_OVERFLOW_ERROR) |
NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err); |
{ |
|
delete [] destbuf; |
|
destbuf = new UChar[needed + 1]; |
|
err = U_ZERO_ERROR; |
|
u_strToLower(destbuf, needed + 1 , srcbuf, sz, NULL, &err); |
|
} |
|
if (U_FAILURE(err)) |
|
{ |
|
delete [] destbuf; |
|
throw Exception(u_errorName(err)); |
|
} |
|
| |
if (needed == sz) |
//// Reserve enough space for the result. |
{ |
|
Char16* from = (Char16*)destbuf; |
if ((Uint32)new_size > _rep->cap) |
for (Char16* to = &_rep->c16a[0]; *to; to++, from++) |
_reserve(_rep, new_size); |
{ |
|
*to = *from; |
|
} |
|
} |
|
else |
|
{ |
|
assign((Char16 *)destbuf, needed); |
|
} |
|
| |
delete [] destbuf; |
//// Perform the conversion (overlapping buffers are allowed). |
|
|
|
u_strToLower((UChar*)_rep->data, new_size, |
|
(UChar*)_rep->data, _rep->size, NULL, &err); |
|
|
|
_rep->size = new_size; |
} | } |
else |
|
#endif |
#endif /* PEGASUS_STRING_ENABLE_ICU */ |
{ |
|
for (Char16* p = &_rep->c16a[0]; *p; p++) |
if (Atomic_get(&_rep->refs) != 1) |
|
_rep = StringRep::copy_on_write(_rep); |
|
|
|
Uint16* p = _rep->data; |
|
size_t n = _rep->size; |
|
|
|
for (; n--; p++) |
{ | { |
if (*p <= PEGASUS_MAX_PRINTABLE_CHAR) |
if (!(*p & 0xFF00)) |
*p = tolower(*p); |
*p = _to_lower(*p); |
} |
|
} | } |
} | } |
| |
void String::toUpper() | void String::toUpper() |
{ | { |
#ifdef PEGASUS_HAS_ICU |
#ifdef PEGASUS_STRING_ENABLE_ICU |
|
|
if (InitializeICU::initICUSuccessful()) | if (InitializeICU::initICUSuccessful()) |
{ | { |
// This will do a locale-insensitive, but context-sensitive convert. |
//// First calculate size of resulting string. u_strToUpper() returns |
// Context-sensitive prevents any optimizations that try to |
//// only the size when zero is passed as the destination size argument. |
// convert just the ascii before calling ICU. |
|
// The string may shrink or expand after the convert. |
|
|
|
int32_t sz = size(); |
|
UChar* destbuf = new UChar[sz + 1]; |
|
const UChar* srcbuf = (const UChar *)getChar16Data(); |
|
UErrorCode err = U_ZERO_ERROR; | UErrorCode err = U_ZERO_ERROR; |
| |
int32_t needed = u_strToUpper(destbuf, sz + 1 , srcbuf, sz, NULL, &err); |
int32_t new_size = u_strToUpper( |
if (err == U_BUFFER_OVERFLOW_ERROR) |
NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err); |
{ |
|
delete [] destbuf; |
//// Reserve enough space for the result. |
destbuf = new UChar[needed + 1]; |
|
err = U_ZERO_ERROR; |
if ((Uint32)new_size > _rep->cap) |
u_strToUpper(destbuf, needed + 1 , srcbuf, sz, NULL, &err); |
_reserve(_rep, new_size); |
|
|
|
//// Perform the conversion (overlapping buffers are allowed). |
|
|
|
u_strToUpper((UChar*)_rep->data, new_size, |
|
(UChar*)_rep->data, _rep->size, NULL, &err); |
|
|
|
_rep->size = new_size; |
} | } |
if (U_FAILURE(err)) |
|
{ |
#endif /* PEGASUS_STRING_ENABLE_ICU */ |
delete [] destbuf; |
|
throw Exception(u_errorName(err)); |
if (Atomic_get(&_rep->refs) != 1) |
|
_rep = StringRep::copy_on_write(_rep); |
|
|
|
Uint16* p = _rep->data; |
|
size_t n = _rep->size; |
|
|
|
for (; n--; p++) |
|
*p = _to_upper(*p); |
} | } |
| |
if (needed == sz) |
int String::compare(const String& s1, const String& s2, Uint32 n) |
{ |
|
Char16* from = (Char16*)destbuf; |
|
for (Char16* to = &_rep->c16a[0]; *to; to++, from++) |
|
{ | { |
*to = *from; |
assert(n <= s1._rep->size); |
} |
assert(n <= s2._rep->size); |
|
|
|
// Ignoring error in which n is greater than s1.size() or s2.size() |
|
return _compare(s1._rep->data, s2._rep->data, n); |
} | } |
else |
|
|
int String::compare(const String& s1, const String& s2) |
{ | { |
assign((Char16 *)destbuf, needed); |
return _compare(s1._rep->data, s2._rep->data); |
} | } |
| |
delete [] destbuf; |
int String::compare(const String& s1, const char* s2) |
} |
|
else |
|
#endif |
|
{ | { |
for (Char16* p = &_rep->c16a[0]; *p; p++) |
_check_null_pointer(s2); |
{ |
|
if (*p <= PEGASUS_MAX_PRINTABLE_CHAR) |
#ifdef PEGASUS_STRING_NO_UTF8 |
*p = toupper(*p); |
return _compare_no_utf8(s1._rep->data, s2); |
} |
#else |
} |
// ATTN: optimize this! |
|
return String::compare(s1, String(s2)); |
|
#endif |
} | } |
| |
int String::compare(const String& s1, const String& s2, Uint32 n) |
int String::compareNoCase(const String& str1, const String& str2) |
{ | { |
const Char16* s1c16 = s1.getChar16Data(); |
#ifdef PEGASUS_STRING_ENABLE_ICU |
const Char16* s2c16 = s2.getChar16Data(); |
|
| |
while (n--) |
if (InitializeICU::initICUSuccessful()) |
{ | { |
int r = *s1c16++ - *s2c16++; |
return u_strcasecmp( |
|
str1._rep->data, str2._rep->data, U_FOLD_CASE_DEFAULT); |
if (r) |
|
return r; |
|
} | } |
| |
return 0; |
#endif /* PEGASUS_STRING_ENABLE_ICU */ |
} |
|
| |
int String::compare(const String& s1, const String& s2) |
const Uint16* s1 = str1._rep->data; |
{ |
const Uint16* s2 = str2._rep->data; |
const Char16* s1c16 = s1.getChar16Data(); |
|
const Char16* s2c16 = s2.getChar16Data(); |
|
| |
while (*s1c16 && *s2c16) |
while (*s1 && *s2) |
{ | { |
int r = *s1c16++ - *s2c16++; |
int r = _to_lower(*s1++) - _to_lower(*s2++); |
| |
if (r) | if (r) |
return r; | return r; |
} | } |
| |
if (*s2c16) |
if (*s2) |
return -1; | return -1; |
else if (*s1c16) |
else if (*s1) |
return 1; | return 1; |
| |
return 0; | return 0; |
} | } |
| |
int String::compareNoCase(const String& s1, const String& s2) |
Boolean String::equalNoCase_aux(const String& s1, const String& s2) |
{ |
|
#ifdef PEGASUS_HAS_ICU |
|
if (InitializeICU::initICUSuccessful()) |
|
{ | { |
return u_strcasecmp((const UChar*)s1.getChar16Data(), |
#ifdef PEGASUS_STRING_ENABLE_ICU |
(const UChar*)s2.getChar16Data(), |
|
U_FOLD_CASE_DEFAULT); |
|
} |
|
#endif |
|
const Char16* _s1 = s1.getChar16Data(); |
|
const Char16* _s2 = s2.getChar16Data(); |
|
| |
while (*_s1 && *_s2) |
return String::compareNoCase(s1, s2) == 0; |
{ |
|
int r; |
#else /* PEGASUS_STRING_ENABLE_ICU */ |
|
|
|
Uint16* p = (Uint16*)s1._rep->data; |
|
Uint16* q = (Uint16*)s2._rep->data; |
|
Uint32 n = s2._rep->size; |
| |
if (*_s1 <= PEGASUS_MAX_PRINTABLE_CHAR && |
while (n >= 8) |
*_s2 <= PEGASUS_MAX_PRINTABLE_CHAR) |
|
{ | { |
r = tolower(*_s1++) - tolower(*_s2++); |
if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0]))) || |
} |
((p[1] - q[1]) && (_to_upper(p[1]) - _to_upper(q[1]))) || |
else |
((p[2] - q[2]) && (_to_upper(p[2]) - _to_upper(q[2]))) || |
|
((p[3] - q[3]) && (_to_upper(p[3]) - _to_upper(q[3]))) || |
|
((p[4] - q[4]) && (_to_upper(p[4]) - _to_upper(q[4]))) || |
|
((p[5] - q[5]) && (_to_upper(p[5]) - _to_upper(q[5]))) || |
|
((p[6] - q[6]) && (_to_upper(p[6]) - _to_upper(q[6]))) || |
|
((p[7] - q[7]) && (_to_upper(p[7]) - _to_upper(q[7])))) |
{ | { |
r = *_s1++ - *_s2++; |
return false; |
} | } |
| |
if (r) |
n -= 8; |
return r; |
p += 8; |
|
q += 8; |
} | } |
| |
if (*_s2) |
while (n >= 4) |
return -1; |
|
else if (*_s1) |
|
return 1; |
|
|
|
return 0; |
|
} |
|
|
|
Boolean String::equal(const String& str1, const String& str2) |
|
{ | { |
return String::compare(str1, str2) == 0; |
if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0]))) || |
} |
((p[1] - q[1]) && (_to_upper(p[1]) - _to_upper(q[1]))) || |
|
((p[2] - q[2]) && (_to_upper(p[2]) - _to_upper(q[2]))) || |
Boolean String::equalNoCase(const String& str1, const String& str2) |
((p[3] - q[3]) && (_to_upper(p[3]) - _to_upper(q[3])))) |
{ | { |
#ifdef PEGASUS_HAS_ICU |
|
return compareNoCase(str1, str2) == 0; |
|
#else |
|
if (str1.size() != str2.size()) |
|
return false; | return false; |
|
} |
| |
const Char16* p = str1.getChar16Data(); |
n -= 4; |
const Char16* q = str2.getChar16Data(); |
p += 4; |
|
q += 4; |
Uint32 n = str1.size(); |
} |
| |
while (n--) | while (n--) |
{ | { |
if (*p <= PEGASUS_MAX_PRINTABLE_CHAR && |
if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0])))) |
*q <= PEGASUS_MAX_PRINTABLE_CHAR) |
|
{ |
|
if (tolower(*p++) != tolower(*q++)) |
|
return false; |
|
} |
|
else if (*p++ != *q++) |
|
return false; | return false; |
|
|
|
p++; |
|
q++; |
} | } |
| |
return true; | return true; |
#endif |
|
} |
|
| |
|
#endif /* PEGASUS_STRING_ENABLE_ICU */ |
|
} |
| |
CString String::getCString() const |
Boolean String::equalNoCase(const String& s1, const char* s2) |
{ | { |
Uint32 n = 3*size() + 1; |
_check_null_pointer(s2); |
char* str = new char[n]; |
|
| |
const Char16* msg16 = getChar16Data(); |
#if defined(PEGASUS_STRING_ENABLE_ICU) |
| |
const Uint16 *strsrc = (Uint16 *)msg16; |
return String::equalNoCase(s1, String(s2)); |
Uint16 *endsrc = (Uint16 *)&msg16[size()+1]; |
|
| |
Uint8 *strtgt = (Uint8 *)str; |
#elif defined(PEGASUS_STRING_NO_UTF8) |
Uint8 *endtgt = (Uint8 *)&str[n]; |
|
| |
UTF16toUTF8 (&strsrc, |
const Uint16* p1 = (Uint16*)s1._rep->data; |
endsrc, |
const char* p2 = s2; |
&strtgt, |
size_t n = s1._rep->size; |
endtgt); |
|
| |
char* str1 = new char[strlen(str)+1]; |
while (n--) |
strcpy(str1,str); |
{ |
delete [] str; |
if (!*p2) |
|
return false; |
| |
return CString(str1); |
if (_to_upper(*p1++) != CharSet::to_upper(int(*p2++))) |
|
return false; |
} | } |
| |
#if 0 |
return true; |
// ATTN-RK-P3-20020603: This code is not completely correct |
|
// Wildcard String matching function that may be useful in the future |
|
// The following code was provided by Bob Blair. |
|
|
|
/* _StringMatch Match input MatchString against a GLOB style pattern |
|
Note that MatchChar is the char type so that this source |
|
is portable to different string types. This is an internal function. |
|
|
|
Results: The return value is 1 if string matches pattern, and |
|
0 otherwise. The matching operation permits the following |
|
special characters in the pattern: *?\[] (see the manual |
|
entry for details on what these mean). |
|
|
|
| |
Side effects: None. |
#else /* PEGASUS_STRING_ENABLE_ICU */ |
*/ |
|
| |
/* MatchChar defined as a separate entity because this function source used |
// ATTN: optimize this! |
elsewhere was an unsigned char *. Here we use Uint16 to maintain 16 bit |
return String::equalNoCase(s1, String(s2)); |
size. |
|
*/ |
|
typedef Uint16 MatchChar; |
|
| |
inline Uint16 _ToLower(Uint16 ch) |
#endif /* PEGASUS_STRING_ENABLE_ICU */ |
{ |
|
// ICU_TODO: If ICU is available we should do this the correct way. |
|
return ch <= PEGASUS_MAX_PRINTABLE_CHAR ? tolower(char(ch)) : ch; |
|
} | } |
| |
inline Boolean _Equal(MatchChar ch1, MatchChar ch2, int nocase) |
Boolean String::equal(const String& s1, const String& s2) |
{ |
|
// ICU_TODO: If ICU is available we should do this the correct way. |
|
if (nocase) |
|
return _ToLower(ch1) == _ToLower(ch2); |
|
|
|
return ch1 == ch2; |
|
} |
|
|
|
|
|
static const MatchChar * |
|
_matchrange(const MatchChar *range, MatchChar c, int nocase) |
|
{ |
|
const MatchChar *p = range; |
|
const MatchChar *rstart = range + 1; |
|
const MatchChar *rend = 0; |
|
MatchChar compchar; |
|
|
|
for (rend = rstart; *rend && *rend != ']'; rend++); |
|
if (*rend == ']') { // if there is an end to this pattern |
|
for (compchar = *rstart; rstart != rend; rstart++) { |
|
if (_Equal(*rstart, c, nocase)) |
|
return ++rend; |
|
if (*rstart == '-') { |
|
rstart++; |
|
if (c >= compchar && c <= *rstart) |
|
return ++rend; |
|
} |
|
} |
|
} |
|
return (const MatchChar *)0; |
|
} |
|
|
|
static int |
|
_StringMatch( |
|
const MatchChar *testString, |
|
const MatchChar *pattern, |
|
int nocase ) /* Ignore case if this is true */ |
|
{ |
|
const MatchChar *pat = pattern; |
|
const MatchChar *str = testString; |
|
unsigned int done = 0; |
|
unsigned int res = 0; // the result: 1 == match |
|
|
|
while (!done) { // main loop walks through pattern and test string |
|
//cerr << "Comparing <" << *pat << "> and <" << *str << ">" << endl; |
|
if (!*pat) { //end of pattern |
|
done = 1; // we're done |
|
if (!*str) //end of test, too? |
|
res = 1; // then we matched |
|
} else { //Not end of pattern |
|
if (!*str) { // but end of test |
|
done = 1; // We're done |
|
if (*pat == '*') // If pattern openends |
|
res = 1; // then we matched |
|
} else { //Not end of test |
|
if (*pat == '*') { //Ambiguuity found |
|
if (!*++pat) { //and it ends pattern |
|
done = 1; // then we're done |
|
res = 1; // and match |
|
} else { //if it doesn't end |
|
while (!done) { // until we're done |
|
if (_StringMatch(str, pat, nocase)) { // we recurse |
|
done = 1; //if it recurses true |
|
res = 1; // we done and match |
|
} else { //it recurses false |
|
if (!*str) // see if test is done |
|
done = 1; // yes: we done |
|
else // not done: |
|
str++; // keep testing |
|
} // end test on recursive call |
|
} // end looping on recursive calls |
|
} // end logic when pattern is ambiguous |
|
} else { //pattern not ambiguus |
|
if (*pat == '?') { //pattern is 'any' |
|
pat++, str++; // so move along |
|
} else if (*pat == '[') { //see if it's a range |
|
pat = _matchrange(pat, *str, nocase); // and is a match |
|
if (!pat) { //It is not a match |
|
done = 1; // we're done |
|
res = 0; // no match |
|
} else { //Range matches |
|
str++, pat++; // keep going |
|
} |
|
} else { // only case left is individual characters |
|
if (!_Equal(*pat++, *str++, nocase)) // if they don't match |
|
done = 1; // bail. |
|
} |
|
} // end ("pattern is not ambiguous (*)" logic |
|
} // end logic when pattern and string still have data |
|
} // end logic when pattern still has data |
|
} // end main loop |
|
return res; |
|
} |
|
|
|
|
|
/** match matches a string against a GLOB style pattern. |
|
Returns true if the String parameter matches the pattern. C-Shell style |
|
glob matching is used. |
|
@param str String to be matched against the pattern |
|
@param pattern Pattern to use in the match |
|
@return Boolean true if str matches pattern |
|
The pattern definition is as follows: |
|
<pre> |
|
* Matches any number of any characters |
|
? Match exactly one character |
|
[chars] Match any character in chars |
|
[chara-charb] Match any character in the range between chara and charb |
|
</pre> |
|
The literal characters *, ?, [, ] can be included in a string by |
|
escaping them with backslash "\". Ranges of characters can be concatenated. |
|
<pre> |
|
examples: |
|
Boolean result = String::match("This is a test", "*is*"); |
|
Boolean works = String::match("abcdef123", "*[0-9]"); |
|
</pre> |
|
*/ |
|
Boolean String::match(const String& str, const String& pattern) |
|
{ | { |
return _StringMatch( |
return s1._rep->size == s2._rep->size && memcmp(s1._rep->data, |
(Uint16*)str.getChar16Data(), (Uint16*)pattern.getChar16Data(), 0) != 0; |
s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0; |
} | } |
| |
/** matchNoCase Matches a String against a GLOB style pattern independent |
Boolean String::equal(const String& s1, const char* s2) |
of case. |
|
Returns true if the str parameter matches the pattern. C-Shell style |
|
glob matching is used. Ignore case in all comparisons. Case is |
|
ignored in the match. |
|
@param str String containing the string to be matched |
|
@param pattern GLOB style pattern to use in the match. |
|
@return Boolean true if str matches pattern |
|
@see match |
|
*/ |
|
Boolean String::matchNoCase(const String& str, const String& pattern) |
|
{ | { |
return _StringMatch( |
#ifdef PEGASUS_STRING_NO_UTF8 |
(Uint16*)str.getChar16Data(), (Uint16*)pattern.getChar16Data(), 1) != 0; |
|
} |
|
#endif |
|
| |
|
_check_null_pointer(s2); |
| |
/////////////////////////////////////////////////////////////////////////////// |
const Uint16* p = (Uint16*)s1._rep->data; |
// |
const char* q = s2; |
// String-related functions |
|
// |
|
/////////////////////////////////////////////////////////////////////////////// |
|
| |
Boolean operator==(const String& str1, const String& str2) |
while (*p && *q) |
{ | { |
return String::equal(str1, str2); |
if (*p++ != Uint16(*q++)) |
|
return false; |
} | } |
| |
Boolean operator==(const String& str1, const char* str2) |
return !(*p || *q); |
{ |
|
return String::equal(str1, str2); |
|
} |
|
| |
Boolean operator==(const char* str1, const String& str2) |
#else /* PEGASUS_STRING_NO_UTF8 */ |
{ |
|
return String::equal(str1, str2); |
|
} |
|
| |
Boolean operator!=(const String& str1, const String& str2) |
return String::equal(s1, String(s2)); |
{ |
|
return !String::equal(str1, str2); |
#endif /* PEGASUS_STRING_NO_UTF8 */ |
} | } |
| |
PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str) | PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str) |
{ | { |
|
|
#if defined(PEGASUS_OS_OS400) | #if defined(PEGASUS_OS_OS400) |
|
|
CString cstr = str.getCString(); | CString cstr = str.getCString(); |
const char* utf8str = cstr; | const char* utf8str = cstr; |
|
|
os << utf8str; | os << utf8str; |
| |
#elif defined(PEGASUS_HAS_ICU) |
#elif defined(PEGASUS_STRING_ENABLE_ICU) |
|
|
if (InitializeICU::initICUSuccessful()) | if (InitializeICU::initICUSuccessful()) |
{ | { |
char *buf = NULL; | char *buf = NULL; |
const int size = str.size() * 6; | const int size = str.size() * 6; |
UnicodeString UniStr((const UChar *)str.getChar16Data(), (int32_t)str.size()); |
UnicodeString UniStr( |
|
(const UChar *)str.getChar16Data(), (int32_t)str.size()); |
Uint32 bufsize = UniStr.extract(0,size,buf); | Uint32 bufsize = UniStr.extract(0,size,buf); |
|
|
buf = new char[bufsize+1]; | buf = new char[bufsize+1]; |
UniStr.extract(0,bufsize,buf); | UniStr.extract(0,bufsize,buf); |
os << buf; | os << buf; |
os.flush(); | os.flush(); |
delete [] buf; | delete [] buf; |
} | } |
else |
|
#endif // End of PEGASUS_HAS_ICU #else leg. |
#endif /* PEGASUS_OS_OS400 */ |
{ |
|
for (Uint32 i = 0, n = str.size(); i < n; i++) | for (Uint32 i = 0, n = str.size(); i < n; i++) |
{ | { |
Uint16 code = str[i]; | Uint16 code = str[i]; |
| |
if (code > 0 && code <= PEGASUS_MAX_PRINTABLE_CHAR) |
if (code > 0 && !(code & 0xFF00)) |
{ |
|
os << char(code); | os << char(code); |
} |
|
else | else |
{ | { |
// Print in hex format: | // Print in hex format: |
|
|
os << buffer; | os << buffer; |
} | } |
} | } |
} |
|
| |
return os; | return os; |
} | } |
| |
String operator+(const String& str1, const String& str2) |
void String::_append_char_aux() |
{ | { |
return String(str1).append(str2); |
StringRep* tmp; |
} |
|
| |
Boolean operator<(const String& str1, const String& str2) |
if (_rep->cap) |
{ | { |
return String::compare(str1, str2) < 0; |
tmp = StringRep::alloc(2 * _rep->cap); |
|
tmp->size = _rep->size; |
|
_copy(tmp->data, _rep->data, _rep->size); |
} | } |
|
else |
|
{ |
|
tmp = StringRep::alloc(8); |
|
tmp->size = 0; |
|
} |
|
|
|
StringRep::unref(_rep); |
|
_rep = tmp; |
|
} |
|
|
|
PEGASUS_NAMESPACE_END |
|
|
|
/* |
|
================================================================================ |
|
|
|
String optimizations: |
|
|
|
1. Added mechanism allowing certain functions to be inlined only when |
|
used by internal Pegasus modules. External modules (i.e., providers) |
|
link to a non-inline version, which allows for binary compatibility. |
|
|
|
2. Implemented copy-on-write with atomic increment/decrement. This |
|
yielded a 10% improvement for the 'gc' benchmark and an 11% improvement |
|
for the 'ni1000' benchmark. |
|
|
|
3. Employed loop unrolling in several places. For example, see: |
|
|
|
static Uint16* _find(const Uint16* s, size_t n, Uint16 c); |
| |
Boolean operator<=(const String& str1, const String& str2) |
4. Used the "empty-rep" optimization (described in whitepaper from the |
|
GCC Developers Summit). This reduced default construction to a simple |
|
pointer assignment. |
|
|
|
inline String::String() : _rep(&_empty_rep) { } |
|
|
|
5. Implemented Uint16 versions of toupper() and tolower() using tables. |
|
For example: |
|
|
|
static const char _upper[] = |
{ | { |
return String::compare(str1, str2) <= 0; |
0,1,2,...255 |
|
}; |
|
|
|
inline Uint16 _to_upper(Uint16 x) |
|
{ |
|
return (x & 0xFF00) ? x : _upper[x]; |
} | } |
| |
Boolean operator>(const String& str1, const String& str2) |
This outperforms the system implementation by avoiding an anding |
|
operation. |
|
|
|
6. Implemented char* version of the following member functions to |
|
eliminate unnecessary creation of anonymous string objects |
|
(temporaries). |
|
|
|
String(const String& s1, const char* s2); |
|
String(const char* s1, const String& s2); |
|
String& String::operator=(const char* str); |
|
Uint32 String::find(const char* s) const; |
|
bool String::equal(const String& s1, const char* s2); |
|
static int String::compare(const String& s1, const char* s2); |
|
String& String::append(const char* str); |
|
String& String::append(const char* str, Uint32 size); |
|
static bool String::equalNoCase(const String& s1, const char* s2); |
|
String& operator=(const char* str) |
|
String& String::assign(const char* str) |
|
String& String::append(const char* str) |
|
Boolean operator==(const String& s1, const char* s2) |
|
Boolean operator==(const char* s1, const String& s2) |
|
Boolean operator!=(const String& s1, const char* s2) |
|
Boolean operator!=(const char* s1, const String& s2) |
|
Boolean operator<(const String& s1, const char* s2) |
|
Boolean operator<(const char* s1, const String& s2) |
|
Boolean operator>(const String& s1, const char* s2) |
|
Boolean operator>(const char* s1, const String& s2) |
|
Boolean operator<=(const String& s1, const char* s2) |
|
Boolean operator<=(const char* s1, const String& s2) |
|
Boolean operator>=(const String& s1, const char* s2) |
|
Boolean operator>=(const char* s1, const String& s2) |
|
String operator+(const String& s1, const char* s2) |
|
String operator+(const char* s1, const String& s2) |
|
|
|
7. Optimized _next_pow_2(), used in rounding the capacity to the next |
|
power of two (algorithm from the book "Hacker's Delight"). |
|
|
|
static Uint32 _next_pow_2(Uint32 x) |
{ | { |
return String::compare(str1, str2) > 0; |
if (x < 8) |
|
return 8; |
|
|
|
x--; |
|
x |= (x >> 1); |
|
x |= (x >> 2); |
|
x |= (x >> 4); |
|
x |= (x >> 8); |
|
x |= (x >> 16); |
|
x++; |
|
|
|
return x; |
} | } |
| |
Boolean operator>=(const String& str1, const String& str2) |
8. Implemented "concatenating constructors" to eliminate temporaries |
|
created by operator+(). This scheme employs the "return-value |
|
optimization" described by Stan Lippman. |
|
|
|
inline String operator+(const String& s1, const String& s2) |
{ | { |
return String::compare(str1, str2) >= 0; |
return String(s1, s2, 0); |
} | } |
| |
PEGASUS_NAMESPACE_END |
9. Experimented to find the optimal initial size for a short string. |
|
Eight seems to offer the best tradeoff between space and time. |
|
|
|
10. Inlined all members of the Char16 class. |
|
|
|
11. Used Uint16 internally in the String class. This showed no improvement |
|
since Char16 was already fully inlined and was essentially reduced to |
|
Uint16 in any case. |
|
|
|
12. Implemented conditional logic (#if) allowing error checking logic to |
|
be excluded for better performance. Examples include bounds checking |
|
and null-pointer checking. |
|
|
|
13. Used memcpy() and memcmp() where possible. These are implemented using |
|
the rep family of instructions under Intel and are much faster. |
|
|
|
14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8 |
|
copy routine overhead. |
|
|
|
15. Added ASCII7 form of the constructor and assign(). |
|
|
|
String s("hello world", String::ASCII7); |
|
|
|
s.assignASCII7("hello world"); |
|
|
|
This avoids slower UTF8 processing when not needed. |
|
|
|
================================================================================ |
|
*/ |