version 1.111.6.7, 2005/10/11 01:44:27
|
version 1.111.6.15, 2005/10/14 14:09:29
|
|
|
// | // |
//============================================================================== | //============================================================================== |
// | // |
// Author: Mike Brasher (mbrasher@bmc.com) |
// Author: Mike Brasher (mbrasher@austin.rr.com) |
// | // |
// Modified By: | // Modified By: |
// Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com) | // Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com) |
|
|
// | // |
//============================================================================== | //============================================================================== |
| |
|
// Note: this table is much faster than the system toupper(). Please do not |
|
// change. |
|
|
const Uint8 _toUpperTable[256] = | const Uint8 _toUpperTable[256] = |
{ | { |
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, | 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, |
|
|
0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF, | 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF, |
}; | }; |
| |
|
// Note: this table is much faster than the system tolower(). Please do not |
|
// change. |
|
|
const Uint8 _toLowerTable[256] = | const Uint8 _toLowerTable[256] = |
{ | { |
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, | 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, |
|
|
0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF, | 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF, |
}; | }; |
| |
// Converts 16-bit characters to upper case. |
// Converts 16-bit characters to upper case. This routine is faster than the |
|
// system toupper(). Please do not change. |
inline Uint16 _toUpper(Uint16 x) | inline Uint16 _toUpper(Uint16 x) |
{ | { |
return (x & 0xFF00) ? x : _toUpperTable[x]; | return (x & 0xFF00) ? x : _toUpperTable[x]; |
} | } |
| |
// Converts 16-bit characters to lower case. |
// Converts 16-bit characters to lower case. This routine is faster than the |
|
// system tolower(). Please do not change. |
inline Uint16 _toLower(Uint16 x) | inline Uint16 _toLower(Uint16 x) |
{ | { |
return (x & 0xFF00) ? x : _toLowerTable[x]; | return (x & 0xFF00) ? x : _toLowerTable[x]; |
|
|
template<class P, class Q> | template<class P, class Q> |
static void _copy(P* p, const Q* q, size_t n) | static void _copy(P* p, const Q* q, size_t n) |
{ | { |
// Use loop unrolling. |
// The following employs loop unrolling for efficiency. Please do not |
|
// eliminate. |
| |
while (n >= 8) | while (n >= 8) |
{ | { |
|
|
| |
static Uint16* _find(const Uint16* s, size_t n, Uint16 c) | static Uint16* _find(const Uint16* s, size_t n, Uint16 c) |
{ | { |
|
// The following employs loop unrolling for efficiency. Please do not |
|
// eliminate. |
|
|
while (n >= 4) | while (n >= 4) |
{ | { |
if (s[0] == c) | if (s[0] == c) |
|
|
while (n-- && (*s1++ - *s2++) == 0) | while (n-- && (*s1++ - *s2++) == 0) |
; | ; |
| |
// |
|
|
|
return s1[-1] - s2[-1]; | return s1[-1] - s2[-1]; |
} | } |
| |
|
|
memcpy(s1, s2, n * sizeof(Uint16)); | memcpy(s1, s2, n * sizeof(Uint16)); |
} | } |
| |
void StrinThrowOutOfBounds() |
void StringThrowOutOfBounds() |
{ | { |
throw IndexOutOfBoundsException(); | throw IndexOutOfBoundsException(); |
} | } |
| |
#ifdef PEGASUS_STRING_NO_THROW |
inline void _checkNullPointer(const void* ptr) |
# define _checkNullPointer(ARG) /* empty */ |
|
#else |
|
template<class T> |
|
inline void _checkNullPointer(const T* ptr) |
|
{ | { |
|
#ifdef PEGASUS_STRING_NO_THROW |
|
|
if (!ptr) | if (!ptr) |
throw NullPointer(); | throw NullPointer(); |
} |
|
#endif | #endif |
|
} |
|
|
|
static void _StringThrowBadUTF8(Uint32 index) |
|
{ |
|
MessageLoaderParms parms( |
|
"Common.String.BAD_UTF8", |
|
"The byte sequence starting at index $0 " |
|
"is not valid UTF-8 encoding.", |
|
index); |
|
throw Exception(parms); |
|
} |
| |
static size_t _copyFromUTF8(Uint16* dest, const char* src, size_t n) |
static size_t _copyFromUTF8( |
|
Uint16* dest, |
|
const char* src, |
|
size_t n, |
|
size_t& utf8_error_index) |
{ | { |
Uint16* p = dest; | Uint16* p = dest; |
const Uint8* q = (const Uint8*)src; | const Uint8* q = (const Uint8*)src; |
|
|
if (c > n || !isValid_U8(q, c) || | if (c > n || !isValid_U8(q, c) || |
UTF8toUTF16(&q, q + c, &p, p + n) != 0) | UTF8toUTF16(&q, q + c, &p, p + n) != 0) |
{ | { |
MessageLoaderParms parms("Common.String.BAD_UTF8", |
utf8_error_index = q - (const Uint8*)src; |
"The byte sequence starting at index $0 " |
return size_t(-1); |
"is not valid UTF-8 encoding.", |
|
q - (const Uint8*)src); |
|
throw Exception(parms); |
|
} | } |
| |
n -= c; | n -= c; |
|
|
// terminator). | // terminator). |
static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n) | static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n) |
{ | { |
|
// The following employs loop unrolling for efficiency. Please do not |
|
// eliminate. |
|
|
const Uint16* q = src; | const Uint16* q = src; |
Uint8* p = (Uint8*)dest; | Uint8* p = (Uint8*)dest; |
| |
|
|
return p - (Uint8*)dest; | return p - (Uint8*)dest; |
} | } |
| |
static inline size_t _convert(Uint16* p, const char* q, size_t n) |
static inline size_t _convert( |
|
Uint16* p, const char* q, size_t n, size_t& utf8_error_index) |
{ | { |
#ifdef PEGASUS_STRING_NO_UTF8 | #ifdef PEGASUS_STRING_NO_UTF8 |
_copy(p, q, n); | _copy(p, q, n); |
return n; | return n; |
#else | #else |
return _copyFromUTF8(p, q, n); |
return _copyFromUTF8(p, q, n, utf8_error_index); |
#endif | #endif |
} | } |
| |
|
|
StringRep* rep = (StringRep*)::operator new( | StringRep* rep = (StringRep*)::operator new( |
sizeof(StringRep) + cap * sizeof(Uint16)); | sizeof(StringRep) + cap * sizeof(Uint16)); |
rep->cap = cap; | rep->cap = cap; |
Atomic_create(&rep->refs, 1); |
new(&rep->refs) AtomicInt(1); |
| |
return rep; | return rep; |
} | } |
| |
static inline void _reserve(StringRep*& rep, Uint32 cap) | static inline void _reserve(StringRep*& rep, Uint32 cap) |
{ | { |
if (cap > rep->cap || Atomic_get(&rep->refs) != 1) |
if (cap > rep->cap || rep->refs.value() != 1) |
{ | { |
size_t n = _roundUpToPow2(cap); | size_t n = _roundUpToPow2(cap); |
StringRep* newRep = StringRep::alloc(n); | StringRep* newRep = StringRep::alloc(n); |
|
|
StringRep* StringRep::create(const char* data, size_t size) | StringRep* StringRep::create(const char* data, size_t size) |
{ | { |
StringRep* rep = StringRep::alloc(size); | StringRep* rep = StringRep::alloc(size); |
rep->size = _convert((Uint16*)rep->data, data, size); |
size_t utf8_error_index; |
rep->data[rep->size] = '\0'; |
rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index); |
| |
return rep; |
#ifndef PEGASUS_STRING_NO_THROW |
|
if (rep->size == size_t(-1)) |
|
{ |
|
StringRep::free(rep); |
|
_StringThrowBadUTF8(utf8_error_index); |
} | } |
|
#endif |
|
|
|
rep->data[rep->size] = '\0'; |
| |
StringRep* StringRep::createASCII7(const char* data, size_t size) |
|
{ |
|
StringRep* rep = StringRep::alloc(size); |
|
_copy((Uint16*)rep->data, data, size); |
|
rep->data[rep->size = size] = '\0'; |
|
return rep; | return rep; |
} | } |
| |
|
|
String::String(const char* str) | String::String(const char* str) |
{ | { |
_checkNullPointer(str); | _checkNullPointer(str); |
_rep = StringRep::create(str, strlen(str)); |
|
} |
|
| |
String::String(const char* str, String::ASCII7Tag tag) |
// Set this just in case create() throws an exception. |
{ |
_rep = &StringRep::_emptyRep; |
_checkNullPointer(str); |
_rep = StringRep::create(str, strlen(str)); |
_rep = StringRep::createASCII7(str, strlen(str)); |
|
} | } |
| |
String::String(const char* str, Uint32 n) | String::String(const char* str, Uint32 n) |
{ | { |
_checkNullPointer(str); | _checkNullPointer(str); |
_rep = StringRep::create(str, n); |
|
} |
|
| |
String::String(const char* str, size_t n, String::ASCII7Tag tag) |
// Set this just in case create() throws an exception. |
{ |
_rep = &StringRep::_emptyRep; |
_checkNullPointer(str); |
_rep = StringRep::create(str, n); |
_rep = StringRep::createASCII7(str, n); |
|
} | } |
| |
String::String(const String& s1, const String& s2) | String::String(const String& s1, const String& s2) |
|
|
size_t n2 = strlen(s2); | size_t n2 = strlen(s2); |
_rep = StringRep::alloc(n1 + n2); | _rep = StringRep::alloc(n1 + n2); |
_copy(_rep->data, s1._rep->data, n1); | _copy(_rep->data, s1._rep->data, n1); |
_rep->size = n1 + _convert((Uint16*)_rep->data + n1, s2, n2); |
size_t utf8_error_index; |
|
size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index); |
|
|
|
#ifndef PEGASUS_STRING_NO_THROW |
|
if (tmp == size_t(-1)) |
|
{ |
|
StringRep::free(_rep); |
|
_rep = &StringRep::_emptyRep; |
|
_StringThrowBadUTF8(utf8_error_index); |
|
} |
|
#endif |
|
|
|
_rep->size = n1 + tmp; |
_rep->data[_rep->size] = '\0'; | _rep->data[_rep->size] = '\0'; |
} | } |
| |
|
|
size_t n1 = strlen(s1); | size_t n1 = strlen(s1); |
size_t n2 = s2._rep->size; | size_t n2 = s2._rep->size; |
_rep = StringRep::alloc(n1 + n2); | _rep = StringRep::alloc(n1 + n2); |
_rep->size = n2 + _convert((Uint16*)_rep->data, s1, n1); |
size_t utf8_error_index; |
|
size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index); |
|
|
|
#ifndef PEGASUS_STRING_NO_THROW |
|
if (tmp == size_t(-1)) |
|
{ |
|
StringRep::free(_rep); |
|
_rep = &StringRep::_emptyRep; |
|
_StringThrowBadUTF8(utf8_error_index); |
|
} |
|
#endif |
|
|
|
_rep->size = n2 + tmp; |
_copy(_rep->data + n1, s2._rep->data, n2); | _copy(_rep->data + n1, s2._rep->data, n2); |
_rep->data[_rep->size] = '\0'; | _rep->data[_rep->size] = '\0'; |
} | } |
|
|
{ | { |
_checkNullPointer(str); | _checkNullPointer(str); |
| |
if (n > _rep->cap || Atomic_get(&_rep->refs) != 1) |
if (n > _rep->cap || _rep->refs.value() != 1) |
{ | { |
StringRep::unref(_rep); | StringRep::unref(_rep); |
_rep = StringRep::alloc(n); | _rep = StringRep::alloc(n); |
|
|
{ | { |
_checkNullPointer(str); | _checkNullPointer(str); |
| |
if (n > _rep->cap || Atomic_get(&_rep->refs) != 1) |
if (n > _rep->cap || _rep->refs.value() != 1) |
{ | { |
StringRep::unref(_rep); | StringRep::unref(_rep); |
_rep = StringRep::alloc(n); | _rep = StringRep::alloc(n); |
} | } |
| |
_rep->size = _convert(_rep->data, str, n); |
size_t utf8_error_index; |
_rep->data[_rep->size] = 0; |
_rep->size = _convert(_rep->data, str, n, utf8_error_index); |
|
|
return *this; |
|
} |
|
| |
String& String::assignASCII7(const char* str, Uint32 n) |
#ifndef PEGASUS_STRING_NO_THROW |
{ |
if (_rep->size == size_t(-1)) |
_checkNullPointer(str); |
|
|
|
if (n > _rep->cap || Atomic_get(&_rep->refs) != 1) |
|
{ | { |
StringRep::unref(_rep); |
StringRep::free(_rep); |
_rep = StringRep::alloc(n); |
_rep = &StringRep::_emptyRep; |
|
_StringThrowBadUTF8(utf8_error_index); |
} | } |
|
#endif |
| |
_copy(_rep->data, str, n); |
_rep->data[_rep->size] = 0; |
_rep->data[_rep->size = n] = 0; |
|
| |
return *this; | return *this; |
} | } |
|
|
{ | { |
if (_rep->size) | if (_rep->size) |
{ | { |
if (Atomic_get(&_rep->refs) == 1) |
if (_rep->refs.value() == 1) |
{ | { |
_rep->size = 0; | _rep->size = 0; |
_rep->data[0] = '\0'; | _rep->data[0] = '\0'; |
|
|
| |
CString String::getCString() const | CString String::getCString() const |
{ | { |
|
// A UTF8 string can have three times as many characters as its UTF16 |
|
// counterpart, so we allocate extra memory for the worst case. In the |
|
// best case, we may need only one third of the memory allocated. But |
|
// downsizing the string afterwards is expensive and unnecessary since |
|
// CString objects are usually short-lived (disappearing after only a few |
|
// instructions). CString objects are typically created on the stack as |
|
// means to obtain a char* pointer. |
|
|
#ifdef PEGASUS_STRING_NO_UTF8 | #ifdef PEGASUS_STRING_NO_UTF8 |
char* str = (char*)operator new(_rep->size + 1); | char* str = (char*)operator new(_rep->size + 1); |
_copy(str, _rep->data, _rep->size); | _copy(str, _rep->data, _rep->size); |
|
|
size_t cap = oldSize + size; | size_t cap = oldSize + size; |
| |
_reserve(_rep, cap); | _reserve(_rep, cap); |
_rep->size += _convert((Uint16*)_rep->data + oldSize, str, size); |
size_t utf8_error_index; |
|
size_t tmp = _convert( |
|
(Uint16*)_rep->data + oldSize, str, size, utf8_error_index); |
|
|
|
#ifndef PEGASUS_STRING_NO_THROW |
|
if (tmp == size_t(-1)) |
|
{ |
|
StringRep::free(_rep); |
|
_rep = &StringRep::_emptyRep; |
|
_StringThrowBadUTF8(utf8_error_index); |
|
} |
|
#endif |
|
|
|
_rep->size += tmp; |
_rep->data[_rep->size] = '\0'; | _rep->data[_rep->size] = '\0'; |
| |
return *this; | return *this; |
|
|
| |
_checkBounds(index + n, _rep->size); | _checkBounds(index + n, _rep->size); |
| |
if (Atomic_get(&_rep->refs) != 1) |
if (_rep->refs.value() != 1) |
_rep = StringRep::copyOnWrite(_rep); | _rep = StringRep::copyOnWrite(_rep); |
| |
assert(index + n <= _rep->size); | assert(index + n <= _rep->size); |
|
|
| |
if (InitializeICU::initICUSuccessful()) | if (InitializeICU::initICUSuccessful()) |
{ | { |
if (Atomic_get(&_rep->refs) != 1) |
if (_rep->refs.value() != 1) |
_rep = StringRep::copyOnWrite(_rep); | _rep = StringRep::copyOnWrite(_rep); |
| |
// This will do a locale-insensitive, but context-sensitive convert. | // This will do a locale-insensitive, but context-sensitive convert. |
|
|
| |
#endif /* PEGASUS_HAS_ICU */ | #endif /* PEGASUS_HAS_ICU */ |
| |
if (Atomic_get(&_rep->refs) != 1) |
if (_rep->refs.value() != 1) |
_rep = StringRep::copyOnWrite(_rep); | _rep = StringRep::copyOnWrite(_rep); |
| |
Uint16* p = _rep->data; | Uint16* p = _rep->data; |
|
|
| |
if (InitializeICU::initICUSuccessful()) | if (InitializeICU::initICUSuccessful()) |
{ | { |
if (Atomic_get(&_rep->refs) != 1) |
if (_rep->refs.value() != 1) |
_rep = StringRep::copyOnWrite(_rep); | _rep = StringRep::copyOnWrite(_rep); |
| |
// This will do a locale-insensitive, but context-sensitive convert. | // This will do a locale-insensitive, but context-sensitive convert. |
|
|
| |
#endif /* PEGASUS_HAS_ICU */ | #endif /* PEGASUS_HAS_ICU */ |
| |
if (Atomic_get(&_rep->refs) != 1) |
if (_rep->refs.value() != 1) |
_rep = StringRep::copyOnWrite(_rep); | _rep = StringRep::copyOnWrite(_rep); |
| |
Uint16* p = _rep->data; | Uint16* p = _rep->data; |
|
|
| |
#else /* PEGASUS_HAS_ICU */ | #else /* PEGASUS_HAS_ICU */ |
| |
|
// The following employs loop unrolling for efficiency. Please do not |
|
// eliminate. |
|
|
Uint16* p = (Uint16*)s1.getChar16Data(); | Uint16* p = (Uint16*)s1.getChar16Data(); |
Uint16* q = (Uint16*)s2.getChar16Data(); | Uint16* q = (Uint16*)s2.getChar16Data(); |
Uint32 n = s2.size(); | Uint32 n = s2.size(); |
|
|
| |
(+) [DONE] Check for overflow condition in StringRep::alloc(). | (+) [DONE] Check for overflow condition in StringRep::alloc(). |
| |
----------- |
(+) [DONE] Remove tabs (used vim ":set expandtab" and ":retab"). |
| |
(+) Fix throw-related memory leak. |
(+) [DONE] Fix throw-related memory leak. |
| |
(+) DOC++ String.h |
(+) [DONE] Look at PEP223 for coding security guidelines. |
| |
(+) Look at PEP223 for coding security guidelines. |
(+) [DONE] Use old AtomicInt for now (split new AtomicInt into another |
|
bug. |
| |
(+) Replace AtomicInt with new Atomic implementation. |
(+) [DONE] Removed appendASCII() and the ASCII form of the constructor. |
| |
(+) Implement Atomic operations for HP. |
----------- |
| |
(+) Remove tabs. |
(+) DOC++ String.h |
| |
================================================================================ | ================================================================================ |
*/ | */ |