version 1.25, 2001/06/17 04:20:33
|
version 1.125, 2007/01/11 16:21:54
|
|
|
//%///////////////////////////////////////////////////////////////////////////// |
//%2006//////////////////////////////////////////////////////////////////////// |
// | // |
// Copyright (c) 2000, 2001 The Open group, BMC Software, Tivoli Systems, IBM |
// Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development |
|
// Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems. |
|
// Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.; |
|
// IBM Corp.; EMC Corporation, The Open Group. |
|
// Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.; |
|
// IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group. |
|
// Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.; |
|
// EMC Corporation; VERITAS Software Corporation; The Open Group. |
|
// Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.; |
|
// EMC Corporation; Symantec Corporation; The Open Group. |
// | // |
// Permission is hereby granted, free of charge, to any person obtaining a copy | // Permission is hereby granted, free of charge, to any person obtaining a copy |
// of this software and associated documentation files (the "Software"), to | // of this software and associated documentation files (the "Software"), to |
|
|
// | // |
//============================================================================== | //============================================================================== |
// | // |
// Author: Mike Brasher (mbrasher@bmc.com) |
|
// |
|
// Modified By: |
|
// |
|
//%///////////////////////////////////////////////////////////////////////////// | //%///////////////////////////////////////////////////////////////////////////// |
| |
|
#include <Pegasus/Common/PegasusAssert.h> |
#include <cctype> |
#include <cstring> |
#include "String.h" |
#include "InternalException.h" |
#include "Exception.h" |
#include "CommonUTF.h" |
#include "String.h" |
#include "MessageLoader.h" |
#include <iostream> |
#include "StringRep.h" |
|
|
|
#ifdef PEGASUS_HAS_ICU |
|
#include <unicode/ustring.h> |
|
#include <unicode/uchar.h> |
|
#endif |
| |
PEGASUS_NAMESPACE_BEGIN | PEGASUS_NAMESPACE_BEGIN |
| |
#define PEGASUS_ARRAY_T String |
//============================================================================== |
#include <Pegasus/Common/ArrayImpl.h> |
// |
#undef PEGASUS_ARRAY_T |
// Compile-time macros (undefined by default). |
|
// |
|
// PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code. |
|
// |
|
//============================================================================== |
| |
const String String::EMPTY; |
//============================================================================== |
|
// |
|
// File-scope definitions: |
|
// |
|
//============================================================================== |
|
|
|
// Note: this table is much faster than the system toupper(). Please do not |
|
// change. |
| |
static inline void _SkipWhitespace(const Char16*& p) |
const Uint8 _toUpperTable[256] = |
{ | { |
while (*p && isspace(*p)) |
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, |
p++; |
0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, |
|
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, |
|
0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, |
|
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, |
|
0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, |
|
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, |
|
0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, |
|
0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47, |
|
0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, |
|
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57, |
|
0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, |
|
0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47, |
|
0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, |
|
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57, |
|
0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F, |
|
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, |
|
0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, |
|
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, |
|
0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, |
|
0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, |
|
0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, |
|
0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, |
|
0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, |
|
0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, |
|
0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, |
|
0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, |
|
0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, |
|
0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, |
|
0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, |
|
0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, |
|
0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF, |
|
}; |
|
|
|
// Note: this table is much faster than the system tolower(). Please do not
|
// change. |
|
|
|
// Lower-case lookup table indexed by an 8-bit character value. Every entry
// maps to itself except 0x41-0x5A ('A'-'Z'), which map to 0x61-0x7A
// ('a'-'z'). Values 0x80-0xFF are left unchanged.
const Uint8 _toLowerTable[256] =

{

0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,

0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,

0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,

0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,

0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,

0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,

0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,

0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,

// 0x40-0x5F: upper-case letters are folded to their lower-case values.
0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,

0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,

0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,

0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,

// 0x60-0x7F: already lower case; identity mapping.
0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,

0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,

0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,

0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,

// 0x80-0xFF: identity mapping.
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,

0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,

0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,

0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,

0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,

0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,

0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,

0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,

0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,

0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,

0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,

0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,

0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,

0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,

0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,

0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,

};
|
|
|
// Converts 16-bit characters to upper case. This routine is faster than the |
|
// system toupper(). Please do not change. |
|
inline Uint16 _toUpper(Uint16 x)
{
    // Any character with a bit set in the high byte is outside the range of
    // the 256-entry table and is returned unchanged.
    if (x & 0xFF00)
        return x;

    return _toUpperTable[x];
}
|
|
|
// Converts 16-bit characters to lower case. This routine is faster than the |
|
// system tolower(). Please do not change.
|
inline Uint16 _toLower(Uint16 x)
{
    // Any character with a bit set in the high byte is outside the range of
    // the 256-entry table and is returned unchanged.
    if (x & 0xFF00)
        return x;

    return _toLowerTable[x];
}
|
|
|
// Rounds x up to the nearest power of two (or just returns 8 if x < 8). |
|
static Uint32 _roundUpToPow2(Uint32 x)
{
    // Check for potential overflow in x
    PEGASUS_CHECK_CAPACITY_OVERFLOW(x);

    // Small requests all share the minimum result of 8.
    if (x < 8)
        return 8;

    // Smear the highest set bit of (x - 1) into every lower bit position,
    // then add one to land on the next power of two.
    Uint32 bits = x - 1;

    for (Uint32 shift = 1; shift <= 16; shift <<= 1)
        bits |= (bits >> shift);

    return bits + 1;
}
|
|
|
template<class P, class Q> |
|
static void _copy(P* p, const Q* q, size_t n) |
|
{ |
|
// The following employs loop unrolling for efficiency. Please do not |
|
// eliminate. |
|
|
|
while (n >= 8) |
|
{ |
|
p[0] = q[0]; |
|
p[1] = q[1]; |
|
p[2] = q[2]; |
|
p[3] = q[3]; |
|
p[4] = q[4]; |
|
p[5] = q[5]; |
|
p[6] = q[6]; |
|
p[7] = q[7]; |
|
p += 8; |
|
q += 8; |
|
n -= 8; |
|
} |
|
|
|
while (n >= 4) |
|
{ |
|
p[0] = q[0]; |
|
p[1] = q[1]; |
|
p[2] = q[2]; |
|
p[3] = q[3]; |
|
p += 4; |
|
q += 4; |
|
n -= 4; |
} | } |
| |
inline Uint32 StrLen(const char* str) |
while (n--) |
|
*p++ = *q++; |
|
} |
|
|
|
static Uint16* _find(const Uint16* s, size_t n, Uint16 c) |
{ | { |
if (!str) |
// The following employs loop unrolling for efficiency. Please do not |
throw NullPointer(); |
// eliminate. |
| |
return strlen(str); |
while (n >= 4) |
|
{ |
|
if (s[0] == c) |
|
return (Uint16*)s; |
|
if (s[1] == c) |
|
return (Uint16*)&s[1]; |
|
if (s[2] == c) |
|
return (Uint16*)&s[2]; |
|
if (s[3] == c) |
|
return (Uint16*)&s[3]; |
|
|
|
n -= 4; |
|
s += 4; |
} | } |
| |
inline Uint32 StrLen(const Char16* str) |
if (n) |
{ | { |
if (!str) |
if (*s == c) |
throw NullPointer(); |
return (Uint16*)s; |
|
s++; |
|
n--; |
|
} |
| |
Uint32 n = 0; |
if (n) |
|
{ |
|
if (*s == c) |
|
return (Uint16*)s; |
|
s++; |
|
n--; |
|
} |
| |
while (*str++) |
if (n && *s == c) |
n++; |
return (Uint16*)s; |
| |
return n; |
// Not found! |
|
return 0; |
} | } |
| |
String::String() |
static int _compare(const Uint16* s1, const Uint16* s2) |
|
{ |
|
while (*s1 && *s2) |
{ | { |
_rep.append('\0'); |
int r = *s1++ - *s2++; |
|
|
|
if (r) |
|
return r; |
|
} |
|
|
|
if (*s2) |
|
return -1; |
|
else if (*s1) |
|
return 1; |
|
|
|
return 0; |
} | } |
| |
String::String(const String& x) : _rep(x._rep) |
static int _compareNoUTF8(const Uint16* s1, const char* s2) |
{ | { |
|
Uint16 c1; |
|
Uint16 c2; |
|
|
|
do |
|
{ |
|
c1 = *s1++; |
|
c2 = *s2++; |
|
|
|
if (c1 == 0) |
|
return c1 - c2; |
|
} |
|
while (c1 == c2); |
| |
|
return c1 - c2; |
} | } |
| |
String::String(const String& x, Uint32 n) |
static inline void _copy(Uint16* s1, const Uint16* s2, size_t n) |
{ | { |
_rep.append('\0'); |
memcpy(s1, s2, n * sizeof(Uint16)); |
append(x.getData(), n); |
|
} | } |
| |
String::String(const Char16* x) : _rep(x, StrLen(x) + 1) |
void StringThrowOutOfBounds() |
{ | { |
|
throw IndexOutOfBoundsException(); |
|
} |
| |
|
inline void _checkNullPointer(const void* ptr) |
|
{ |
|
if (!ptr) |
|
throw NullPointer(); |
} | } |
| |
String::String(const Char16* x, Uint32 n) |
// Throws an Exception carrying the "Common.String.BAD_UTF8" message, with
// index substituted for $0 as the position of the offending byte sequence.
static void _StringThrowBadUTF8(Uint32 index)
{
    MessageLoaderParms badUtf8Parms(
        "Common.String.BAD_UTF8",
        "The byte sequence starting at index $0 is not valid UTF-8 encoding.",
        index);

    throw Exception(badUtf8Parms);
}
|
|
|
static size_t _copyFromUTF8( |
|
Uint16* dest, |
|
const char* src, |
|
size_t n, |
|
size_t& utf8_error_index) |
|
{ |
|
Uint16* p = dest; |
|
const Uint8* q = (const Uint8*)src; |
|
|
|
// Process leading 7-bit ASCII characters (to avoid UTF8 overhead later). |
|
// Use loop-unrolling. |
|
|
|
while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0) |
|
{ |
|
p[0] = q[0]; |
|
p[1] = q[1]; |
|
p[2] = q[2]; |
|
p[3] = q[3]; |
|
p[4] = q[4]; |
|
p[5] = q[5]; |
|
p[6] = q[6]; |
|
p[7] = q[7]; |
|
p += 8; |
|
q += 8; |
|
n -= 8; |
|
} |
|
|
|
while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0) |
|
{ |
|
p[0] = q[0]; |
|
p[1] = q[1]; |
|
p[2] = q[2]; |
|
p[3] = q[3]; |
|
p += 4; |
|
q += 4; |
|
n -= 4; |
|
} |
|
|
|
switch (n) |
|
{ |
|
case 0: |
|
return p - dest; |
|
case 1: |
|
if (q[0] < 128) |
|
{ |
|
p[0] = q[0]; |
|
return p + 1 - dest; |
|
} |
|
break; |
|
case 2: |
|
if (((q[0]|q[1]) & 0x80) == 0) |
|
{ |
|
p[0] = q[0]; |
|
p[1] = q[1]; |
|
return p + 2 - dest; |
|
} |
|
break; |
|
case 3: |
|
if (((q[0]|q[1]|q[2]) & 0x80) == 0) |
{ | { |
assign(x, n); |
p[0] = q[0]; |
|
p[1] = q[1]; |
|
p[2] = q[2]; |
|
return p + 3 - dest; |
|
} |
|
break; |
} | } |
| |
String::String(const char* str) |
// Process remaining characters. |
|
|
|
while (n) |
{ | { |
Uint32 n = ::strlen(str) + 1; |
// Optimize for 7-bit ASCII case. |
reserve(n); |
|
| |
while (n--) |
if (*q < 128) |
_rep.append(*str++); |
{ |
|
*p++ = *q++; |
|
n--; |
} | } |
|
else |
|
{ |
|
Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1; |
| |
String::String(const char* str, Uint32 n_) |
if (c > n || !isValid_U8(q, c) || |
|
UTF8toUTF16(&q, q + c, &p, p + n) != 0) |
{ | { |
Uint32 n = _min(strlen(str), n_); |
utf8_error_index = q - (const Uint8*)src; |
reserve(n + 1); |
return size_t(-1); |
|
} |
| |
while (n--) |
n -= c; |
_rep.append(*str++); |
} |
|
} |
| |
_rep.append('\0'); |
return p - dest; |
} | } |
| |
String& String::assign(const Char16* x) |
// Note: dest must be at least three times src (plus an extra byte for |
|
// terminator). |
|
static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n) |
{ | { |
_rep.clear(); |
// The following employs loop unrolling for efficiency. Please do not |
_rep.append(x, StrLen(x) + 1); |
// eliminate. |
return *this; |
|
|
const Uint16* q = src; |
|
Uint8* p = (Uint8*)dest; |
|
|
|
while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128) |
|
{ |
|
p[0] = q[0]; |
|
p[1] = q[1]; |
|
p[2] = q[2]; |
|
p[3] = q[3]; |
|
p += 4; |
|
q += 4; |
|
n -= 4; |
} | } |
| |
String& String::assign(const Char16* str, Uint32 n) |
switch (n) |
{ | { |
_rep.clear(); |
case 0: |
Uint32 m = _min(StrLen(str), n); |
return p - (Uint8*)dest; |
_rep.append(str, m); |
case 1: |
_rep.append('\0'); |
if (q[0] < 128) |
return *this; |
{ |
|
p[0] = q[0]; |
|
return p + 1 - (Uint8*)dest; |
|
} |
|
break; |
|
case 2: |
|
if (q[0] < 128 && q[1] < 128) |
|
{ |
|
p[0] = q[0]; |
|
p[1] = q[1]; |
|
return p + 2 - (Uint8*)dest; |
} | } |
|
break; |
|
case 3: |
|
if (q[0] < 128 && q[1] < 128 && q[2] < 128) |
|
{ |
|
p[0] = q[0]; |
|
p[1] = q[1]; |
|
p[2] = q[2]; |
|
return p + 3 - (Uint8*)dest; |
|
} |
|
break; |
|
} |
|
|
|
// If this line was reached, there must be characters that are 128 or greater.
|
|
|
UTF16toUTF8(&q, q + n, &p, p + 3 * n); |
| |
String& String::assign(const char* x) |
return p - (Uint8*)dest; |
|
} |
|
|
|
static inline size_t _convert( |
|
Uint16* p, const char* q, size_t n, size_t& utf8_error_index) |
{ | { |
_rep.clear(); |
#ifdef PEGASUS_STRING_NO_UTF8 |
Uint32 n = strlen(x); |
_copy(p, q, n); |
_rep.reserve(n + 1); |
return n; |
|
#else |
|
return _copyFromUTF8(p, q, n, utf8_error_index); |
|
#endif |
|
} |
| |
while (n--) |
//============================================================================== |
_rep.append(*x++); |
// |
|
// class CString |
|
// |
|
//============================================================================== |
| |
_rep.append('\0'); |
// Copy constructor: duplicates the other object's null-terminated buffer
// (terminator included). A null source leaves this object's _rep null.
CString::CString(const CString& cstr) : _rep(0)
{
    if (!cstr._rep)
        return;

    const size_t bytes = strlen(cstr._rep) + 1;
    _rep = static_cast<char*>(operator new(bytes));
    memcpy(_rep, cstr._rep, bytes);
}
|
|
|
// Copy assignment: releases the current buffer and duplicates the buffer of
// cstr (terminator included). Self-assignment is a no-op.
CString& CString::operator=(const CString& cstr)
{
    if (this == &cstr)
        return *this;

    // Drop whatever this object currently owns.
    if (_rep)
    {
        operator delete(_rep);
        _rep = 0;
    }

    // Duplicate the source buffer, if it has one.
    if (cstr._rep)
    {
        const size_t bytes = strlen(cstr._rep) + 1;
        _rep = static_cast<char*>(operator new(bytes));
        memcpy(_rep, cstr._rep, bytes);
    }

    return *this;
}
| |
String& String::assign(const char* x, Uint32 n_) |
//============================================================================== |
|
// |
|
// class StringRep |
|
// |
|
//============================================================================== |
|
|
|
StringRep StringRep::_emptyRep; |
|
|
|
inline StringRep* StringRep::alloc(size_t cap) |
{ | { |
_rep.clear(); |
// Check for potential overflow in cap |
|
PEGASUS_CHECK_CAPACITY_OVERFLOW(cap); |
| |
Uint32 n = _min(strlen(x), n_); |
StringRep* rep = (StringRep*)::operator new( |
_rep.reserve(n + 1); |
sizeof(StringRep) + cap * sizeof(Uint16)); |
|
rep->cap = cap; |
|
new(&rep->refs) AtomicInt(1); |
| |
while (n--) |
return rep; |
_rep.append(*x++); |
} |
| |
_rep.append('\0'); |
// Ensures that rep is unshared (reference count of one) and has capacity for
// at least cap characters. When the current representation is too small or
// is shared, a new representation is allocated, the existing characters and
// terminator are copied into it, and the old one is released.
static inline void _reserve(StringRep*& rep, Uint32 cap)
{
    if (cap > rep->cap || rep->refs.get() != 1)
    {
        // A shared rep is reallocated even when cap is smaller than the
        // current string. The new buffer must still hold the existing
        // contents, otherwise the copy below would write past its end.
        if (cap < rep->size)
            cap = (Uint32)rep->size;

        size_t n = _roundUpToPow2(cap);
        StringRep* newRep = StringRep::alloc(n);
        newRep->size = rep->size;

        // Copy the characters plus the null terminator.
        _copy(newRep->data, rep->data, rep->size + 1);

        StringRep::unref(rep);
        rep = newRep;
    }
}
| |
return *this; |
// Allocates a representation with room for size characters, copies size
// characters from data into it and appends a null terminator.
StringRep* StringRep::create(const Uint16* data, size_t size)
{
    StringRep* const result = StringRep::alloc(size);

    _copy(result->data, data, size);
    result->data[size] = '\0';
    result->size = size;

    return result;
}
| |
char* String::allocateCString(Uint32 extraBytes, Boolean noThrow) const |
StringRep* StringRep::copyOnWrite(StringRep* rep) |
{ | { |
Uint32 n = size() + 1; |
// Return a new copy of rep. Release rep. |
char* str = new char[n + extraBytes]; |
|
char* p = str; |
|
const Char16* q = getData(); |
|
| |
for (Uint32 i = 0; i < n; i++) |
StringRep* newRep = StringRep::alloc(rep->size); |
|
newRep->size = rep->size; |
|
_copy(newRep->data, rep->data, rep->size); |
|
newRep->data[newRep->size] = '\0'; |
|
StringRep::unref(rep); |
|
return newRep; |
|
} |
|
|
|
StringRep* StringRep::create(const char* data, size_t size) |
{ | { |
Uint16 c = *q++; |
StringRep* rep = StringRep::alloc(size); |
*p++ = char(c); |
size_t utf8_error_index; |
|
rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index); |
| |
if ((c & 0xff00) && !noThrow) |
if (rep->size == size_t(-1)) |
throw TruncatedCharacter(); |
{ |
|
StringRep::free(rep); |
|
_StringThrowBadUTF8((Uint32)utf8_error_index); |
} | } |
| |
return str; |
rep->data[rep->size] = '\0'; |
|
|
|
return rep; |
} | } |
| |
void String::appendToCString( |
Uint32 StringRep::length(const Uint16* str) |
char* str, |
|
Uint32 length, |
|
Boolean noThrow) const |
|
{ | { |
if (!str) |
// Note: We could unroll this but it is rarely called. |
throw NullPointer(); |
|
|
const Uint16* end = (Uint16*)str; |
|
|
|
while (*end++) |
|
; |
|
|
|
return (Uint32)(end - str - 1); |
|
} |
| |
Uint32 n = _min(size(), length); |
//============================================================================== |
|
// |
|
// class String |
|
// |
|
//============================================================================== |
| |
char* p = str + strlen(str); |
const String String::EMPTY; |
const Char16* q = getData(); |
|
| |
for (Uint32 i = 0; i < n; i++) |
String::String(const String& str, Uint32 n) |
{ | { |
Uint16 c = *q++; |
_checkBounds(n, str._rep->size); |
*p++ = char(c); |
_rep = StringRep::create(str._rep->data, n); |
|
} |
| |
if ((c & 0xff00) && !noThrow) |
String::String(const Char16* str) |
throw TruncatedCharacter(); |
{ |
|
_checkNullPointer(str); |
|
_rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str)); |
} | } |
| |
*p = '\0'; |
// Constructs a string from the first n characters of str. A null str is
// rejected by _checkNullPointer().
String::String(const Char16* str, Uint32 n)
{
    _checkNullPointer(str);

    Uint16* const chars = (Uint16*)str;
    _rep = StringRep::create(chars, n);
}
| |
Char16& String::operator[](Uint32 i) |
String::String(const char* str) |
{ | { |
if (i > size()) |
_checkNullPointer(str); |
ThrowOutOfBounds(); |
|
| |
return _rep[i]; |
// Set this just in case create() throws an exception. |
|
_rep = &StringRep::_emptyRep; |
|
_rep = StringRep::create(str, strlen(str)); |
} | } |
| |
const Char16 String::operator[](Uint32 i) const |
String::String(const char* str, Uint32 n) |
{ | { |
if (i > size()) |
_checkNullPointer(str); |
ThrowOutOfBounds(); |
|
| |
return _rep[i]; |
// Set this just in case create() throws an exception. |
|
_rep = &StringRep::_emptyRep; |
|
_rep = StringRep::create(str, n); |
} | } |
| |
String& String::append(const Char16* str, Uint32 n) |
String::String(const String& s1, const String& s2) |
{ | { |
Uint32 m = _min(StrLen(str), n); |
size_t n1 = s1._rep->size; |
_rep.reserve(_rep.size() + m); |
size_t n2 = s2._rep->size; |
_rep.remove(_rep.size() - 1); |
size_t n = n1 + n2; |
_rep.append(str, m); |
_rep = StringRep::alloc(n); |
_rep.append('\0'); |
_copy(_rep->data, s1._rep->data, n1); |
return *this; |
_copy(_rep->data + n1, s2._rep->data, n2); |
|
_rep->size = n; |
|
_rep->data[n] = '\0'; |
} | } |
| |
void String::remove(Uint32 pos, Uint32 size) |
String::String(const String& s1, const char* s2) |
{ | { |
if (size == PEG_NOT_FOUND) |
_checkNullPointer(s2); |
size = this->size() - pos; |
size_t n1 = s1._rep->size; |
|
size_t n2 = strlen(s2); |
|
_rep = StringRep::alloc(n1 + n2); |
|
_copy(_rep->data, s1._rep->data, n1); |
|
size_t utf8_error_index; |
|
size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index); |
| |
if (pos + size > this->size()) |
if (tmp == size_t(-1)) |
ThrowOutOfBounds(); |
{ |
|
StringRep::free(_rep); |
|
_rep = &StringRep::_emptyRep; |
|
_StringThrowBadUTF8((Uint32)utf8_error_index); |
|
} |
| |
if (size) |
_rep->size = n1 + tmp; |
_rep.remove(pos, size); |
_rep->data[_rep->size] = '\0'; |
} | } |
| |
int String::compare(const Char16* s1, const Char16* s2, Uint32 n) |
String::String(const char* s1, const String& s2) |
{ | { |
while (n--) |
_checkNullPointer(s1); |
{ |
size_t n1 = strlen(s1); |
int r = *s1++ - *s2++; |
size_t n2 = s2._rep->size; |
|
_rep = StringRep::alloc(n1 + n2); |
|
size_t utf8_error_index; |
|
size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index); |
| |
if (r) |
if (tmp == size_t(-1)) |
return r; |
{ |
|
StringRep::free(_rep); |
|
_rep = &StringRep::_emptyRep; |
|
_StringThrowBadUTF8((Uint32)utf8_error_index); |
} | } |
| |
return 0; |
_rep->size = n2 + tmp; |
|
_copy(_rep->data + n1, s2._rep->data, n2); |
|
_rep->data[_rep->size] = '\0'; |
} | } |
| |
int String::compareNoCase(const char* s1, const char* s2, Uint32 n) |
String& String::assign(const String& str) |
{ | { |
while (n--) |
if (_rep != str._rep) |
{ | { |
int r = tolower(*s1++) - tolower(*s2++); |
StringRep::unref(_rep); |
|
StringRep::ref(_rep = str._rep); |
|
} |
| |
if (r) |
return *this; |
return r; |
|
} | } |
| |
return 0; |
String& String::assign(const Char16* str, Uint32 n) |
|
{ |
|
_checkNullPointer(str); |
|
|
|
if (n > _rep->cap || _rep->refs.get() != 1) |
|
{ |
|
StringRep::unref(_rep); |
|
_rep = StringRep::alloc(n); |
} | } |
| |
Boolean String::equal(const String& x, const String& y) |
_rep->size = n; |
|
_copy(_rep->data, (Uint16*)str, n); |
|
_rep->data[n] = '\0'; |
|
|
|
return *this; |
|
} |
|
|
|
String& String::assign(const char* str, Uint32 n) |
{ | { |
if (x.size() != y.size()) |
_checkNullPointer(str); |
return false; |
|
| |
return String::compare(x.getData(), y.getData(), x.size()) == 0; |
if (n > _rep->cap || _rep->refs.get() != 1) |
|
{ |
|
StringRep::unref(_rep); |
|
_rep = StringRep::alloc(n); |
} | } |
| |
Boolean String::equal(const String& x, const Char16* y) |
size_t utf8_error_index; |
|
_rep->size = _convert(_rep->data, str, n, utf8_error_index); |
|
|
|
if (_rep->size == size_t(-1)) |
{ | { |
if (x.size() != StrLen(y)) |
StringRep::free(_rep); |
return false; |
_rep = &StringRep::_emptyRep; |
|
_StringThrowBadUTF8((Uint32)utf8_error_index); |
|
} |
| |
return String::compare(x.getData(), y, x.size()) == 0; |
_rep->data[_rep->size] = 0; |
|
|
|
return *this; |
} | } |
| |
Boolean String::equal(const Char16* x, const String& y) |
void String::clear() |
|
{ |
|
if (_rep->size) |
{ | { |
return equal(y, x); |
if (_rep->refs.get() == 1) |
|
{ |
|
_rep->size = 0; |
|
_rep->data[0] = '\0'; |
|
} |
|
else |
|
{ |
|
StringRep::unref(_rep); |
|
_rep = &StringRep::_emptyRep; |
|
} |
|
} |
} | } |
| |
Boolean String::equal(const String& x, const char* y) |
void String::reserveCapacity(Uint32 cap) |
{ | { |
return equal(x, String(y)); |
_reserve(_rep, cap); |
} | } |
| |
Boolean String::equal(const char* x, const String& y) |
CString String::getCString() const |
{ | { |
return equal(String(x), y); |
// A UTF8 string can have three times as many characters as its UTF16 |
|
// counterpart, so we allocate extra memory for the worst case. In the |
|
// best case, we may need only one third of the memory allocated. But |
|
// downsizing the string afterwards is expensive and unnecessary since
|
// CString objects are usually short-lived (disappearing after only a few |
|
// instructions). CString objects are typically created on the stack as |
|
// means to obtain a char* pointer. |
|
|
|
#ifdef PEGASUS_STRING_NO_UTF8 |
|
char* str = (char*)operator new(_rep->size + 1); |
|
_copy(str, _rep->data, _rep->size); |
|
str[_rep->size] = '\0'; |
|
return CString(str); |
|
#else |
|
Uint32 n = (Uint32)(3 * _rep->size); |
|
char* str = (char*)operator new(n + 1); |
|
size_t size = _copyToUTF8(str, _rep->data, _rep->size); |
|
str[size] = '\0'; |
|
return CString(str); |
|
#endif |
} | } |
| |
Boolean String::equalNoCase(const String& x, const String& y) |
String& String::append(const Char16* str, Uint32 n) |
{ | { |
if (x.size() != y.size()) |
_checkNullPointer(str); |
return false; |
|
| |
const Char16* p = x.getData(); |
size_t oldSize = _rep->size; |
const Char16* q = y.getData(); |
size_t newSize = oldSize + n; |
|
_reserve(_rep, (Uint32)newSize); |
|
_copy(_rep->data + oldSize, (Uint16*)str, n); |
|
_rep->size = newSize; |
|
_rep->data[newSize] = '\0'; |
| |
Uint32 n = x.size(); |
return *this; |
|
} |
| |
while (n--) |
String& String::append(const String& str) |
{ | { |
if (*p <= 127 && *q <= 127) |
return append((Char16*)(&(str._rep->data[0])), (Uint32)str._rep->size); |
|
} |
|
|
|
String& String::append(const char* str, Uint32 size) |
{ | { |
if (tolower(*p++) != tolower(*q++)) |
_checkNullPointer(str); |
return false; |
|
|
size_t oldSize = _rep->size; |
|
size_t cap = oldSize + size; |
|
|
|
_reserve(_rep, (Uint32)cap); |
|
size_t utf8_error_index; |
|
size_t tmp = _convert( |
|
(Uint16*)_rep->data + oldSize, str, size, utf8_error_index); |
|
|
|
if (tmp == size_t(-1)) |
|
{ |
|
StringRep::free(_rep); |
|
_rep = &StringRep::_emptyRep; |
|
_StringThrowBadUTF8((Uint32)utf8_error_index); |
} | } |
else if (*p++ != *q++) |
|
return false; |
_rep->size += tmp; |
|
_rep->data[_rep->size] = '\0'; |
|
|
|
return *this; |
} | } |
| |
return true; |
// Removes n characters starting at index. Passing PEG_NOT_FOUND for n
// removes everything from index to the end of the string. Out-of-range
// arguments are rejected through _checkBounds().
void String::remove(Uint32 index, Uint32 n)
{
    // Validate the start position first. Otherwise "size - index" below
    // would wrap around when index lies past the end of the string.
    _checkBounds(index, _rep->size);

    if (n == PEG_NOT_FOUND)
        n = (Uint32)(_rep->size - index);

    // Compare n against the number of characters remaining after index
    // rather than forming index + n, which can overflow Uint32 and slip
    // past the check. This relies on _checkBounds(a, b) rejecting a > b,
    // which the assertion below also expects.
    _checkBounds(n, _rep->size - index);

    // Get a private copy before modifying a shared representation.
    if (_rep->refs.get() != 1)
        _rep = StringRep::copyOnWrite(_rep);

    PEGASUS_ASSERT(index + n <= _rep->size);

    // Number of characters that follow the removed range.
    size_t rem = _rep->size - (index + n);
    Uint16* data = _rep->data;

    // Slide the tail down over the removed range (the regions may overlap).
    if (rem)
        memmove(data + index, data + index + n, rem * sizeof(Uint16));

    _rep->size -= n;
    data[_rep->size] = '\0';
}
| |
String String::subString(Uint32 pos, Uint32 length) const |
String String::subString(Uint32 index, Uint32 n) const |
{ | { |
if (pos < size()) |
// Note: this implementation is very permissive but used for |
|
// backwards compatibility. |
|
|
|
if (index < _rep->size) |
{ | { |
if (length == PEG_NOT_FOUND) |
if (n == PEG_NOT_FOUND || n > _rep->size - index) |
length = size() - pos; |
n = (Uint32)(_rep->size - index); |
| |
return String(getData() + pos, length); |
return String((Char16*)(_rep->data + index), n); |
} | } |
else |
|
return String(); | return String(); |
} | } |
| |
Uint32 String::find(Char16 c) const | Uint32 String::find(Char16 c) const |
{ | { |
const Char16* first = getData(); |
Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c); |
| |
for (const Char16* p = first; *p; p++) |
if (p) |
{ |
return static_cast<Uint32>(p - _rep->data); |
if (*p == c) |
|
return p - first; |
|
} |
|
| |
return PEG_NOT_FOUND; | return PEG_NOT_FOUND; |
} | } |
| |
Uint32 String::find(const String& s) const |
Uint32 String::find(Uint32 index, Char16 c) const |
{ | { |
const Char16* pSubStr = s.getData(); |
_checkBounds(index, _rep->size); |
const Char16* pStr = getData(); |
|
Uint32 subStrLen = s.size(); |
if (index >= _rep->size) |
Uint32 strLen = size(); |
return PEG_NOT_FOUND; |
|
|
// loop to find first char match |
Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c); |
Uint32 loc = 0; |
|
for( ; loc <= (strLen-subStrLen); loc++) |
if (p) |
{ |
return static_cast<Uint32>(p - _rep->data); |
if (*pStr++ == *pSubStr) // match first char |
|
{ |
return PEG_NOT_FOUND; |
// point to substr 2nd char |
|
const Char16* p = pSubStr + 1; |
|
|
|
// Test remaining chars for equal |
|
Uint32 i = 1; |
|
for (; i < subStrLen; i++) |
|
if (*pStr++ != *p++ ) |
|
{pStr--; break;} // break from loop |
|
if (i == subStrLen) |
|
return loc; |
|
} | } |
|
|
|
// Searches the characters of _rep for the first occurrence of the n-character
// sequence s. Returns the index of the match, or PEG_NOT_FOUND when there is
// none. A null s is rejected by _checkNullPointer().
Uint32 StringFindAux(
    const StringRep* _rep, const Char16* s, Uint32 n)
{
    _checkNullPointer(s);

    const Uint16* data = _rep->data;
    size_t rem = _rep->size;

    while (n <= rem)
    {
        // Locate the next candidate: a character equal to the first
        // character of the pattern.
        Uint16* p = (Uint16*)_find(data, rem, s[0]);

        if (!p)
            break;

        // Stop when fewer than n characters remain from the candidate
        // onwards. No later position can match either, and comparing here
        // would make memcmp() read beyond the string's characters.
        const size_t avail = rem - (p - data);

        if (avail < n)
            break;

        if (memcmp(p, s, n * sizeof(Uint16)) == 0)
            return static_cast<Uint32>(p - _rep->data);

        // Resume the search just after the failed candidate.
        p++;
        rem -= p - data;
        data = p;
    }

    return PEG_NOT_FOUND;
}
| |
// ATTN:KS 5 apr 2000 Need to add the Char16* version. |
|
// Finds the first occurrence of the C string s by converting it to a String
// and delegating to the String overload of find(). A null s is rejected by
// _checkNullPointer().
Uint32 String::find(const char* s) const
{
    _checkNullPointer(s);

    // Note: the temporary could be optimized away, but this overload is
    // rarely called.
    const String pattern(s);
    return find(pattern);
}
| |
Uint32 String::reverseFind(Char16 c) const | Uint32 String::reverseFind(Char16 c) const |
{ | { |
const Char16* first = getData(); |
Uint16 x = c; |
const Char16* last = getData() + size(); |
Uint16* p = _rep->data; |
|
Uint16* q = _rep->data + _rep->size; |
| |
while (last != first) |
while (q != p) |
{ | { |
if (*--last == c) |
if (*--q == x) |
return last - first; |
return static_cast<Uint32>(q - p); |
} | } |
| |
return PEG_NOT_FOUND; | return PEG_NOT_FOUND; |
|
|
| |
void String::toLower() | void String::toLower() |
{ | { |
for (Char16* p = &_rep[0]; *p; p++) |
#ifdef PEGASUS_HAS_ICU |
|
|
|
if (InitializeICU::initICUSuccessful()) |
|
{ |
|
if (_rep->refs.get() != 1) |
|
_rep = StringRep::copyOnWrite(_rep); |
|
|
|
// This will do a locale-insensitive, but context-sensitive convert. |
|
// Since context-sensitive casing looks at adjacent chars, this |
|
// prevents optimizations where the us-ascii is converted before |
|
// calling ICU. |
|
// The string may shrink or expand after the convert. |
|
|
|
//// First calculate size of resulting string. u_strToLower() returns |
|
//// only the size when zero is passed as the destination size argument. |
|
|
|
UErrorCode err = U_ZERO_ERROR; |
|
|
|
int32_t newSize = u_strToLower( |
|
NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err); |
|
|
|
err = U_ZERO_ERROR; |
|
|
|
//// Reserve enough space for the result. |
|
|
|
if ((Uint32)newSize > _rep->cap) |
|
_reserve(_rep, newSize); |
|
|
|
//// Perform the conversion (overlapping buffers are allowed). |
|
|
|
u_strToLower((UChar*)_rep->data, newSize, |
|
(UChar*)_rep->data, _rep->size, NULL, &err); |
|
|
|
_rep->size = newSize; |
|
return; |
|
} |
|
|
|
#endif /* PEGASUS_HAS_ICU */ |
|
|
|
if (_rep->refs.get() != 1) |
|
_rep = StringRep::copyOnWrite(_rep); |
|
|
|
Uint16* p = _rep->data; |
|
size_t n = _rep->size; |
|
|
|
for (; n--; p++) |
|
{ |
|
if (!(*p & 0xFF00)) |
|
*p = _toLower(*p); |
|
} |
|
} |
|
|
|
void String::toUpper() |
|
{ |
|
#ifdef PEGASUS_HAS_ICU |
|
|
|
if (InitializeICU::initICUSuccessful()) |
{ | { |
if (*p <= 127) |
if (_rep->refs.get() != 1) |
*p = tolower(*p); |
_rep = StringRep::copyOnWrite(_rep); |
|
|
|
// This will do a locale-insensitive, but context-sensitive convert. |
|
// Since context-sensitive casing looks at adjacent chars, this |
|
// prevents optimizations where the us-ascii is converted before |
|
// calling ICU. |
|
// The string may shrink or expand after the convert. |
|
|
|
//// First calculate size of resulting string. u_strToUpper() returns |
|
//// only the size when zero is passed as the destination size argument. |
|
|
|
UErrorCode err = U_ZERO_ERROR; |
|
|
|
int32_t newSize = u_strToUpper( |
|
NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err); |
|
|
|
err = U_ZERO_ERROR; |
|
|
|
//// Reserve enough space for the result. |
|
|
|
if ((Uint32)newSize > _rep->cap) |
|
_reserve(_rep, newSize); |
|
|
|
//// Perform the conversion (overlapping buffers are allowed). |
|
|
|
u_strToUpper((UChar*)_rep->data, newSize, |
|
(UChar*)_rep->data, _rep->size, NULL, &err); |
|
|
|
_rep->size = newSize; |
|
|
|
return; |
} | } |
|
|
|
#endif /* PEGASUS_HAS_ICU */ |
|
|
|
if (_rep->refs.get() != 1) |
|
_rep = StringRep::copyOnWrite(_rep); |
|
|
|
Uint16* p = _rep->data; |
|
size_t n = _rep->size; |
|
|
|
for (; n--; p++) |
|
*p = _toUpper(*p); |
} | } |
| |
void String::translate(Char16 fromChar, Char16 toChar) |
// Compares at most n code units of s1 and s2.
//
// Returns the difference between the first pair of code units that differ.
// Returns 0 if a zero code unit is reached in s1 while the strings still
// match, or if n code units were compared without finding a difference.
//
// NOTE(review): the line "for (Char16* p = &_rep[0]; *p; p++)" inside the
// loop does not belong to this logic; it looks like residue from an older
// revision interleaved by a diff rendering -- confirm.
int String::compare(const String& s1, const String& s2, Uint32 n) |
|
{ |
|
const Uint16* p1 = s1._rep->data; |
|
const Uint16* p2 = s2._rep->data; |
|
|
|
while (n--) |
|
{ |
|
int r = *p1++ - *p2++; |
|
if (r) |
|
{ |
|
return r; |
|
} |
|
// r == 0 here, so the code unit just read from s1 (p1[-1]) equals the
// one read from s2; if it is zero both strings ended at this position.
else if (!p1[-1]) |
{ | { |
for (Char16* p = &_rep[0]; *p; p++) |
// We must have encountered a null terminator in both s1 and s2 |
|
return 0; |
|
} |
|
} |
|
return 0; |
|
} |
|
|
|
// Compares s1 and s2 by handing both data buffers to _compare() and
// returning its result.
//
// NOTE(review): "if (*p == fromChar)" and "*p = toChar;" reference names that
// are not declared in this function; they look like residue from an older
// revision interleaved by a diff rendering -- confirm.
int String::compare(const String& s1, const String& s2) |
{ | { |
if (*p == fromChar) |
return _compare(s1._rep->data, s2._rep->data); |
*p = toChar; |
|
} | } |
|
|
|
// Compares s1 against the C string s2.
//
// s2 is first passed to _checkNullPointer(). When PEGASUS_STRING_NO_UTF8 is
// defined the comparison is done by _compareNoUTF8() directly on the char
// data; otherwise a temporary String is constructed from s2 and the
// String/String overload is used.
int String::compare(const String& s1, const char* s2) |
|
{ |
|
_checkNullPointer(s2); |
|
|
|
#ifdef PEGASUS_STRING_NO_UTF8 |
|
return _compareNoUTF8(s1._rep->data, s2); |
|
#else |
|
// ATTN: optimize this! |
|
return String::compare(s1, String(s2)); |
|
#endif |
} | } |
| |
int String::compare(const Char16* s1, const Char16* s2) |
// Case-insensitive comparison of str1 and str2.
//
// When PEGASUS_HAS_ICU is defined and InitializeICU::initICUSuccessful()
// returns true, the result of u_strcasecmp() with U_FOLD_CASE_DEFAULT is
// returned. Otherwise the strings are walked in parallel and the difference
// of the _toLower() values of the first differing pair is returned.
//
// NOTE(review): this listing appears to be a flattened side-by-side diff.
// The statement "int r = *s1++ - *s2++;" duplicates the declaration of r and
// looks like the older revision's line. The end of the while loop and the
// handling of strings of different length are not visible here (the diff
// seems to have elided unchanged lines) -- consult the real source file.
int String::compareNoCase(const String& str1, const String& str2) |
{ | { |
|
#ifdef PEGASUS_HAS_ICU |
|
|
|
if (InitializeICU::initICUSuccessful()) |
|
{ |
|
return u_strcasecmp( |
|
(const UChar*)str1._rep->data, |
|
(const UChar*)str2._rep->data, |
|
U_FOLD_CASE_DEFAULT |
|
); |
|
} |
|
|
|
#endif /* PEGASUS_HAS_ICU */ |
|
|
|
const Uint16* s1 = str1._rep->data; |
|
const Uint16* s2 = str2._rep->data; |
|
|
while (*s1 && *s2) | while (*s1 && *s2) |
{ | { |
int r = *s1++ - *s2++; |
int r = _toLower(*s1++) - _toLower(*s2++); |
| |
if (r) | if (r) |
return r; | return r; |
|
|
return 0; | return 0; |
} | } |
| |
PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& x) |
// Case-insensitive equality test for s1 and s2.
//
// With PEGASUS_HAS_ICU defined, returns whether String::compareNoCase(s1, s2)
// is zero. Otherwise the two Char16 buffers are compared in unrolled steps of
// eight, then four, then one code unit. A pair counts as a mismatch only if
// the raw values differ AND their _toUpper() values differ; the first
// mismatch returns false, and true is returned once all n units matched.
//
// The non-ICU path compares exactly s2.size() code units and contains no
// length check of its own -- presumably the caller has already verified that
// both strings have the same size; verify against the call sites.
//
// NOTE(review): this listing appears to be a flattened side-by-side diff.
// Lines that mention os, x, str, tmp, i or c (for example "os << x[i];",
// "void String::toLower(char* str)", "String ToLower(const String& str)",
// "tmp[i] = tolower(c);", "return tmp;") and the early "}" after the ICU
// return do not belong to this function; they look like residue from an
// older revision -- confirm.
Boolean StringEqualNoCase(const String& s1, const String& s2) |
{ | { |
for (Uint32 i = 0, n = x.size(); i < n; i++) |
#ifdef PEGASUS_HAS_ICU |
os << x[i]; |
|
| |
return os; |
return String::compareNoCase(s1, s2) == 0; |
} |
|
|
#else /* PEGASUS_HAS_ICU */ |
| |
void String::toLower(char* str) |
// The following employs loop unrolling for efficiency. Please do not |
|
// eliminate. |
|
|
|
Uint16* p = (Uint16*)s1.getChar16Data(); |
|
Uint16* q = (Uint16*)s2.getChar16Data(); |
|
Uint32 n = s2.size(); |
|
|
|
while (n >= 8) |
|
{ |
|
if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) || |
|
((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) || |
|
((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) || |
|
((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) || |
|
((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) || |
|
((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) || |
|
((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) || |
|
((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7])))) |
{ | { |
while (*str) |
return false; |
tolower(*str++); |
} |
|
|
|
n -= 8; |
|
p += 8; |
|
q += 8; |
} | } |
| |
String ToLower(const String& str) |
while (n >= 4) |
{ | { |
String tmp(str); |
if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) || |
|
((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) || |
|
((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) || |
|
((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3])))) |
|
{ |
|
return false; |
|
} |
| |
for (Uint32 i = 0, n = tmp.size(); i < n; i++) |
n -= 4; |
|
p += 4; |
|
q += 4; |
|
} |
|
|
|
while (n--) |
{ | { |
Char16 c = tmp[i]; |
if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0])))) |
|
return false; |
| |
if (c <= 127) |
p++; |
tmp[i] = tolower(c); |
q++; |
} | } |
| |
return tmp; |
return true; |
|
|
|
#endif /* PEGASUS_HAS_ICU */ |
} | } |
| |
int CompareNoCase(const char* s1, const char* s2) |
// Case-insensitive equality test between s1 and the C string s2.
//
// s2 is first passed to _checkNullPointer(). Three build variants follow:
//  - PEGASUS_HAS_ICU: a temporary String is built from s2 and the
//    String/String overload is called.
//  - PEGASUS_STRING_NO_UTF8: s1's code units are compared one by one against
//    the chars of s2 (_toUpper() on the s1 side, _toUpperTable[] on the s2
//    side). false is returned if s2 ends first, if a pair differs, or if s2
//    still has characters left after s1._rep->size units; otherwise true.
//  - otherwise: same as the ICU variant (temporary String).
//
// NOTE(review): _toUpperTable is indexed with int(*p2++); if plain char is
// signed on the target platform a byte above 127 would give a negative
// index -- confirm how callers/platforms guarantee 7-bit input here.
//
// NOTE(review): this listing appears to be a flattened side-by-side diff.
// Lines that dereference s1/s2 as pointers or use r ("while (*s1 && *s2)",
// "int r = tolower(*s1++) - tolower(*s2++);", "return r;", "return -1;",
// "else if (*s1)", "return 1;", "return 0;") look like residue from an older
// revision's CompareNoCase() -- confirm.
Boolean String::equalNoCase(const String& s1, const char* s2) |
{ | { |
while (*s1 && *s2) |
_checkNullPointer(s2); |
|
|
|
#if defined(PEGASUS_HAS_ICU) |
|
|
|
return String::equalNoCase(s1, String(s2)); |
|
|
|
#elif defined(PEGASUS_STRING_NO_UTF8) |
|
|
|
const Uint16* p1 = (Uint16*)s1._rep->data; |
|
const char* p2 = s2; |
|
size_t n = s1._rep->size; |
|
|
|
while (n--) |
{ | { |
int r = tolower(*s1++) - tolower(*s2++); |
if (!*p2) |
|
return false; |
| |
if (r) |
if (_toUpper(*p1++) != _toUpperTable[int(*p2++)]) |
return r; |
return false; |
} | } |
| |
if (*s2) |
if (*p2) |
return -1; |
return false; |
else if (*s1) |
|
return 1; |
|
| |
return 0; |
return true; |
|
|
|
#else /* PEGASUS_HAS_ICU */ |
|
|
|
// ATTN: optimize this! |
|
return String::equalNoCase(s1, String(s2)); |
|
|
|
#endif /* PEGASUS_HAS_ICU */ |
} | } |
| |
Boolean GetLine(PEGASUS_STD(istream)& is, String& line) |
// Returns true when s1 and s2 have the same size and their data buffers are
// byte-for-byte identical over size * sizeof(Uint16) bytes (memcmp).
//
// NOTE(review): "line.clear();" and "Boolean gotChar = false;" reference
// names not declared in this function; they look like residue from an older
// revision interleaved by a diff rendering -- confirm.
Boolean String::equal(const String& s1, const String& s2) |
{ | { |
line.clear(); |
return s1._rep->size == s2._rep->size && memcmp(s1._rep->data, |
|
s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0; |
Boolean gotChar = false; |
} |
char c; |
|
| |
while (is.get(c)) |
// Tests s1 for equality with the C string s2.
//
// With PEGASUS_STRING_NO_UTF8 defined, s2 is passed to _checkNullPointer()
// and both sequences are walked until either reaches a zero element; each
// char of s2 is converted with Uint16(...) before comparison. The result is
// true only if no pair differed and both sequences ended together.
// Without PEGASUS_STRING_NO_UTF8, a temporary String is built from s2 and
// the String/String overload is called.
//
// NOTE(review): _checkNullPointer(s2) is only called in the
// PEGASUS_STRING_NO_UTF8 branch; in the other branch null handling depends
// on the String(const char*) constructor -- confirm.
//
// NOTE(review): this listing appears to be a flattened side-by-side diff.
// Lines using gotChar, c or line ("gotChar = true;", "if (c == '\n')",
// "break;", "line.append(c);", "return gotChar;") look like residue from an
// older revision's GetLine() -- confirm.
Boolean String::equal(const String& s1, const char* s2) |
{ | { |
gotChar = true; |
#ifdef PEGASUS_STRING_NO_UTF8 |
| |
if (c == '\n') |
_checkNullPointer(s2); |
break; |
|
|
const Uint16* p = (Uint16*)s1._rep->data; |
|
const char* q = s2; |
| |
line.append(c); |
while (*p && *q) |
|
{ |
|
if (*p++ != Uint16(*q++)) |
|
return false; |
} | } |
| |
return gotChar; |
return !(*p || *q); |
|
|
|
#else /* PEGASUS_STRING_NO_UTF8 */ |
|
|
|
return String::equal(s1, String(s2)); |
|
|
|
#endif /* PEGASUS_STRING_NO_UTF8 */ |
} | } |
| |
String::~String() |
// Writes str to the output stream os and returns os.
//
// Three build/runtime variants are visible:
//  - PEGASUS_OS_OS400: the result of str.getCString() is written as a
//    const char*.
//  - PEGASUS_HAS_ICU with InitializeICU::initICUSuccessful() true: the data
//    is wrapped in an ICU UnicodeString, extracted into a temporary char
//    buffer, written, and the stream is flushed.
//  - otherwise: each code unit that is non-zero and has a zero high byte is
//    written as a single char; every other code unit is written as the text
//    "\x" followed by four upper-case hex digits.
//
// NOTE(review): this listing appears to be a flattened side-by-side diff.
// The lines "String& String::assign(const String& x)", "_rep = x._rep;",
// "return *this;" and "String& String::append(const Char16& c)" do not
// belong to this function and look like residue from an older revision --
// confirm.
PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str) |
{ | { |
|
#if defined(PEGASUS_OS_OS400) |
|
|
|
CString cstr = str.getCString(); |
|
const char* utf8str = cstr; |
|
os << utf8str; |
|
return os; |
|
#else |
|
|
|
#if defined(PEGASUS_HAS_ICU) |
|
|
|
if (InitializeICU::initICUSuccessful()) |
|
{ |
|
char *buf = NULL; |
|
const int size = str.size() * 6; |
|
UnicodeString UniStr( |
|
(const UChar *)str.getChar16Data(), (int32_t)str.size()); |
|
// First extract() call is made with a null destination; its return value
// is used as the size of the buffer allocated below.
Uint32 bufsize = UniStr.extract(0,size,buf); |
|
buf = new char[bufsize+1]; |
|
UniStr.extract(0,bufsize,buf); |
|
os << buf; |
|
os.flush(); |
|
delete [] buf; |
|
return os; |
} | } |
| |
String& String::assign(const String& x) |
#endif // PEGASUS_HAS_ICU |
|
|
|
for (Uint32 i = 0, n = str.size(); i < n; i++) |
{ | { |
_rep = x._rep; |
Uint16 code = str[i]; |
return *this; |
|
|
if (code > 0 && !(code & 0xFF00)) |
|
os << char(code); |
|
else |
|
{ |
|
// Print in hex format: |
|
// "\x" + 4 hex digits + terminating NUL = 7 bytes, which fits in buffer.
char buffer[8]; |
|
sprintf(buffer, "\\x%04X", code); |
|
os << buffer; |
|
} |
} | } |
| |
String& String::append(const Char16& c) |
return os; |
|
#endif // PEGASUS_OS_OS400 |
|
} |
|
|
|
// Replaces _rep with a newly allocated, larger representation.
//
// If the current capacity (_rep->cap) is non-zero, a representation with
// twice that capacity is allocated and the existing size and _rep->size
// elements are copied into it. If the capacity is zero, a representation
// with capacity 8 and size 0 is allocated instead. The old representation
// is then released with StringRep::unref() and _rep is pointed at the new
// one.
//
// NOTE(review): "_rep.insert(_rep.size() - 1, c);", "return *this;" and
// "void String::clear()" do not belong to this function; they look like
// residue from an older revision interleaved by a diff rendering -- confirm.
void StringAppendCharAux(StringRep*& _rep) |
{ | { |
_rep.insert(_rep.size() - 1, c); |
StringRep* tmp; |
return *this; |
|
|
if (_rep->cap) |
|
{ |
|
tmp = StringRep::alloc(2 * _rep->cap); |
|
tmp->size = _rep->size; |
|
_copy(tmp->data, _rep->data, _rep->size); |
|
} |
|
else |
|
{ |
|
tmp = StringRep::alloc(8); |
|
tmp->size = 0; |
} | } |
| |
void String::clear() |
StringRep::unref(_rep); |
|
_rep = tmp; |
|
} |
|
|
|
PEGASUS_NAMESPACE_END |
|
|
|
/* |
|
================================================================================ |
|
|
|
String optimizations: |
|
|
|
1. Added mechanism allowing certain functions to be inlined only when |
|
used by internal Pegasus modules. External modules (i.e., providers) |
|
link to a non-inline version, which allows for binary compatibility. |
|
|
|
2. Implemented copy-on-write with atomic increment/decrement. This |
|
yielded a 10% improvement for the 'gc' benchmark and an 11% improvement |
|
for the 'ni1000' benchmark. |
|
|
|
3. Employed loop unrolling in several places. For example, see: |
|
|
|
static Uint16* _find(const Uint16* s, size_t n, Uint16 c); |
|
|
|
4. Used the "empty-rep" optimization (described in whitepaper from the |
|
GCC Developers Summit). This reduced default construction to a simple |
|
pointer assignment. |
|
|
|
inline String::String() : _rep(&_emptyRep) { } |
|
|
|
5. Implemented Uint16 versions of toupper() and tolower() using tables. |
|
For example: |
|
|
|
static const char _upper[] = |
{ | { |
_rep.clear(); |
0,1,2,...255 |
_rep.append('\0'); |
}; |
|
|
|
inline Uint16 _toUpper(Uint16 x) |
|
{ |
|
return (x & 0xFF00) ? x : _upper[x]; |
} | } |
| |
void String::reserve(Uint32 capacity) |
This outperforms the system implementation by avoiding an anding |
|
operation. |
|
|
|
6. Implemented char* version of the following member functions to |
|
eliminate unnecessary creation of anonymous string objects |
|
(temporaries). |
|
|
|
String(const String& s1, const char* s2); |
|
String(const char* s1, const String& s2); |
|
String& String::operator=(const char* str); |
|
Uint32 String::find(const char* s) const; |
|
bool String::equal(const String& s1, const char* s2); |
|
static int String::compare(const String& s1, const char* s2); |
|
String& String::append(const char* str); |
|
String& String::append(const char* str, Uint32 size); |
|
static bool String::equalNoCase(const String& s1, const char* s2); |
|
String& operator=(const char* str) |
|
String& String::assign(const char* str) |
|
String& String::append(const char* str) |
|
Boolean operator==(const String& s1, const char* s2) |
|
Boolean operator==(const char* s1, const String& s2) |
|
Boolean operator!=(const String& s1, const char* s2) |
|
Boolean operator!=(const char* s1, const String& s2) |
|
Boolean operator<(const String& s1, const char* s2) |
|
Boolean operator<(const char* s1, const String& s2) |
|
Boolean operator>(const String& s1, const char* s2) |
|
Boolean operator>(const char* s1, const String& s2) |
|
Boolean operator<=(const String& s1, const char* s2) |
|
Boolean operator<=(const char* s1, const String& s2) |
|
Boolean operator>=(const String& s1, const char* s2) |
|
Boolean operator>=(const char* s1, const String& s2) |
|
String operator+(const String& s1, const char* s2) |
|
String operator+(const char* s1, const String& s2) |
|
|
|
7. Optimized _roundUpToPow2(), used in rounding the capacity to the next |
|
power of two (algorithm from the book "Hacker's Delight"). |
|
|
|
static Uint32 _roundUpToPow2(Uint32 x) |
{ | { |
_rep.reserve(capacity + 1); |
if (x < 8) |
|
return 8; |
|
|
|
x--; |
|
x |= (x >> 1); |
|
x |= (x >> 2); |
|
x |= (x >> 4); |
|
x |= (x >> 8); |
|
x |= (x >> 16); |
|
x++; |
|
|
|
return x; |
} | } |
| |
const Array<String>& EmptyStringArray() |
8. Implemented "concatenating constructors" to eliminate temporaries |
|
created by operator+(). This scheme employs the "return-value |
|
optimization" described by Stan Lippman. |
|
|
|
inline String operator+(const String& s1, const String& s2) |
{ | { |
static Array<String> tmp; |
return String(s1, s2, 0); |
return tmp; |
|
} | } |
| |
PEGASUS_NAMESPACE_END |
9. Experimented to find the optimal initial size for a short string. |
|
Eight seems to offer the best tradeoff between space and time. |
|
|
|
10. Inlined all members of the Char16 class. |
|
|
|
11. Used Uint16 internally in the String class. This showed no improvement |
|
since Char16 was already fully inlined and was essentially reduced to |
|
Uint16 in any case. |
|
|
|
12. Implemented conditional logic (#if) allowing error checking logic to |
|
be excluded to improve performance. Examples include bounds checking |
|
and null-pointer checking. |
|
|
|
13. Used memcpy() and memcmp() where possible. These are implemented using |
|
the rep family of instructions under Intel and are much faster. |
|
|
|
14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8 |
|
copy routine overhead. |
|
|
|
15. Added ASCII7 form of the constructor and assign(). |
|
|
|
String s("hello world", String::ASCII7); |
|
|
|
s.assignASCII7("hello world"); |
|
|
|
This avoids slower UTF8 processing when not needed. |
|
|
|
================================================================================ |
|
|
|
TO-DO: |
|
|
|
(+) [DONE] Use PEGASUS_USE_EXPERIMENTAL_INTERFACES |
|
|
|
(+) [DONE] Submit BUG-2754 (Windows buffer limit). |
|
|
|
(+) [DONE] Eliminate char versions of find() and append(). |
|
|
|
(+) [DONE] Remove PEGASUS_MAX_PRINTABLE_CHARACTER from Config.h |
|
|
|
(+) [DONE] Change _next_pow_2() to _roundUpToPow2(). |
|
|
|
(+) [DONE] Change '99' to '2' in StringRep constructor (comment as well). |
|
|
|
(+) [DONE] Comment StringRep allocation layout. |
|
|
|
(+) [DONE] Conceal private inline functions. |
|
|
|
(+) [DONE] Shorten inclusion of StringInline.h in String.h. |
|
|
|
(+) [DONE] Change USE_INTERNAL_INLINE TO DISABLE_INTERNAL_INLINE or get |
|
rid of altogether. |
|
|
|
(+) [DONE] useCamelNotationOnAllFunctionNames. |
|
|
|
(+) [DONE] Check for overflow condition in StringRep::alloc(). |
|
|
|
(+) [DONE] Remove tabs (used vim ":set expandtab" and ":retab"). |
|
|
|
(+) [DONE] Fix throw-related memory leak. |
|
|
|
(+) [DONE] Look at PEP223 for coding security guidelines. |
|
|
|
(+) [DONE] Use old AtomicInt for now (new AtomicInt part of bug #4250). |
|
|
|
(+) [DONE] Removed appendASCII() and the ASCII form of the constructor. |
|
|
|
(+) DOC++ String.h - will open new bug? |
|
|
|
(+) Added PEGASUS_DISABLE_INTERNAL_INLINES macro (to permit suppression |
|
on certain platforms). |
|
|
|
================================================================================ |
|
*/ |