pegasus/src/Pegasus/Common/String.cpp - diff

Return to String.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

Diff for /pegasus/src/Pegasus/Common/String.cpp between version 1.111.2.7 and 1.111.6.2

version 1.111.2.7, 2005/09/30 12:57:15

version 1.111.6.2, 2005/10/08 01:59:52

Line 29

// Author: Mike Brasher (mbrasher@bmc.com)

// Modified By:

// Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)

// Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297

// David Dillard, VERITAS Software Corp. (david.dillard@veritas.com)

// Mike Brasher (mike-brasher@austin.rr.com)

//%/////////////////////////////////////////////////////////////////////////////

#define PEGASUS_USE_INTERNAL_INLINES

#include "String.h"

#include <cassert>

#include "InternalException.h"

#include "CommonUTF.h"

#include "CharSet.h"

#include "MessageLoader.h"

#include "StringRep.h"

#ifdef PEGASUS_STRING_ENABLE_ICU

#ifdef PEGASUS_HAS_ICU

#include <unicode/ustring.h>

#include <unicode/uchar.h>

#endif

Line 49

Line 54

// Compile-time macros (undefined by default).

// PEGASUS_STRING_ENABLE_ICU -- enables use of ICU package

// PEGASUS_STRING_NO_THROW -- suppresses throwing of exceptions

// PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.

Line 65

Line 68

//==============================================================================

// Upper-case lookup table indexed by an 8-bit character value.
// Entries 0x61-0x7A (ASCII 'a'-'z') hold 0x41-0x5A (ASCII 'A'-'Z');
// every other entry, including all of 0x80-0xFF, maps to itself.
const Uint8 _to_upper_tbl[256] =

{

0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,

0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,

0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,

0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,

0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,

0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,

0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,

0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,

0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,

0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,

0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,

0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,

// 0x60-0x7F: lower-case letters are replaced by their upper-case values.
0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,

0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,

0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,

0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,

0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,

0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,

0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,

0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,

0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,

0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,

0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,

0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,

0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,

0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,

0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,

0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,

0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,

0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,

0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,

0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,

};

// Lower-case lookup table indexed by an 8-bit character value.
// Entries 0x41-0x5A (ASCII 'A'-'Z') hold 0x61-0x7A (ASCII 'a'-'z');
// every other entry, including all of 0x80-0xFF, maps to itself.
const Uint8 _to_lower_tbl[256] =

{

0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,

0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,

0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,

0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,

0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,

0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,

0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,

0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,

// 0x40-0x5F: upper-case letters are replaced by their lower-case values.
0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,

0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,

0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,

0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,

0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,

0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,

0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,

0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,

0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,

0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,

0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,

0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,

0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,

0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,

0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,

0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,

0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,

0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,

0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,

0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,

0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,

0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,

0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,

0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,

};

// Converts 16-bit characters to upper case.

inline Uint16 _to_upper(Uint16 x)

{

// Values with any of the high eight bits set (x > 0xFF) are returned
// unchanged; only 0x00-0xFF are case-mapped.
// NOTE(review): two return statements appear back to back here, so as
// written only the first can execute. Presumably these are the "removed"
// and "added" lines of the diff view rendered together -- confirm against
// the actual revision before relying on either form.

return (x & 0xFF00) ? x : CharSet::to_upper(x);

return (x & 0xFF00) ? x : _to_upper_tbl[x];

}

// Converts 16-bit characters to lower case.

inline Uint16 _to_lower(Uint16 x)

{

// Values with any of the high eight bits set (x > 0xFF) are returned
// unchanged; only 0x00-0xFF are case-mapped.
// NOTE(review): two return statements appear back to back here, so as
// written only the first can execute. Presumably these are the "removed"
// and "added" lines of the diff view rendered together -- confirm against
// the actual revision before relying on either form.

return (x & 0xFF00) ? x : CharSet::to_lower(x);

return (x & 0xFF00) ? x : _to_lower_tbl[x];

}

// Rounds x to the next power of two (or just returns 8 if x < 8).

Line 212

Line 287

while (n-- && (*s1++ - *s2++) == 0)

;

return s1[-1] - s2[-1];

}

Line 241

Line 318

Uint16* p = dest;

const Uint8* q = (const Uint8*)src;

// Process leading 7-bit ASCII characters (to avoid UTF8 overhead below

// Process leading 7-bit ASCII characters (to avoid UTF8 overhead later).

// this loop). Use factor-four loop-unrolling.

// Use loop-unrolling.

while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)

while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0)

{

p[0] = q[0];

p[1] = q[1];

p[2] = q[2];

p[3] = q[3];

p[4] = q[4];

p[5] = q[5];

p[6] = q[6];

p[7] = q[7];

p += 8;

q += 8;

n -= 8;

}

while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0)

{

p[0] = q[0];

p[1] = q[1];

Line 267

Line 359

}

break;

case 2:

if (q[0] < 128 && q[1] < 128)

if (((q[0]|q[1]) & 0x80) == 0)

{

p[0] = q[0];

p[1] = q[1];

Line 275

Line 367

}

break;

case 3:

if (q[0] < 128 && q[1] < 128 && q[2] < 128)

if (((q[0]|q[1]|q[2]) & 0x80) == 0)

{

p[0] = q[0];

p[1] = q[1];

Line 303

Line 395

if (c > n || !isValid_U8(q, c) ||

UTF8toUTF16(&q, q + c, &p, p + n) != 0)

{

throw Exception("Bad UTF8 encoding");

MessageLoaderParms parms("Common.String.BAD_UTF8",

"The byte sequence starting at index $0 "

"is not valid UTF-8 encoding.",

q - (const Uint8*)src);

throw Exception(parms);

}

n -= c;

Line 822

Line 918

void String::toLower()

{

#ifdef PEGASUS_STRING_ENABLE_ICU

#ifdef PEGASUS_HAS_ICU

if (InitializeICU::initICUSuccessful())

{

if (Atomic_get(&_rep->refs) != 1)

_rep = StringRep::copy_on_write(_rep);

// This will do a locale-insensitive, but context-sensitive convert.

// Since context-sensitive casing looks at adjacent chars, this

// prevents optimizations where the us-ascii is converted before

// calling ICU.

// The string may shrink or expand after the convert.

//// First calculate size of resulting string. u_strToLower() returns

//// only the size when zero is passed as the destination size argument.

Line 834

Line 939

int32_t new_size = u_strToLower(

NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);

err = U_ZERO_ERROR;

//// Reserve enough space for the result.

if ((Uint32)new_size > _rep->cap)

Line 845

Line 952

(UChar*)_rep->data, _rep->size, NULL, &err);

_rep->size = new_size;

return;

}

#endif /* PEGASUS_STRING_ENABLE_ICU */

#endif /* PEGASUS_HAS_ICU */

if (Atomic_get(&_rep->refs) != 1)

_rep = StringRep::copy_on_write(_rep);

Line 864

Line 972

void String::toUpper()

{

#ifdef PEGASUS_STRING_ENABLE_ICU

#ifdef PEGASUS_HAS_ICU

if (InitializeICU::initICUSuccessful())

{

if (Atomic_get(&_rep->refs) != 1)

_rep = StringRep::copy_on_write(_rep);

// This will do a locale-insensitive, but context-sensitive convert.

// Since context-sensitive casing looks at adjacent chars, this

// prevents optimizations where the us-ascii is converted before

// calling ICU.

// The string may shrink or expand after the convert.

//// First calculate size of resulting string. u_strToUpper() returns

//// only the size when zero is passed as the destination size argument.

Line 876

Line 993

int32_t new_size = u_strToUpper(

NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);

err = U_ZERO_ERROR;

//// Reserve enough space for the result.

if ((Uint32)new_size > _rep->cap)

Line 887

Line 1006

(UChar*)_rep->data, _rep->size, NULL, &err);

_rep->size = new_size;

return;

}

#endif /* PEGASUS_STRING_ENABLE_ICU */

#endif /* PEGASUS_HAS_ICU */

if (Atomic_get(&_rep->refs) != 1)

_rep = StringRep::copy_on_write(_rep);

Line 929

Line 1050

int String::compareNoCase(const String& str1, const String& str2)

{

#ifdef PEGASUS_STRING_ENABLE_ICU

#ifdef PEGASUS_HAS_ICU

if (InitializeICU::initICUSuccessful())

{

Line 937

Line 1058

str1._rep->data, str2._rep->data, U_FOLD_CASE_DEFAULT);

}

#endif /* PEGASUS_STRING_ENABLE_ICU */

#endif /* PEGASUS_HAS_ICU */

const Uint16* s1 = str1._rep->data;

const Uint16* s2 = str2._rep->data;

Line 960

Line 1081

Boolean String::equalNoCase_aux(const String& s1, const String& s2)

{

#ifdef PEGASUS_STRING_ENABLE_ICU

#ifdef PEGASUS_HAS_ICU

return String::compareNoCase(s1, s2) == 0;

#else /* PEGASUS_STRING_ENABLE_ICU */

#else /* PEGASUS_HAS_ICU */

Uint16* p = (Uint16*)s1._rep->data;

Uint16* q = (Uint16*)s2._rep->data;

Line 1015

Line 1136

return true;

#endif /* PEGASUS_STRING_ENABLE_ICU */

#endif /* PEGASUS_HAS_ICU */

}

Boolean String::equalNoCase(const String& s1, const char* s2)

{

_check_null_pointer(s2);

#if defined(PEGASUS_STRING_ENABLE_ICU)

#if defined(PEGASUS_HAS_ICU)

return String::equalNoCase(s1, String(s2));

Line 1037

Line 1158

if (!*p2)

return false;

if (_to_upper(*p1++) != CharSet::to_upper(int(*p2++)))

if (_to_upper(*p1++) != _to_upper_tbl[int(*p2++)])

return false;

}

if (*p2)

return false;

return true;

#else /* PEGASUS_STRING_ENABLE_ICU */

#else /* PEGASUS_HAS_ICU */

// ATTN: optimize this!

return String::equalNoCase(s1, String(s2));

#endif /* PEGASUS_STRING_ENABLE_ICU */

#endif /* PEGASUS_HAS_ICU */

}

Boolean String::equal(const String& s1, const String& s2)

Line 1088

Line 1212

CString cstr = str.getCString();

const char* utf8str = cstr;

os << utf8str;

return os;

#else

#elif defined(PEGASUS_STRING_ENABLE_ICU)

#if defined(PEGASUS_HAS_ICU)

if (InitializeICU::initICUSuccessful())

{

Line 1103

Line 1229

os << buf;

os.flush();

delete [] buf;

return os;

}

#endif /* PEGASUS_OS_OS400 */

#endif // PEGASUS_HAS_ICU

for (Uint32 i = 0, n = str.size(); i < n; i++)

{

Line 1123

Line 1250

}

return os;

#endif // PEGASUS_OS_OS400

}

void String::_append_char_aux()

Line 1246

Line 1374

}

9. Experimented to find the optimal initial size for a short string.

Eight seems to offer the best tradoff between space and time.

Eight seems to offer the best tradeoff between space and time.

10. Inlined all members of the Char16 class.

Line 1272

Line 1400

This avoids slower UTF8 processing when not needed.

BUG-4200 Review actions:

1. Use PEGASUS_USE_EXPERIMENTAL_INTERFACES instead of

PEGASUS_STRING_EXTENSIONS.

Status: done

2. Doc++ String.h

Status: pending

3. Look at PEP223 for security coding guidelines for strings.

Status: pending

4. Increasing the number of objects may break Windows 2000 build

(limit of 2048 bytes for command line). See BUG-2754

Status: looking into the use of auto-generated linker files.

5. Concerns about whether generating inlines and non-inline versions

of functions will work with all compilers.

Status: confident it will work on platforms except maybe Windows.

================================================================================

Legend:

Removed from v.1.111.2.7
changed lines
	Added in v.1.111.6.2

No CVS admin address has been configured