pegasus/src/Pegasus/Common/String.cpp - diff

Return to String.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

Diff for /pegasus/src/Pegasus/Common/String.cpp between version 1.39 and 1.111.6.15

version 1.39, 2002/05/06 20:13:08

version 1.111.6.15, 2005/10/14 14:09:29

Line 1

//%/////////////////////////////////////////////////////////////////////////////

//%2005////////////////////////////////////////////////////////////////////////

// Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.

// IBM Corp.; EMC Corporation, The Open Group.

// IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.

// EMC Corporation; VERITAS Software Corporation; The Open Group.

// Permission is hereby granted, free of charge, to any person obtaining a copy

// of this software and associated documentation files (the "Software"), to

Line 20

Line 27

//==============================================================================

// Author: Mike Brasher (mbrasher@bmc.com)

// Author: Mike Brasher (mbrasher@austin.rr.com)

// Modified By: Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)

// Modified By:

// Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)

// Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297

// David Dillard, VERITAS Software Corp. (david.dillard@veritas.com)

// Mike Brasher (mike-brasher@austin.rr.com)

//%/////////////////////////////////////////////////////////////////////////////

#include <cassert>

#include <cctype>

#include "InternalException.h"

#include "String.h"

#include "CommonUTF.h"

#include "Exception.h"

#include "MessageLoader.h"

#include <iostream>

#include "StringRep.h"

PEGASUS_USING_STD;

#ifdef PEGASUS_HAS_ICU

#include <unicode/ustring.h>

#include <unicode/uchar.h>

#endif

PEGASUS_NAMESPACE_BEGIN

#define PEGASUS_ARRAY_T String

//==============================================================================

#include <Pegasus/Common/ArrayImpl.h>

#undef PEGASUS_ARRAY_T

// Compile-time macros (undefined by default).

// PEGASUS_STRING_NO_THROW -- suppresses throwing of exceptions

// PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.

//==============================================================================

///////////////////////////////////////////////////////////////////////////////

//==============================================================================

// String

// File-scope definitions:

///////////////////////////////////////////////////////////////////////////////

//==============================================================================

const String String::EMPTY = String();

// Note: this table is much faster than the system toupper(). Please do not

// change.

Uint32 _strnlen(const char* str, Uint32 n)

const Uint8 _toUpperTable[256] =

{

if (!str)

0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,

throw NullPointer();

0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,

0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,

0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,

0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,

0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,

0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,

0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,

0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,

0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,

0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,

0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,

0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,

0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,

0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,

0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,

0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,

0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,

0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,

0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,

0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,

0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,

0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,

0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,

0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,

0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,

0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,

0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,

0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,

0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,

0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,

0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,

};

// Note: this table is much faster than the system tolower(). Please do not

// change.

// Lookup table indexed by an 8-bit character code. Entries 0x41-0x5A hold

// 0x61-0x7A; every other entry holds its own index.

const Uint8 _toLowerTable[256] =

{

0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,

0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,

0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,

0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,

0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,

0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,

0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,

0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,

0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,

0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,

0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,

0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,

0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,

0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,

0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,

0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,

0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,

0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,

0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,

0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,

0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,

0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,

0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,

0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,

0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,

0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,

0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,

0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,

0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,

0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,

0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,

0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,

};

// Upper-cases a 16-bit character through _toUpperTable. A value with any of
// its high eight bits set is returned unchanged; only values 0x00-0xFF are
// looked up. The table lookup is faster than the system toupper(). Please do
// not change.
inline Uint16 _toUpper(Uint16 x)
{
    if (x & 0xFF00)
        return x;

    return _toUpperTable[x];
}

// Lower-cases a 16-bit character through _toLowerTable. A value with any of
// its high eight bits set is returned unchanged; only values 0x00-0xFF are
// looked up. The table lookup is faster than the system tolower(). Please do
// not change.
inline Uint16 _toLower(Uint16 x)
{
    if (x & 0xFF00)
        return x;

    return _toLowerTable[x];
}

for (Uint32 i=0; i<n; i++)

// Rounds x up to the nearest power of two (or just returns 8 if x < 8).

{

static Uint32 _roundUpToPow2(Uint32 x)

if (!*str)

{

return i;

#ifndef PEGASUS_STRING_NO_THROW

}

if (x > 0x0FFFFFFF)

throw PEGASUS_STD(bad_alloc)();

#endif

if (x < 8)

return 8;

x--;

x |= (x >> 1);

x |= (x >> 2);

x |= (x >> 4);

x |= (x >> 8);

x |= (x >> 16);

x++;

return x;

}

// Copies n elements from q to p, converting each element from Q to P by
// plain assignment. The two ranges are expected not to overlap (elements are
// copied front to back).
//
//   p  destination; must have room for at least n elements
//   q  source; must hold at least n elements
//   n  number of elements to copy (0 is allowed and copies nothing)
template<class P, class Q>
static void _copy(P* p, const Q* q, size_t n)
{
    // The following employs loop unrolling for efficiency. Please do not
    // eliminate.

    while (n >= 8)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
        p[4] = q[4];
        p[5] = q[5];
        p[6] = q[6];
        p[7] = q[7];
        p += 8;
        q += 8;
        n -= 8;
    }

    while (n >= 4)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
        p += 4;
        q += 4;
        n -= 4;
    }

    // Copy the remaining (at most three) elements. A stray "return n;" used
    // to sit here; it is ill-formed in a void function and made this tail
    // loop unreachable.
    while (n--)
        *p++ = *q++;
}

Uint32 _strnlen(const Char16* str, Uint32 n)

static Uint16* _find(const Uint16* s, size_t n, Uint16 c)

{

if (!str)

// The following employs loop unrolling for efficiency. Please do not

throw NullPointer();

// eliminate.

for (Uint32 i=0; i<n; i++)

while (n >= 4)

{

if (!*str)

if (s[0] == c)

return (Uint16*)s;

if (s[1] == c)

return (Uint16*)&s[1];

if (s[2] == c)

return (Uint16*)&s[2];

if (s[3] == c)

return (Uint16*)&s[3];

n -= 4;

s += 4;

}

if (n)

{

return i;

if (*s == c)

return (Uint16*)s;

s++;

n--;

}

if (n)

{

if (*s == c)

return (Uint16*)s;

s++;

n--;

}

return n;

if (n && *s == c)

return (Uint16*)s;

// Not found!

return 0;

}

inline Uint32 _StrLen(const char* str)

static int _compare(const Uint16* s1, const Uint16* s2)

{

if (!str)

while (*s1 && *s2)

throw NullPointer();

{

int r = *s1++ - *s2++;

return strlen(str);

if (r)

return r;

}

inline Uint32 _StrLen(const Char16* str)

if (*s2)

return -1;

else if (*s1)

return 1;

return 0;

}

static int _compareNoUTF8(const Uint16* s1, const char* s2)

{

if (!str)

Uint16 c1;

throw NullPointer();

Uint16 c2;

Uint32 n = 0;

{

c1 = *s1++;

c2 = *s2++;

while (*str++)

if (c1 == 0)

n++;

return c1 - c2;

}

while (c1 == c2);

return n;

return c1 - c2;

}

String::String()

// Compares the first n 16-bit code units of s1 and s2.
//
// Returns the difference (*s1 - *s2) at the first position where the two
// sequences differ, or 0 when all n units are equal. Unlike the
// null-terminated overload, this one does not stop at a zero code unit.
//
// When n is 0 the result is 0. The earlier form evaluated s1[-1] - s2[-1]
// without having advanced either pointer, reading one element before both
// buffers.
static int _compare(const Uint16* s1, const Uint16* s2, size_t n)
{
    // This should only be called when s1 and s2 have the same length.

    for (; n; --n, ++s1, ++s2)
    {
        int r = *s1 - *s2;

        if (r)
            return r;
    }

    return 0;
}

String::String(const String& str)

static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)

: _rep(str._rep)

{

memcpy(s1, s2, n * sizeof(Uint16));

}

String::String(const String& str, Uint32 n)

void StringThrowOutOfBounds()

{

assign(str.getData(), n);

throw IndexOutOfBoundsException();

}

String::String(const Char16* str)

inline void _checkNullPointer(const void* ptr)

: _rep(str, _StrLen(str) + 1)

{

#ifdef PEGASUS_STRING_NO_THROW

if (!ptr)

throw NullPointer();

#endif

}

String::String(const Char16* str, Uint32 n)

static void _StringThrowBadUTF8(Uint32 index)

{

assign(str, n);

MessageLoaderParms parms(

"Common.String.BAD_UTF8",

"The byte sequence starting at index $0 "

"is not valid UTF-8 encoding.",

index);

throw Exception(parms);

}

static size_t _copyFromUTF8(

Uint16* dest,

const char* src,

size_t n,

size_t& utf8_error_index)

{

Uint16* p = dest;

const Uint8* q = (const Uint8*)src;

// Process leading 7-bit ASCII characters (to avoid UTF8 overhead later).

// Use loop-unrolling.

while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0)

{

p[0] = q[0];

p[1] = q[1];

p[2] = q[2];

p[3] = q[3];

p[4] = q[4];

p[5] = q[5];

p[6] = q[6];

p[7] = q[7];

p += 8;

q += 8;

n -= 8;

}

while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0)

{

p[0] = q[0];

p[1] = q[1];

p[2] = q[2];

p[3] = q[3];

p += 4;

q += 4;

n -= 4;

}

switch (n)

{

case 0:

return p - dest;

case 1:

if (q[0] < 128)

{

p[0] = q[0];

return p + 1 - dest;

}

break;

case 2:

if (((q[0]|q[1]) & 0x80) == 0)

{

p[0] = q[0];

p[1] = q[1];

return p + 2 - dest;

}

break;

case 3:

if (((q[0]|q[1]|q[2]) & 0x80) == 0)

{

p[0] = q[0];

p[1] = q[1];

p[2] = q[2];

return p + 3 - dest;

}

break;

}

String::String(const char* str)

// Process remaining characters.

while (n)

{

assign(str);

// Optimize for 7-bit ASCII case.

if (*q < 128)

{

*p++ = *q++;

n--;

}

else

{

Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1;

String::String(const char* str, Uint32 n)

if (c > n || !isValid_U8(q, c) ||

UTF8toUTF16(&q, q + c, &p, p + n) != 0)

{

assign(str, n);

utf8_error_index = q - (const Uint8*)src;

return size_t(-1);

}

n -= c;

}

String::~String()

return p - dest;

}

// Note: dest must be at least three times src (plus an extra byte for

// terminator).

static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n)

{

// The following employs loop unrolling for efficiency. Please do not

// eliminate.

const Uint16* q = src;

Uint8* p = (Uint8*)dest;

while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)

{

p[0] = q[0];

p[1] = q[1];

p[2] = q[2];

p[3] = q[3];

p += 4;

q += 4;

n -= 4;

}

String& String::operator=(const String& str)

switch (n)

{

case 0:

return p - (Uint8*)dest;

case 1:

if (q[0] < 128)

{

return assign(str);

p[0] = q[0];

return p + 1 - (Uint8*)dest;

}

break;

case 2:

if (q[0] < 128 && q[1] < 128)

{

p[0] = q[0];

p[1] = q[1];

return p + 2 - (Uint8*)dest;

}

break;

case 3:

if (q[0] < 128 && q[1] < 128 && q[2] < 128)

{

p[0] = q[0];

p[1] = q[1];

p[2] = q[2];

return p + 3 - (Uint8*)dest;

}

break;

}

// If this line was reached, there must be characters greater than 128.

String& String::operator=(const Char16* str)

UTF16toUTF8(&q, q + n, &p, p + 3 * n);

return p - (Uint8*)dest;

}

// Converts n bytes at q into 16-bit code units written to p.
//
// With PEGASUS_STRING_NO_UTF8 defined, each byte is widened to one code unit
// and n is returned; utf8_error_index is not touched. Otherwise the work is
// delegated to _copyFromUTF8(), whose return value is passed through: the
// number of code units written, or size_t(-1) with utf8_error_index set when
// the input is not valid UTF-8.
static inline size_t _convert(
    Uint16* p, const char* q, size_t n, size_t& utf8_error_index)
{
#ifdef PEGASUS_STRING_NO_UTF8
    // Read the bytes as unsigned. Where plain char is signed, a byte >= 0x80
    // would otherwise be sign-extended to 0xFF80-0xFFFF when widened.
    _copy(p, (const Uint8*)q, n);
    return n;
#else
    return _copyFromUTF8(p, q, n, utf8_error_index);
#endif
}

String& String::assign(const String& str)

//==============================================================================

// class CString

//==============================================================================

CString::CString(const CString& cstr) : _rep(0)

{

_rep = str._rep;

if (cstr._rep)

return *this;

{

size_t n = strlen(cstr._rep) + 1;

_rep = (char*)operator new(n);

memcpy(_rep, cstr._rep, n);

}

String& String::assign(const Char16* str)

CString& CString::operator=(const CString& cstr)

{

_rep.clear();

if (&cstr != this)

_rep.append(str, _StrLen(str) + 1);

{

return *this;

if (_rep)

{

operator delete(_rep);

_rep = 0;

}

String& String::assign(const Char16* str, Uint32 n)

if (cstr._rep)

{

_rep.clear();

size_t n = strlen(cstr._rep) + 1;

Uint32 m = _strnlen(str, n);

_rep = (char*)operator new(n);

_rep.append(str, m);

memcpy(_rep, cstr._rep, n);

_rep.append('\0');

}

return *this;

}

String& String::assign(const char* str)

//==============================================================================

// class StringRep

//==============================================================================

StringRep StringRep::_emptyRep;

inline StringRep* StringRep::alloc(size_t cap)

{

_rep.clear();

#ifndef PEGASUS_STRING_NO_THROW

Uint32 n = strlen(str) + 1;

// Any string bigger than this is seriously suspect.

_rep.reserve(n);

if (cap > 0x0FFFFFFF)

throw PEGASUS_STD(bad_alloc)();

while (n--)

#endif

_rep.append(*str++);

return *this;

StringRep* rep = (StringRep*)::operator new(

sizeof(StringRep) + cap * sizeof(Uint16));

rep->cap = cap;

new(&rep->refs) AtomicInt(1);

return rep;

}

String& String::assign(const char* str, Uint32 n)

static inline void _reserve(StringRep*& rep, Uint32 cap)

{

if (cap > rep->cap || rep->refs.value() != 1)

{

size_t n = _roundUpToPow2(cap);

StringRep* newRep = StringRep::alloc(n);

newRep->size = rep->size;

_copy(newRep->data, rep->data, rep->size + 1);

StringRep::unref(rep);

rep = newRep;

}

// Builds a new representation holding a copy of the first `size` code units
// of `data`, followed by a zero terminator. The representation is obtained
// from StringRep::alloc(size).
StringRep* StringRep::create(const Uint16* data, size_t size)
{
    StringRep* const result = StringRep::alloc(size);

    _copy(result->data, data, size);
    result->data[size] = '\0';
    result->size = size;

    return result;
}

StringRep* StringRep::copyOnWrite(StringRep* rep)

{

_rep.clear();

// Return a new copy of rep. Release rep.

Uint32 _n = _strnlen(str, n);

StringRep* newRep = StringRep::alloc(rep->size);

_rep.reserve(_n + 1);

newRep->size = rep->size;

_copy(newRep->data, rep->data, rep->size);

newRep->data[newRep->size] = '\0';

StringRep::unref(rep);

return newRep;

}

while (_n--)

StringRep* StringRep::create(const char* data, size_t size)

_rep.append(*str++);

{

StringRep* rep = StringRep::alloc(size);

size_t utf8_error_index;

rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index);

_rep.append('\0');

#ifndef PEGASUS_STRING_NO_THROW

if (rep->size == size_t(-1))

{

StringRep::free(rep);

_StringThrowBadUTF8(utf8_error_index);

}

#endif

return *this;

rep->data[rep->size] = '\0';

return rep;

}

void String::clear()

Uint32 StringRep::length(const Uint16* str)

{

_rep.clear();

// Note: We could unroll this but it is rarely called.

_rep.append('\0');

const Uint16* end = (Uint16*)str;

while (*end++)

;

return end - str - 1;

}

void String::reserve(Uint32 capacity)

//==============================================================================

// class String

//==============================================================================

const String String::EMPTY;

String::String(const String& str, Uint32 n)

{

_rep.reserve(capacity + 1);

_checkBounds(n, str._rep->size);

_rep = StringRep::create(str._rep->data, n);

}

Uint32 String::size() const

String::String(const Char16* str)

{

return _rep.size() - 1;

_checkNullPointer(str);

_rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));

}

const Char16* String::getData() const

String::String(const Char16* str, Uint32 n)

{

return _rep.getData();

_checkNullPointer(str);

_rep = StringRep::create((Uint16*)str, n);

}

char* String::allocateCString(Uint32 extraBytes, Boolean noThrow) const

String::String(const char* str)

{

Uint32 n = size() + 1;

_checkNullPointer(str);

char* str = new char[n + extraBytes];

char* p = str;

const Char16* q = getData();

for (Uint32 i = 0; i < n; i++)

// Set this just in case create() throws an exception.

_rep = &StringRep::_emptyRep;

_rep = StringRep::create(str, strlen(str));

}

String::String(const char* str, Uint32 n)

{

Uint16 c = *q++;

_checkNullPointer(str);

*p++ = char(c);

if ((c & 0xff00) && !noThrow)

// Set this just in case create() throws an exception.

throw TruncatedCharacter();

_rep = &StringRep::_emptyRep;

_rep = StringRep::create(str, n);

}

return str;

// Concatenating constructor: the new string holds the contents of s1
// followed by the contents of s2, with a zero terminator after the last
// code unit.
String::String(const String& s1, const String& s2)
{
    const size_t leftLen = s1._rep->size;
    const size_t rightLen = s2._rep->size;
    const size_t total = leftLen + rightLen;

    _rep = StringRep::alloc(total);
    _rep->size = total;

    _copy(_rep->data, s1._rep->data, leftLen);
    _copy(_rep->data + leftLen, s2._rep->data, rightLen);
    _rep->data[total] = '\0';
}

void String::appendToCString(

String::String(const String& s1, const char* s2)

char* str,

Uint32 length,

Boolean noThrow) const

{

if (!str)

_checkNullPointer(s2);

throw NullPointer();

size_t n1 = s1._rep->size;

size_t n2 = strlen(s2);

_rep = StringRep::alloc(n1 + n2);

_copy(_rep->data, s1._rep->data, n1);

size_t utf8_error_index;

size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index);

Uint32 n = (size() < length)? size() : length;

#ifndef PEGASUS_STRING_NO_THROW

if (tmp == size_t(-1))

{

StringRep::free(_rep);

_rep = &StringRep::_emptyRep;

_StringThrowBadUTF8(utf8_error_index);

}

#endif

_rep->size = n1 + tmp;

_rep->data[_rep->size] = '\0';

}

char* p = str + strlen(str);

// Concatenating constructor: converts the null-terminated byte string s1 to
// 16-bit code units with _convert() and appends the contents of s2.
//
// Unless PEGASUS_STRING_NO_THROW is defined, a failed conversion (reported
// by _convert() as size_t(-1)) releases the allocation, leaves the string
// pointing at the shared empty representation, and reports the offending
// index through _StringThrowBadUTF8().
String::String(const char* s1, const String& s2)
{
    _checkNullPointer(s1);

    size_t n1 = strlen(s1);
    size_t n2 = s2._rep->size;

    // n1 bytes never convert to more than n1 code units, so this capacity is
    // an upper bound on the converted length plus n2.
    _rep = StringRep::alloc(n1 + n2);

    size_t utf8_error_index;
    size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index);

#ifndef PEGASUS_STRING_NO_THROW

    if (tmp == size_t(-1))
    {
        StringRep::free(_rep);
        _rep = &StringRep::_emptyRep;
        _StringThrowBadUTF8(utf8_error_index);
    }

#endif

    _rep->size = n2 + tmp;

    // Append s2 right after the converted units. tmp, not the byte count n1,
    // is the number of units actually written; the two differ whenever s1
    // holds multi-byte UTF-8 sequences. Copying at n1 left a gap of
    // uninitialized units and pushed the tail of s2 past the recorded size.
    _copy(_rep->data + tmp, s2._rep->data, n2);

    _rep->data[_rep->size] = '\0';
}

if ((c & 0xff00) && !noThrow)

String& String::assign(const String& str)

throw TruncatedCharacter();

{

if (_rep != str._rep)

{

StringRep::unref(_rep);

StringRep::ref(_rep = str._rep);

}

*p = '\0';

return *this;

}

Char16& String::operator[](Uint32 i)

String& String::assign(const Char16* str, Uint32 n)

{

_checkNullPointer(str);

if (n > _rep->cap || _rep->refs.value() != 1)

{

if (i > size())

StringRep::unref(_rep);

ThrowOutOfBounds();

_rep = StringRep::alloc(n);

}

return _rep[i];

_rep->size = n;

_copy(_rep->data, (Uint16*)str, n);

_rep->data[n] = '\0';

return *this;

}

const Char16 String::operator[](Uint32 i) const

String& String::assign(const char* str, Uint32 n)

{

if (i > size())

_checkNullPointer(str);

ThrowOutOfBounds();

return _rep[i];

if (n > _rep->cap || _rep->refs.value() != 1)

{

StringRep::unref(_rep);

_rep = StringRep::alloc(n);

}

String& String::append(const Char16& c)

size_t utf8_error_index;

_rep->size = _convert(_rep->data, str, n, utf8_error_index);

#ifndef PEGASUS_STRING_NO_THROW

if (_rep->size == size_t(-1))

{

_rep.insert(_rep.size() - 1, c);

StringRep::free(_rep);

_rep = &StringRep::_emptyRep;

_StringThrowBadUTF8(utf8_error_index);

}

#endif

_rep->data[_rep->size] = 0;

return *this;

}

// Makes the string empty.
//
// Nothing is done when the size is already zero. When the reference count
// reads 1, the buffer is reset in place. Otherwise this string's reference
// is dropped and it is pointed at the shared empty representation.
void String::clear()
{
    if (_rep->size)
    {
        if (_rep->refs.value() == 1)
        {
            _rep->size = 0;
            _rep->data[0] = '\0';
        }
        else
        {
            StringRep::unref(_rep);
            _rep = &StringRep::_emptyRep;
        }
    }
    // The closing braces for the outer "if" and for the function were
    // missing, so the next definition began inside this body.
}

// Forwards the requested capacity to the file-local _reserve() helper,
// which operates on this string's representation pointer.
void String::reserveCapacity(Uint32 cap)
{
    _reserve(this->_rep, cap);
}

// Returns the string contents as a CString wrapping a freshly allocated,
// null-terminated char buffer.
CString String::getCString() const
{
    // The UTF-8 form of a string may need up to three bytes for every
    // UTF-16 code unit, so the buffer is sized for that worst case. Often
    // only a third of it is used, but shrinking it afterwards would be
    // expensive and unnecessary: CString objects are usually short-lived
    // stack temporaries created just to obtain a char* pointer.

#ifdef PEGASUS_STRING_NO_UTF8

    // One byte per code unit, plus the terminator.
    char* buffer = (char*)operator new(_rep->size + 1);
    _copy(buffer, _rep->data, _rep->size);
    buffer[_rep->size] = '\0';

    return CString(buffer);

#else

    Uint32 worstCase = 3 * _rep->size;
    char* buffer = (char*)operator new(worstCase + 1);

    // _copyToUTF8() returns the number of bytes it actually produced.
    size_t written = _copyToUTF8(buffer, _rep->data, _rep->size);
    buffer[written] = '\0';

    return CString(buffer);

#endif
}

String& String::append(const Char16* str, Uint32 n)

{

Uint32 m = _strnlen(str, n);

_checkNullPointer(str);

_rep.reserve(_rep.size() + m);

_rep.remove(_rep.size() - 1);

size_t oldSize = _rep->size;

_rep.append(str, m);

size_t newSize = oldSize + n;

_rep.append('\0');

_reserve(_rep, newSize);

_copy(_rep->data + oldSize, (Uint16*)str, n);

_rep->size = newSize;

_rep->data[newSize] = '\0';

return *this;

}

String& String::append(const String& str)

{

return append(str.getData(), str.size());

return append((Char16*)str._rep->data, str._rep->size);

}

String& String::operator+=(const String& str)

String& String::append(const char* str, Uint32 size)

{

return append(str);

_checkNullPointer(str);

}

String& String::operator+=(Char16 c)

size_t oldSize = _rep->size;

size_t cap = oldSize + size;

_reserve(_rep, cap);

size_t utf8_error_index;

size_t tmp = _convert(

(Uint16*)_rep->data + oldSize, str, size, utf8_error_index);

#ifndef PEGASUS_STRING_NO_THROW

if (tmp == size_t(-1))

{

return append(c);

StringRep::free(_rep);

_rep = &StringRep::_emptyRep;

_StringThrowBadUTF8(utf8_error_index);

}

#endif

String& String::operator+=(char c)

_rep->size += tmp;

{

_rep->data[_rep->size] = '\0';

return append(Char16(c));

return *this;

}

void String::remove(Uint32 pos, Uint32 size)

void String::remove(Uint32 index, Uint32 n)

{

if (size == PEG_NOT_FOUND)

if (n == PEG_NOT_FOUND)

size = this->size() - pos;

n = _rep->size - index;

_checkBounds(index + n, _rep->size);

if (_rep->refs.value() != 1)

_rep = StringRep::copyOnWrite(_rep);

if (pos + size > this->size())

assert(index + n <= _rep->size);

ThrowOutOfBounds();

if (size)

size_t rem = _rep->size - (index + n);

_rep.remove(pos, size);

Uint16* data = _rep->data;

if (rem)

memmove(data + index, data + index + n, rem * sizeof(Uint16));

_rep->size -= n;

data[_rep->size] = '\0';

}

String String::subString(Uint32 pos, Uint32 length) const

String String::subString(Uint32 index, Uint32 n) const

{

if (pos < size())

// Note: this implementation is very permissive but used for

// backwards compatibility.

if (index < _rep->size)

{

if (length == PEG_NOT_FOUND)

if (n == PEG_NOT_FOUND || n > _rep->size - index)

length = size() - pos;

n = _rep->size - index;

return String(getData() + pos, length);

return String((Char16*)_rep->data + index, n);

}

else

return String();

}

Uint32 String::find(Char16 c) const

{

const Char16* first = getData();

Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);

for (const Char16* p = first; *p; p++)

if (p)

{

return p - _rep->data;

if (*p == c)

return p - first;

}

return PEG_NOT_FOUND;

}

Uint32 String::find(Uint32 pos, Char16 c) const

Uint32 String::find(Uint32 index, Char16 c) const

{

const Char16* data = getData();

_checkBounds(index, _rep->size);

for (Uint32 i = pos, n = size(); i < n; i++)

if (index >= _rep->size)

{

return PEG_NOT_FOUND;

if (data[i] == c)

return i;

Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);

}

if (p)

return p - _rep->data;

return PEG_NOT_FOUND;

}

Uint32 String::find(const String& s) const

Uint32 StringFindAux(

const StringRep* _rep, const Char16* s, Uint32 n)

{

const Char16* pSubStr = s.getData();

_checkNullPointer(s);

const Char16* pStr = getData();

Uint32 subStrLen = s.size();

Uint32 strLen = size();

if (subStrLen > strLen)

const Uint16* data = _rep->data;

size_t rem = _rep->size;

while (n <= rem)

{

return PEG_NOT_FOUND;

Uint16* p = (Uint16*)_find(data, rem, s[0]);

}

// loop to find first char match

if (!p)

Uint32 loc = 0;

break;

for( ; loc <= (strLen-subStrLen); loc++)

{

if (memcmp(p, s, n * sizeof(Uint16)) == 0)

if (*pStr++ == *pSubStr) // match first char

return p - _rep->data;

{

// point to substr 2nd char

p++;

const Char16* p = pSubStr + 1;

rem -= p - data;

data = p;

// Test remaining chars for equal

Uint32 i = 1;

for (; i < subStrLen; i++)

if (*pStr++ != *p++ )

{pStr--; break;} // break from loop

if (i == subStrLen)

return loc;

}

return PEG_NOT_FOUND;

}

Uint32 String::find(const Char16* s) const

return PEG_NOT_FOUND;

{

return find(String(s));

}

// Searches for the null-terminated byte string s by converting it to a
// String and delegating to the String overload of find().
Uint32 String::find(const char* s) const
{
    _checkNullPointer(s);

    // Note: the temporary could be optimized away, but this overload is
    // rarely called.
    const String needle(s);

    return find(needle);
}

Uint32 String::reverseFind(Char16 c) const

{

const Char16* first = getData();

Uint16 x = c;

const Char16* last = getData() + size();

Uint16* p = _rep->data;

Uint16* q = _rep->data + _rep->size;

while (last != first)

while (q != p)

{

if (*--last == c)

if (*--q == x)

return last - first;

return q - p;

}

return PEG_NOT_FOUND;

Line 427

Line 995

void String::toLower()

{

for (Char16* p = &_rep[0]; *p; p++)

#ifdef PEGASUS_HAS_ICU

if (InitializeICU::initICUSuccessful())

{

#ifdef PEGASUS_HAS_EBCDIC

if (_rep->refs.value() != 1)

if (*p <= 255)

_rep = StringRep::copyOnWrite(_rep);

#else

if (*p <= 127)

// This will do a locale-insensitive, but context-sensitive convert.

#endif

// Since context-sensitive casing looks at adjacent chars, this

*p = tolower(*p);

// prevents optimizations where the us-ascii is converted before

}

// calling ICU.

// The string may shrink or expand after the convert.

//// First calculate size of resulting string. u_strToLower() returns

//// only the size when zero is passed as the destination size argument.

UErrorCode err = U_ZERO_ERROR;

int32_t newSize = u_strToLower(

NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);

err = U_ZERO_ERROR;

//// Reserve enough space for the result.

if ((Uint32)newSize > _rep->cap)

_reserve(_rep, newSize);

//// Perform the conversion (overlapping buffers are allowed).

u_strToLower((UChar*)_rep->data, newSize,

(UChar*)_rep->data, _rep->size, NULL, &err);

_rep->size = newSize;

return;

}

void String::toLower(char* str)

#endif /* PEGASUS_HAS_ICU */

if (_rep->refs.value() != 1)

_rep = StringRep::copyOnWrite(_rep);

Uint16* p = _rep->data;

size_t n = _rep->size;

for (; n--; p++)

{

while (*str)

if (!(*p & 0xFF00))

tolower(*str++);

*p = _toLower(*p);

}

void String::translate(Char16 fromChar, Char16 toChar)

void String::toUpper()

{

for (Char16* p = &_rep[0]; *p; p++)

#ifdef PEGASUS_HAS_ICU

if (InitializeICU::initICUSuccessful())

{

if (*p == fromChar)

if (_rep->refs.value() != 1)

*p = toChar;

_rep = StringRep::copyOnWrite(_rep);

// This will do a locale-insensitive, but context-sensitive convert.

// Since context-sensitive casing looks at adjacent chars, this

// prevents optimizations where the us-ascii is converted before

// calling ICU.

// The string may shrink or expand after the convert.

//// First calculate size of resulting string. u_strToUpper() returns

//// only the size when zero is passed as the destination size argument.

UErrorCode err = U_ZERO_ERROR;

int32_t newSize = u_strToUpper(

NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);

err = U_ZERO_ERROR;

//// Reserve enough space for the result.

if ((Uint32)newSize > _rep->cap)

_reserve(_rep, newSize);

//// Perform the conversion (overlapping buffers are allowed).

u_strToUpper((UChar*)_rep->data, newSize,

(UChar*)_rep->data, _rep->size, NULL, &err);

_rep->size = newSize;

return;

}

#endif /* PEGASUS_HAS_ICU */

if (_rep->refs.value() != 1)

_rep = StringRep::copyOnWrite(_rep);

Uint16* p = _rep->data;

size_t n = _rep->size;

for (; n--; p++)

*p = _toUpper(*p);

}

void String::print() const

int String::compare(const String& s1, const String& s2, Uint32 n)

{

cout << *this << endl;

assert(n <= s1._rep->size);

assert(n <= s2._rep->size);

// Ignoring error in which n is greater than s1.size() or s2.size()

return _compare(s1._rep->data, s2._rep->data, n);

}

int String::compare(const Char16* s1, const Char16* s2, Uint32 n)

int String::compare(const String& s1, const String& s2)

{

while (n--)

return _compare(s1._rep->data, s2._rep->data);

}

int String::compare(const String& s1, const char* s2)

{

int r = *s1++ - *s2++;

_checkNullPointer(s2);

if (r)

#ifdef PEGASUS_STRING_NO_UTF8

return r;

return _compareNoUTF8(s1._rep->data, s2);

#else

// ATTN: optimize this!

return String::compare(s1, String(s2));

#endif

}

return 0;

int String::compareNoCase(const String& str1, const String& str2)

}

{

#ifdef PEGASUS_HAS_ICU

int String::compare(const Char16* s1, const Char16* s2)

if (InitializeICU::initICUSuccessful())

{

return u_strcasecmp(

str1._rep->data, str2._rep->data, U_FOLD_CASE_DEFAULT);

}

#endif /* PEGASUS_HAS_ICU */

const Uint16* s1 = str1._rep->data;

const Uint16* s2 = str2._rep->data;

while (*s1 && *s2)

{

int r = *s1++ - *s2++;

int r = _toLower(*s1++) - _toLower(*s2++);

if (r)

return r;

Line 489

Line 1156

return 0;

}

int String::compareNoCase(const char* s1, const char* s2, Uint32 n)

Boolean StringEqualNoCase(const String& s1, const String& s2)

{

while (n--)

#ifdef PEGASUS_HAS_ICU

{

int r = tolower(*s1++) - tolower(*s2++);

if (r)

return String::compareNoCase(s1, s2) == 0;

return r;

}

return 0;

#else /* PEGASUS_HAS_ICU */

}

// The following employs loop unrolling for efficiency. Please do not

// eliminate.

int String::compareNoCase(const char* s1, const char* s2)

Uint16* p = (Uint16*)s1.getChar16Data();

Uint16* q = (Uint16*)s2.getChar16Data();

Uint32 n = s2.size();

while (n >= 8)

{

while (*s1 && *s2)

if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||

((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||

((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||

((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) ||

((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) ||

((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) ||

((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) ||

((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7]))))

{

int r = tolower(*s1++) - tolower(*s2++);

return false;

if (r)

return r;

}

if (*s2)

n -= 8;

return -1;

p += 8;

else if (*s1)

q += 8;

return 1;

return 0;

}

Boolean String::equal(const String& str1, const String& str2)

while (n >= 4)

{

if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||

((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||

((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||

((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))

{

if (str1.size() != str2.size())

return false;

}

return String::compare(str1.getData(), str2.getData(), str1.size()) == 0;

n -= 4;

p += 4;

q += 4;

}

Boolean String::equal(const String& str1, const Char16* str2)

while (n--)

{

if (str1.size() != _StrLen(str2))

if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))

return false;

return String::compare(str1.getData(), str2, str1.size()) == 0;

p++;

q++;

}

Boolean String::equal(const Char16* str1, const String& str2)

return true;

{

return equal(str2, str1);

}

Boolean String::equal(const String& str1, const char* str2)

#endif /* PEGASUS_HAS_ICU */

{

return equal(str1, String(str2));

}

Boolean String::equal(const char* str1, const String& str2)

Boolean String::equalNoCase(const String& s1, const char* s2)

{

return equal(String(str1), str2);

_checkNullPointer(s2);

}

Boolean String::equalNoCase(const String& str1, const String& str2)

#if defined(PEGASUS_HAS_ICU)

{

if (str1.size() != str2.size())

return false;

const Char16* p = str1.getData();

return String::equalNoCase(s1, String(s2));

const Char16* q = str2.getData();

Uint32 n = str1.size();

#elif defined(PEGASUS_STRING_NO_UTF8)

const Uint16* p1 = (Uint16*)s1._rep->data;

const char* p2 = s2;

size_t n = s1._rep->size;

while (n--)

{

#ifdef PEGASUS_HAS_EBCDIC

if (!*p2)

if (*p <= 255 && *q <= 255)

#else

if (*p <= 127 && *q <= 127)

#endif

{

if (tolower(*p++) != tolower(*q++))

return false;

}

else if (*p++ != *q++)

if (_toUpper(*p1++) != _toUpperTable[int(*p2++)])

return false;

}

if (*p2)

return false;

return true;

}

//#define NEWMATCHFUNCTION

#else /* PEGASUS_HAS_ICU */

#if defined NEWMATCHFUNCTION

// Wildcard String matching function that may be useful in the future

// The following code was provided by Bob Blair.

/* _StringMatch Match input MatchString against a GLOB style pattern

Note that MatchChar is the char type so that this source

is portable to different string types. This is an internal function

Results: The return value is 1 if string matches pattern, and

0 otherwise. The matching operation permits the following

special characters in the pattern: *?\[] (see the manual

entry for details on what these mean).

Side effects: None.

// ATTN: optimize this!

return String::equalNoCase(s1, String(s2));

/* MatchChar defined as a separate entity because this function source used

#endif /* PEGASUS_HAS_ICU */

elsewhere was an unsigned char *. Here we use Uint16 to maintain 16 bit

}

size.

typedef Uint16 MatchChar;

inline Uint16 _ToLower(Uint16 ch)

Boolean String::equal(const String& s1, const String& s2)

{

#ifdef PEGASUS_HAS_EBCDIC

return s1._rep->size == s2._rep->size && memcmp(s1._rep->data,

return ch <= 255 ? tolower(char(ch)) : ch;

s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;

#else

return ch <= 127 ? tolower(char(ch)) : ch;

#endif

}

inline Boolean _Equal(MatchChar ch1, MatchChar ch2, int nocase)

Boolean String::equal(const String& s1, const char* s2)

{

if (nocase)

#ifdef PEGASUS_STRING_NO_UTF8

return _ToLower(ch1) == _ToLower(ch2);

else

_checkNullPointer(s2);

return ch1 == ch2;

}

const Uint16* p = (Uint16*)s1._rep->data;

const char* q = s2;

static const MatchChar *

while (*p && *q)

_matchrange(const MatchChar *range, MatchChar c, int nocase)

{

const MatchChar *p = range;

if (*p++ != Uint16(*q++))

const MatchChar *rstart = range + 1;

return false;

const MatchChar *rend = 0;

MatchChar compchar;

for (rend = rstart; *rend && *rend != ']'; rend++);

if (*rend == ']') { // if there is an end to this pattern

for (compchar = *rstart; rstart != rend; rstart++) {

if (_Equal(*rstart, c, nocase))

return ++rend;

if (*rstart == '-') {

rstart++;

if (c >= compchar && c <= *rstart)

return ++rend;

}

return (const MatchChar *)0;

}

// Matches testString against a GLOB-style pattern.
//
// Pattern elements handled by this function:
//     *      matches any run of characters, including an empty one
//     ?      matches exactly one character
//     [...]  character set / range, evaluated by _matchrange()
//     other  must compare equal to the test character via _Equal(),
//            which is passed the nocase flag
//
// Both testString and pattern are walked until a zero element is reached.
//
// Returns 1 when the whole of testString matches the whole of pattern and
// 0 otherwise.
static int
_StringMatch(
    const MatchChar *testString,
    const MatchChar *pattern,
    int nocase )                      // Ignore case if this is true
{
    const MatchChar *pat = pattern;
    const MatchChar *str = testString;

    for (;;)
    {
        // End of pattern: it is a match only if the test string ended too.
        if (!*pat)
            return *str ? 0 : 1;

        if (*pat == '*')
        {
            // A run of consecutive '*' is equivalent to a single one.
            while (*pat == '*')
                pat++;

            // A trailing '*' absorbs whatever is left of the test string.
            if (!*pat)
                return 1;

            // Otherwise the remainder of the pattern has to match some
            // suffix of the test string; try each one in turn (the empty
            // suffix included) by recursing.
            for (;;)
            {
                if (_StringMatch(str, pat, nocase))
                    return 1;

                if (!*str)
                    return 0;

                str++;
            }
        }

        // The pattern still requires a character here but the test string
        // has none left. (A pattern consisting only of '*' from this point
        // on was already accepted above.)
        if (!*str)
            return 0;

        if (*pat == '?')
        {
            // '?' accepts any single character.
            pat++;
            str++;
        }
        else if (*pat == '[')
        {
            // On success _matchrange() hands back the position just past
            // the closing ']', so the pattern pointer must not be advanced
            // again here. A null result means the set did not match.
            pat = _matchrange(pat, *str, nocase);

            if (!pat)
                return 0;

            str++;
        }
        else
        {
            // Ordinary character: both sides must agree.
            if (!_Equal(*pat++, *str++, nocase))
                return 0;
        }
    }
}

#else

return !(*p || *q);

////////////////////////////////////////////////////////////////////////////////

// String matching routines borrowed from Tcl 8.0:

////////////////////////////////////////////////////////////////////////////////

// This software is copyrighted by the Regents of the University of

// California, Sun Microsystems, Inc., and other parties. The following

// terms apply to all files associated with the software unless explicitly

// disclaimed in individual files.

// The authors hereby grant permission to use, copy, modify, distribute,

// and license this software and its documentation for any purpose, provided

// that existing copyright notices are retained in all copies and that this

// notice is included verbatim in any distributions. No written agreement,

// license, or royalty fee is required for any of the authorized uses.

// Modifications to this software may be copyrighted by their authors

// and need not follow the licensing terms described here, provided that

// the new terms are clearly indicated on the first page of each file where

// they apply.

// IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY

// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES

// ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY

// DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE

// POSSIBILITY OF SUCH DAMAGE.

// THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,

// INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,

// FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE

// IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE

// NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR

// MODIFICATIONS.

// GOVERNMENT USE: If you are acquiring this software on behalf of the

// U.S. government, the Government shall have only "Restricted Rights"

// in the software and related documentation as defined in the Federal

// Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you

// are acquiring the software on behalf of the Department of Defense, the

// software shall be classified as "Commercial Computer Software" and the

// Government shall have only "Restricted Rights" as defined in Clause

// 252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the

// authors grant the U.S. Government and others acting in its behalf

// permission to use and distribute the software in accordance with the

// terms specified in this license.

////////////////////////////////////////////////////////////////////////////////

#else /* PEGASUS_STRING_NO_UTF8 */

return String::equal(s1, String(s2));

*----------------------------------------------------------------------

* Tcl_StringMatch --

* See if a particular string matches a particular pattern.

* Results:

* The return value is 1 if string matches pattern, and

* 0 otherwise. The matching operation permits the following

* special characters in the pattern: *?\[] (see the manual

* entry for details on what these mean).

* Side effects:

* None.

*----------------------------------------------------------------------

typedef Uint16 MatchChar;

#endif /* PEGASUS_STRING_NO_UTF8 */

}

inline Uint16 _ToLower(Uint16 ch)

PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)

{

#ifdef PEGASUS_HAS_EBCDIC

#if defined(PEGASUS_OS_OS400)

return ch <= 255 ? tolower(char(ch)) : ch;

CString cstr = str.getCString();

const char* utf8str = cstr;

os << utf8str;

return os;

#else

return ch <= 127 ? tolower(char(ch)) : ch;

#endif

}

inline Boolean _Equal(Uint16 ch1, Uint16 ch2, int nocase)

#if defined(PEGASUS_HAS_ICU)

if (InitializeICU::initICUSuccessful())

{

if (nocase)

char *buf = NULL;

return _ToLower(ch1) == _ToLower(ch2);

const int size = str.size() * 6;

else

UnicodeString UniStr(

return ch1 == ch2;

(const UChar *)str.getChar16Data(), (int32_t)str.size());

Uint32 bufsize = UniStr.extract(0,size,buf);

buf = new char[bufsize+1];

UniStr.extract(0,bufsize,buf);

os << buf;

os.flush();

delete [] buf;

return os;

}

int _StringMatch(

#endif // PEGASUS_HAS_ICU

MatchChar *string, /* String. */

MatchChar *pattern, /* Pattern, which may contain special

* characters. */

int nocase) /* Ignore case if this is true */

{

MatchChar c2;

while (1) {

for (Uint32 i = 0, n = str.size(); i < n; i++)

/* See if we're at the end of both the pattern and the string.

{

* If so, we succeeded. If we're at the end of the pattern

Uint16 code = str[i];

* but not at the end of the string, we failed.

if (*pattern == 0) {

if (code > 0 && !(code & 0xFF00))

if (*string == 0) {

os << char(code);

return 1;

else

} else {

{

return 0;

// Print in hex format:

char buffer[8];

sprintf(buffer, "\\x%04X", code);

os << buffer;

}

if ((*string == 0) && (*pattern != '*')) {

return 0;

return os;

#endif // PEGASUS_OS_OS400

}

/* Check for a "*" as the next pattern character. It matches

void StringAppendCharAux(StringRep*& _rep)

* any substring. We handle this by calling ourselves

{

* recursively for each postfix of string, until either we

StringRep* tmp;

* match or we reach the end of the string.

if (*pattern == '*') {

if (_rep->cap)

pattern += 1;

{

if (*pattern == 0) {

tmp = StringRep::alloc(2 * _rep->cap);

return 1;

tmp->size = _rep->size;

}

_copy(tmp->data, _rep->data, _rep->size);

while (1) {

if (_StringMatch(string, pattern, nocase)) {

return 1;

}

if (*string == 0) {

return 0;

}

string += 1;

else

{

tmp = StringRep::alloc(8);

tmp->size = 0;

}

StringRep::unref(_rep);

_rep = tmp;

}

/* Check for a "?" as the next pattern character. It matches

PEGASUS_NAMESPACE_END

* any single character.

if (*pattern == '?') {

goto thisCharOK;

================================================================================

}

/* Check for a "[" as the next pattern character. It is followed

String optimizations:

* by a list of characters that are acceptable, or by a range

* (two characters separated by "-").

if (*pattern == '[') {

1. Added mechanism allowing certain functions to be inlined only when

pattern += 1;

used by internal Pegasus modules. External modules (i.e., providers)

while (1) {

link to a non-inline version, which allows for binary compatibility.

if ((*pattern == ']') || (*pattern == 0)) {

return 0;

}

if (_Equal(*pattern, *string, nocase)) {

break;

}

if (pattern[1] == '-') {

c2 = pattern[2];

if (c2 == 0) {

return 0;

}

if ((*pattern <= *string) && (c2 >= *string)) {

break;

}

if ((*pattern >= *string) && (c2 <= *string)) {

break;

}

pattern += 2;

}

pattern += 1;

}

while (*pattern != ']') {

if (*pattern == 0) {

pattern--;

break;

}

pattern += 1;

}

goto thisCharOK;

}

/* If the next pattern character is '\', just strip off the '\'

2. Implemented copy-on-write with atomic increment/decrement. This

* so we do exact matching on the character that follows.

yielded a 10% improvement for the 'gc' benchmark and an 11% improvement

for the 'ni1000' benchmark.

if (*pattern == '\\') {

3. Employed loop unrolling in several places. For example, see:

pattern += 1;

if (*pattern == 0) {

return 0;

}

/* There's no special character. Just make sure that the next

static Uint16* _find(const Uint16* s, size_t n, Uint16 c);

* characters of each string match.

if (!_Equal(*pattern, *string, nocase)) {

4. Used the "empty-rep" optimization (described in whitepaper from the

return 0;

GCC Developers Summit). This reduced default construction to a simple

}

pointer assignment.

thisCharOK: pattern += 1;

inline String::String() : _rep(&_emptyRep) { }

string += 1;

}

5. Implemented Uint16 versions of toupper() and tolower() using tables.

}

For example:

#endif

Boolean String::match(const String& str, const String& pattern)

static const char _upper[] =

{

return _StringMatch(

0,1,2,...255

(Uint16*)str.getData(), (Uint16*)pattern.getData(), 0) != 0;

};

}

Boolean String::matchNoCase(const String& str, const String& pattern)

inline Uint16 _toUpper(Uint16 x)

{

return _StringMatch(

return (x & 0xFF00) ? x : _upper[x];

(Uint16*)str.getData(), (Uint16*)pattern.getData(), 1) != 0;

}

This outperforms the system implementation by avoiding an anding

operation.

///////////////////////////////////////////////////////////////////////////////

6. Implemented char* version of the following member functions to

eliminate unnecessary creation of anonymous string objects

// String-related functions

(temporaries).

///////////////////////////////////////////////////////////////////////////////

Boolean operator==(const String& str1, const String& str2)

String(const String& s1, const char* s2);

{

String(const char* s1, const String& s2);

return String::equal(str1, str2);

String& String::operator=(const char* str);

}

Uint32 String::find(const char* s) const;

bool String::equal(const String& s1, const char* s2);

static int String::compare(const String& s1, const char* s2);

String& String::append(const char* str);

String& String::append(const char* str, Uint32 size);

static bool String::equalNoCase(const String& s1, const char* s2);

String& operator=(const char* str)

String& String::assign(const char* str)

String& String::append(const char* str)

Boolean operator==(const String& s1, const char* s2)

Boolean operator==(const char* s1, const String& s2)

Boolean operator!=(const String& s1, const char* s2)

Boolean operator!=(const char* s1, const String& s2)

Boolean operator<(const String& s1, const char* s2)

Boolean operator<(const char* s1, const String& s2)

Boolean operator>(const String& s1, const char* s2)

Boolean operator>(const char* s1, const String& s2)

Boolean operator<=(const String& s1, const char* s2)

Boolean operator<=(const char* s1, const String& s2)

Boolean operator>=(const String& s1, const char* s2)

Boolean operator>=(const char* s1, const String& s2)

String operator+(const String& s1, const char* s2)

String operator+(const char* s1, const String& s2)

Boolean operator==(const String& str1, const char* str2)

7. Optimized _roundUpToPow2(), used in rounding the capacity to the next

{

power of two (algorithm from the book "Hacker's Delight").

return String::equal(str1, str2);

}

Boolean operator==(const char* str1, const String& str2)

static Uint32 _roundUpToPow2(Uint32 x)

{

return String::equal(str1, str2);

if (x < 8)

return 8;

x--;

x |= (x >> 1);

x |= (x >> 2);

x |= (x >> 4);

x |= (x >> 8);

x |= (x >> 16);

x++;

return x;

}

Boolean operator!=(const String& str1, const String& str2)

8. Implemented "concatenating constructors" to eliminate temporaries

created by operator+(). This scheme employs the "return-value

optimization" described by Stan Lippman.

inline String operator+(const String& s1, const String& s2)

{

return !String::equal(str1, str2);

return String(s1, s2, 0);

}

PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str1)

9. Experimented to find the optimal initial size for a short string.

{

Eight seems to offer the best tradeoff between space and time.

for (Uint32 i = 0, n = str1.size(); i < n; i++)

os << str1[i];

return os;

10. Inlined all members of the Char16 class.

}

String operator+(const String& str1, const String& str2)

11. Used Uint16 internally in the String class. This showed no improvement

{

since Char16 was already fully inlined and was essentially reduced to

return String(str1).append(str2);

Uint16 in any case.

}

Boolean operator<(const String& str1, const String& str2)

12. Implemented conditional logic (#if) allowing error checking logic to

{

be excluded for better performance. Examples include bounds checking

return String::compare(str1.getData(), str2.getData()) < 0;

and null-pointer checking.

}

Boolean operator<=(const String& str1, const String& str2)

13. Used memcpy() and memcmp() where possible. These are implemented using

{

the rep family of instructions under Intel and are much faster.

return String::compare(str1.getData(), str2.getData()) <= 0;

}

Boolean operator>(const String& str1, const String& str2)

14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8

{

copy routine overhead.

return String::compare(str1.getData(), str2.getData()) > 0;

}

Boolean operator>=(const String& str1, const String& str2)

15. Added ASCII7 form of the constructor and assign().

{

return String::compare(str1.getData(), str2.getData()) >= 0;

}

int CompareNoCase(const char* s1, const char* s2)

String s("hello world", String::ASCII7);

{

while (*s1 && *s2)

{

int r = tolower(*s1++) - tolower(*s2++);

if (r)

s.assignASCII7("hello world");

return r;

}

if (*s2)

This avoids slower UTF8 processing when not needed.

return -1;

else if (*s1)

return 1;

return 0;

================================================================================

}

int EqualNoCase(const char* s1, const char* s2)

TO-DO:

{

return CompareNoCase(s1, s2) == 0;

}

PEGASUS_NAMESPACE_END

(+) [DONE] Use PEGASUS_USE_EXPERIMENTAL_INTERFACES

(+) [DONE] Submit BUG-2754 (Windows buffer limit).

(+) [DONE] Eliminate char versions of find() and append().

(+) [DONE] Remove PEGASUS_MAX_PRINTABLE_CHARACTER from Config.h

(+) [DONE] Change _next_pow_2() to _roundUpToPow2().

(+) [DONE] Change '99' to '2' in StringRep constructor (comment as well).

(+) [DONE] Comment StringRep allocation layout.

(+) [DONE] Conceal private inline functions.

(+) [DONE] Shorten inclusion of StringInline.h in String.h.

(+) [DONE] Change USE_INTERNAL_INLINE TO DISABLE_INTERNAL_INLINE or get

rid of altogether.

(+) [DONE] useCamelNotationOnAllFunctionNames.

(+) [DONE] Check for overflow condition in StringRep::alloc().

(+) [DONE] Remove tabs (used vim ":set expandtab" and ":retab").

(+) [DONE] Fix throw-related memory leak.

(+) [DONE] Look at PEP223 for coding security guidelines.

(+) [DONE] Use old AtomicInt for now (split new AtomicInt into another

bug).

(+) [DONE] Removed appendASCII() and the ASCII form of the constructor.

-----------

(+) DOC++ String.h

================================================================================

Legend:

Removed from v.1.39
changed lines
	Added in v.1.111.6.15

No CVS admin address has been configured