pegasus/src/Pegasus/Common/String.cpp - diff

Return to String.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

Diff for /pegasus/src/Pegasus/Common/String.cpp between version 1.95 and 1.102

version 1.95, 2004/06/15 18:38:24

version 1.102, 2005/04/23 18:32:14

Line 1

//%2003////////////////////////////////////////////////////////////////////////

//%2005////////////////////////////////////////////////////////////////////////

// Company, L. P., IBM Corp., The Open Group, Tivoli Systems.

// Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.

// IBM Corp.; EMC Corporation, The Open Group.

// IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.

// EMC Corporation; VERITAS Software Corporation; The Open Group.

// Permission is hereby granted, free of charge, to any person obtaining a copy

// of this software and associated documentation files (the "Software"), to

Line 26

Line 30

// Author: Mike Brasher (mbrasher@bmc.com)

// Modified By: Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)

// Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297

//%/////////////////////////////////////////////////////////////////////////////

Line 41

Line 46

#include "CommonUTF.h"

#ifdef PEGASUS_HAS_ICU

#include <unicode/unistr.h>

#include <unicode/ustring.h>

#include <unicode/uchar.h>

#endif

PEGASUS_USING_STD;

Line 114

Line 120

const String String::EMPTY = String();

Uint32 _strnlen(const char* str, Uint32 n)

inline Uint32 _StrLen(const Char16* str)

{

if (!str)

throw NullPointer();

for (Uint32 i=0; i<n; i++)

Uint32 n = 0;

{

if (!*str)

while (*str++)

{

n++;

return i;

}

return n;

}

Uint32 _strnlen(const Char16* str, Uint32 n)

// Converts a utf-8 char buffer to utf-16 and appends the utf-16 to the Array.

// n is the length of the input char *, if stopAtTerm is 0

// A terminator character is appended to the end.

// Note that each input char is converted individually, which gives

// the fastest performance.

void _convertAndAppend(const char* str, Array<Char16>& c16a, Uint32 n, Uint8 stopAtTerm)

{

if (!str)

Uint32 i = 0;

throw NullPointer();

while ((stopAtTerm && *str) || (!stopAtTerm && i < n))

for (Uint32 i=0; i<n; i++)

{

if (!*str)

if (*(Uint8*)str <= 0x7f)

{

return i;

// Current byte sequence is in the us-ascii range.

}

c16a.append(Uint8(*str++));

}

return n;

}

else

inline Uint32 _StrLen(const char* str)

{

if (!str)

throw NullPointer();

// Current byte sequence is not in the us-ascii range.

return strlen(str);

// Check if the byte sequence is valid utf-8, and if so,

// call the converter to utf-16

Uint16 tgt[3];

tgt[1] = 0;

Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*str);

if ( (!stopAtTerm && i + c >= n) ||

(!isValid_U8((const Uint8 *)str, c+1)) )

{

// Note about error conditions.

// It is possible that the last utf-8 char before the

// end of input string extends past the end of the input string.

// This is caught in both cases -

// If counting up to n, then the test above catches it.

// If converting until terminator found, then a terminator

// in the middle of a multi-byte utf-8 char is invalid.

MessageLoaderParms parms("Common.String.BAD_UTF8",

"The byte sequence starting at index $0 is not valid UTF-8 encoding.",

i);

throw Exception(parms);

}

else

inline Uint32 _StrLen(const Char16* str)

{

if (!str)

// str is incremented by this call to the start of the next char

throw NullPointer();

Uint16 * tgtBuf = tgt;

UTF8toUTF16((const Uint8 **)&str, (Uint8 *)&str[c+1], &tgtBuf, &tgtBuf[2]);

c16a.append(tgt[0]);

if (tgt[1])

{

// It's a UTF-16 surrogate pair (uses 2 Char16's)

c16a.append(tgt[1]);

}

Uint32 n = 0;

// bump by the trailing byte count

i += c;

}

while (*str++)

i++;

n++;

} // end while

return n;

c16a.append('\0');

}

class StringRep

Line 221

Line 253

String::String(const char* str)

{

_rep = new StringRep;

assign(str);

_convertAndAppend(str, _rep->c16a, 0, 1);

}

String::String(const char* str, Uint32 n)

{

_rep = new StringRep;

assign(str, n);

_convertAndAppend(str, _rep->c16a, n, 0);

}

String::~String()

Line 260

Line 292

String& String::assign(const Char16* str, Uint32 n)

{

_rep->c16a.clear();

Uint32 m = _strnlen(str, n);

_rep->c16a.append(str, n);

_rep->c16a.append(str, m);

_rep->c16a.append('\0');

return *this;

}

String& String::assign(const char* str, Uint32 n)

String& String::assign(const char* str)

{

char *tmpStr = new char[n+1];

_rep->c16a.clear();

memset(tmpStr,0x00,n+1);

_convertAndAppend(str, _rep->c16a, 0, 1);

return *this;

strncpy(tmpStr,str,n);

}

assign(tmpStr);

delete tmpStr;

/**
    Replaces the contents of this String with the conversion of a
    length-delimited char buffer.

    The stored Char16 array is emptied and then refilled by
    _convertAndAppend(), called with a zero stop-at-terminator flag so that
    n is used as the input length.

    @param str Input char buffer.
    @param n Number of input chars to convert.
    @return A reference to this String.
*/
String& String::assign(const char* str, Uint32 n)
{
    Array<Char16>& units = _rep->c16a;

    // Drop the previous contents before the helper appends the new ones.
    units.clear();
    _convertAndAppend(str, units, n, 0);

    return *this;
}

Line 323

Line 356

String& String::append(const Char16* str, Uint32 n)

{

Uint32 m = _strnlen(str, n);

if (!str)

_rep->c16a.reserveCapacity(_rep->c16a.size() + m);

{

throw NullPointer();

}

_rep->c16a.reserveCapacity(_rep->c16a.size() + n);

_rep->c16a.remove(_rep->c16a.size() - 1);

_rep->c16a.append(str, m);

_rep->c16a.append(str, n);

_rep->c16a.append('\0');

return *this;

}

Line 436

Line 472

/**
    Lower-cases this String without naming a locale.

    Forwards to toLower(const char*) with a null locale pointer.
*/
void String::toLower()
{
    // The typed null selects the const char* overload.
    String::toLower(static_cast<const char*>(NULL));
}

void String::toLower(const char * strLocale)

{

#ifdef PEGASUS_HAS_ICU

UnicodeString UniStr((const UChar *)_rep->c16a.getData());

// This will do a locale-insensitive, but context-sensitive convert.

if(strLocale == NULL)

// Context-sensitive prevents any optimizations that try to

// convert just the ascii before calling ICU.

// The string may shrink or expand after the convert.

int32_t sz = size();

UChar* destbuf = new UChar[sz + 1];

const UChar* srcbuf = (const UChar *)getChar16Data();

UErrorCode err = U_ZERO_ERROR;

int32_t needed = u_strToLower(destbuf, sz + 1 , srcbuf, sz, NULL, &err);

if (err == U_BUFFER_OVERFLOW_ERROR)

{

UniStr.toLower();

delete [] destbuf;

destbuf = new UChar[needed + 1];

err = U_ZERO_ERROR;

u_strToLower(destbuf, needed + 1 , srcbuf, sz, NULL, &err);

}

else

if (U_FAILURE(err))

{

Locale loc(strLocale);

delete [] destbuf;

if(loc.isBogus())

throw Exception(u_errorName(err));

}

if (needed == sz)

{

throw InvalidNameException(String(strLocale));

Char16* from = (Char16*)destbuf;

for (Char16* to = &_rep->c16a[0]; *to; to++, from++)

{

*to = *from;

}

UniStr.toLower(loc);

else

{

assign((Char16 *)destbuf, needed);

}

UniStr.append((UChar)'\0');

assign((Char16*)UniStr.getBuffer());

delete [] destbuf;

#else

for (Char16* p = &_rep->c16a[0]; *p; p++)

{

Line 468

Line 520

#endif

}

void String::toUpper(const char * strLocale)

void String::toUpper()

{

#ifdef PEGASUS_HAS_ICU

UnicodeString UniStr((const UChar *)_rep->c16a.getData());

// This will do a locale-insensitive, but context-sensitive convert.

if(strLocale == NULL)

// Context-sensitive prevents any optimizations that try to

// convert just the ascii before calling ICU.

// The string may shrink or expand after the convert.

int32_t sz = size();

UChar* destbuf = new UChar[sz + 1];

const UChar* srcbuf = (const UChar *)getChar16Data();

UErrorCode err = U_ZERO_ERROR;

int32_t needed = u_strToUpper(destbuf, sz + 1 , srcbuf, sz, NULL, &err);

if (err == U_BUFFER_OVERFLOW_ERROR)

{

UniStr.toUpper();

delete [] destbuf;

destbuf = new UChar[needed + 1];

err = U_ZERO_ERROR;

u_strToUpper(destbuf, needed + 1 , srcbuf, sz, NULL, &err);

}

else

if (U_FAILURE(err))

{

delete [] destbuf;

throw Exception(u_errorName(err));

}

if (needed == sz)

{

Locale loc(strLocale);

Char16* from = (Char16*)destbuf;

if(loc.isBogus())

for (Char16* to = &_rep->c16a[0]; *to; to++, from++)

{

throw InvalidNameException(String(strLocale));

*to = *from;

}

UniStr.toUpper(loc);

}

UniStr.append((UChar)'\0');

else

{

assign((Char16 *)destbuf, needed);

}

assign((Char16*)UniStr.getBuffer());

delete [] destbuf;

#else

for (Char16* p = &_rep->c16a[0]; *p; p++)

{

Line 536

Line 609

/**
    Compares two Strings without regard to case and without naming a locale.

    Forwards to the three-argument compareNoCase() with a null locale pointer
    and returns whatever that overload returns.

    @param s1 First String to compare.
    @param s2 Second String to compare.
    @return The result of the three-argument overload.
*/
int String::compareNoCase(const String& s1, const String& s2)
{
    // The typed null selects the overload taking a const char* locale.
    return String::compareNoCase(s1, s2, static_cast<const char*>(NULL));
}

int String::compareNoCase(const String& s1, const String& s2,const char * strLocale)

{

#ifdef PEGASUS_HAS_ICU

UnicodeString UniStr1((const UChar *)s1.getChar16Data(), (int32_t)s1.size());

return u_strcasecmp((const UChar*)s1.getChar16Data(),

UnicodeString UniStr2((const UChar *)s2.getChar16Data(), (int32_t)s2.size());

(const UChar*)s2.getChar16Data(),

if(strLocale == NULL)

U_FOLD_CASE_DEFAULT);

{

UniStr1.toLower();

UniStr2.toLower();

}

else

{

Locale loc(strLocale);

if(loc.isBogus())

{

throw InvalidNameException(String(strLocale));

}

UniStr1.toLower(loc);

UniStr2.toLower(loc);

}

// Note: the ICU 2.6.1 documentation for UnicodeString::compare( ) is

// backwards! The API actually returns +1 if this is greater than text.

// This is why the line below appears wrong based on the 2.6.1 docs.

// (ref. bugzilla 1207)

return (UniStr1.compare(UniStr2));

#else

const Char16* _s1 = s1.getChar16Data();

const Char16* _s2 = s2.getChar16Data();

Line 603

Line 651

/**
    Tests two Strings for equality without regard to case and without naming
    a locale.

    Forwards to the three-argument equalNoCase() with a null locale pointer
    and returns whatever that overload returns.

    @param str1 First String to compare.
    @param str2 Second String to compare.
    @return The result of the three-argument overload.
*/
Boolean String::equalNoCase(const String& str1, const String& str2)
{
    // The typed null selects the overload taking a const char* locale.
    return String::equalNoCase(str1, str2, static_cast<const char*>(NULL));
}

Boolean String::equalNoCase(const String& str1, const String& str2,const char * strLocale)

{

#ifdef PEGASUS_HAS_ICU

UnicodeString UniStr1((const UChar *)str1.getChar16Data(), (int32_t)str1.size());

return compareNoCase(str1, str2) == 0;

UnicodeString UniStr2((const UChar *)str2.getChar16Data(), (int32_t)str2.size());

if(strLocale == NULL)

{

UniStr1.toLower();

UniStr2.toLower();

}

else

{

Locale loc(strLocale);

if(loc.isBogus())

{

throw InvalidNameException(String(strLocale));

}

UniStr1.toLower(loc);

UniStr2.toLower(loc);

}

return (UniStr1 == UniStr2);

#else

if (str1.size() != str2.size())

return false;

Line 653

Line 678

#endif

}

// UTF8 specific code:

/**
    Replaces the contents of this String with the UTF-16 conversion of a
    NUL-terminated char string.

    @param str NUL-terminated input string; must not be null.
    @return A reference to this String.
    @exception NullPointer if str is null. The String is left unmodified in
        that case.
*/
String& String::assign(const char* str)
{
    // Reject a null pointer up front rather than handing it to strlen(),
    // and do so before the current contents are discarded.
    if (!str)
        throw NullPointer();

    _rep->c16a.clear();

    // n is the input byte count plus one for the terminator.
    Uint32 n = strlen(str) + 1;

    // The source range ends at the terminator, which is not converted.
    const Uint8 *strsrc = (Uint8 *)str;
    Uint8 *endsrc = (Uint8 *)&str[n-1];

    // Scratch buffer with one Char16 slot per input byte plus one extra.
    Char16 *msg16 = new Char16[n];
    Uint16 *strtgt = (Uint16 *)msg16;
    Uint16 *endtgt = (Uint16 *)&msg16[n];

    // NOTE(review): the converter's return value is ignored here, so a
    // failed conversion is not reported to the caller.
    UTF8toUTF16(&strsrc,
                endsrc,
                &strtgt,
                endtgt);

    // Count the converted units: stop at the first zero Char16 or after
    // n - 1 slots, whichever comes first.
    // NOTE(review): this relies on slots the converter did not write reading
    // as zero -- presumably Char16's default constructor zero-initializes;
    // confirm.
    Uint32 count;
    for(count = 0; ((msg16[count]) != Char16(0x00)) && (count < (n - 1)); ++count);

    _rep->c16a.append(msg16, count);
    _rep->c16a.append('\0');

    delete [] msg16;

    return *this;
}

CString String::getCString() const

{

Line 916

Line 911

os << utf8str;

#elif defined(PEGASUS_HAS_ICU)

if(os == cout || os == cerr){

char *buf = NULL;

const int size = str.size() * 6;

UnicodeString UniStr((const UChar *)str.getChar16Data(), (int32_t)str.size());

Line 927

Line 921

os << buf;

os.flush();

delete [] buf;

}else{

CString cstr = str.getCString();

const char* utf8str = cstr;

os << utf8str;

}

#else

for (Uint32 i = 0, n = str.size(); i < n; i++)

{

Legend:

Removed from v.1.95
changed lines
	Added in v.1.102

No CVS admin address has been configured