pegasus/src/Pegasus/Common/String.cpp - diff

Return to String.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

Diff for /pegasus/src/Pegasus/Common/String.cpp between version 1.68 and 1.109

version 1.68, 2003/04/30 13:50:44

version 1.109, 2005/05/18 20:34:36

Line 1

//%/////////////////////////////////////////////////////////////////////////////

//%2005////////////////////////////////////////////////////////////////////////

// The Open Group, Tivoli Systems

// Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.

// IBM Corp.; EMC Corporation, The Open Group.

// IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.

// EMC Corporation; VERITAS Software Corporation; The Open Group.

// Permission is hereby granted, free of charge, to any person obtaining a copy

// of this software and associated documentation files (the "Software"), to

Line 24

Line 30

// Author: Mike Brasher (mbrasher@bmc.com)

// Modified By: Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)

// Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297

//%/////////////////////////////////////////////////////////////////////////////

Line 32

Line 39

#include <cstring>

#include "String.h"

#include "Array.h"

#include "AutoPtr.h"

#include "InternalException.h"

#include <iostream>

#include <fstream>

#ifndef PEGASUS_REMOVE_DEPRECATED

#include <Pegasus/Common/CommonUTF.h>

#include "System.h" // for strcasecmp

#include "CommonUTF.h"

#ifdef PEGASUS_HAS_ICU

#include <unicode/ustring.h>

#include <unicode/uchar.h>

#endif

PEGASUS_USING_STD;

Line 56

Line 69

// Copy constructor: makes this CString own a private duplicate of the
// NUL-terminated buffer held by cstr. When cstr holds no buffer, this
// object holds none either.
CString::CString(const CString& cstr)
{
    _rep = 0;

    if (cstr._rep)
    {
        const char* source = static_cast<const char*>(cstr._rep);

        // Include the terminator in the byte count so a single copy suffices.
        const size_t bytes = strlen(source) + 1;
        char* duplicate = new char[bytes];
        memcpy(duplicate, source, bytes);
        _rep = duplicate;
    }
}

CString::CString(char* cstr)

: _rep(cstr)

Line 68

Line 86

// Destructor: releases the character buffer owned by this object, if any.
CString::~CString()
{
    // delete [] on a null pointer is a no-op, so no explicit guard is needed.
    delete [] static_cast<char*>(_rep);
}

// Copy assignment: replaces the buffer owned by this object with a private
// duplicate of the buffer held by cstr (or with no buffer when cstr holds
// none). Self-assignment is a no-op. Returns *this.
CString& CString::operator=(const CString& cstr)
{
    if (&cstr != this)
    {
        // Build the duplicate first so that a failed allocation leaves the
        // current contents untouched.
        char* duplicate = 0;

        if (cstr._rep)
        {
            const char* source = static_cast<const char*>(cstr._rep);
            const size_t bytes = strlen(source) + 1;
            duplicate = new char[bytes];
            memcpy(duplicate, source, bytes);
        }

        // Only now release the old buffer and adopt the new one.
        delete [] static_cast<char*>(_rep);
        _rep = duplicate;
    }

    return *this;
}

Line 91

Line 122

// Class-level constant initialized from a default-constructed String.
const String String::EMPTY = String();

Uint32 _strnlen(const char* str, Uint32 n)

inline Uint32 _StrLen(const Char16* str)

{

if (!str)

throw NullPointer();

for (Uint32 i=0; i<n; i++)

Uint32 n = 0;

{

if (!*str)

while (*str++)

{

n++;

return i;

}

return n;

}

Uint32 _strnlen(const Char16* str, Uint32 n)

// Converts a utf-8 char buffer to utf-16 and appends the utf-16 to the Array.

// n is the length of the input char *, if stopAtTerm is 0

// A terminator character is appended to the end.

// Note that each input char is converted individually, which gives

// the fastest performance.

void _convertAndAppend(const char* str, Array<Char16>& c16a, Uint32 n, Uint8 stopAtTerm)

{

if (!str)

Uint32 i = 0;

throw NullPointer();

while ((stopAtTerm && *str) || (!stopAtTerm && i < n))

for (Uint32 i=0; i<n; i++)

{

if (!*str)

if (*(Uint8*)str <= 0x7f)

{

return i;

// Current byte sequence is in the us-ascii range.

}

c16a.append(Uint8(*str++));

}

else

return n;

}

inline Uint32 _StrLen(const char* str)

{

if (!str)

throw NullPointer();

// Current byte sequence is not in the us-ascii range.

return strlen(str);

// Check if the byte sequence is valid utf-8, and if so,

// call the converter to utf-16

Uint16 tgt[3];

tgt[1] = 0;

Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*str);

if ( (!stopAtTerm && i + c >= n) ||

(!isValid_U8((const Uint8 *)str, c+1)) )

{

// Note about error conditions.

// It is possible that the last utf-8 char before the

// end of input string extends past the end of the input string.

// This is caught in both cases -

// If counting up to n, then the test above catches it.

// If converting until terminator found, then a terminator

// in the middle of a multi-byte utf-8 char is invalid.

MessageLoaderParms parms("Common.String.BAD_UTF8",

"The byte sequence starting at index $0 is not valid UTF-8 encoding.",

i);

throw Exception(parms);

}

else

inline Uint32 _StrLen(const Char16* str)

{

if (!str)

// str is incremented by this call to the start of the next char

throw NullPointer();

Uint16 * tgtBuf = tgt;

UTF8toUTF16((const Uint8 **)&str, (Uint8 *)&str[c+1], &tgtBuf, &tgtBuf[2]);

c16a.append(tgt[0]);

if (tgt[1])

{

// It's a UTF-16 surrogate pair (uses 2 Char16's)

c16a.append(tgt[1]);

}

Uint32 n = 0;

// bump by the trailing byte count

i += c;

}

while (*str++)

i++;

n++;

} // end while

return n;

c16a.append('\0');

}

class StringRep

Line 186

Line 243

// Constructs a String from the Char16 buffer str.
// Throws NullPointer when str is null.
String::String(const Char16* str)
{
    if (!str)
        throw NullPointer();

    _rep = new StringRep(str);
}

// Constructs a String from the Char16 buffer str, passing n through to
// assign(const Char16*, Uint32) as the character count.
// Throws NullPointer when str is null.
String::String(const Char16* str, Uint32 n)
{
    if (str == 0)
    {
        throw NullPointer();
    }

    _rep = new StringRep;

    // assign() can throw; since the destructor does not run for a
    // half-constructed object, hold the representation in a temporary
    // AutoPtr until the assignment has succeeded.
    AutoPtr<StringRep> tempRep(_rep);
    assign(str, n);
    tempRep.release();
}

String::String(const char* str)

{

if ( str == 0 )

{

throw NullPointer();

}

_rep = new StringRep;

assign(str);

AutoPtr<StringRep> tempRep(_rep);

// An exception can be thrown, so use a temp AutoPtr.

_convertAndAppend(str, _rep->c16a, 0, 1);

tempRep.release();

}

String::String(const char* str, Uint32 n)

{

if ( str == 0 )

{

throw NullPointer();

}

_rep = new StringRep;

assign(str, n);

AutoPtr<StringRep> tempRep(_rep);

// An exception can be thrown, so use a temp AutoPtr.

_convertAndAppend(str, _rep->c16a, n, 0);

tempRep.release();

}

String::~String()

Line 214

Line 297

String& String::operator=(const String& str)

{

return assign(str);

if (&str != this)

{

assign(str);

}

return *this;

}

String& String::assign(const String& str)

Line 225

Line 312

String& String::assign(const Char16* str)

{

if ( str == 0 )

{

throw NullPointer();

}

_rep->c16a.clear();

_rep->c16a.append(str, _StrLen(str) + 1);

return *this;

Line 232

Line 324

String& String::assign(const Char16* str, Uint32 n)

{

if ( str == 0 )

{

throw NullPointer();

}

_rep->c16a.clear();

Uint32 m = _strnlen(str, n);

_rep->c16a.append(str, n);

_rep->c16a.append(str, m);

_rep->c16a.append('\0');

return *this;

}

String& String::assign(const char* str)

{

_rep->c16a.clear();

if ( str == 0 )

{

Uint32 n = strlen(str) + 1;

throw NullPointer();

_rep->c16a.reserveCapacity(n);

}

while (n--)

_rep->c16a.append(Uint8(*str++));

_rep->c16a.clear();

_convertAndAppend(str, _rep->c16a, 0, 1);

return *this;

}

String& String::assign(const char* str, Uint32 n)

{

_rep->c16a.clear();

if ( str == 0 )

{

Uint32 _n = _strnlen(str, n);

throw NullPointer();

_rep->c16a.reserveCapacity(_n + 1);

}

while (_n--)

_rep->c16a.append(Uint8(*str++));

_rep->c16a.append('\0');

_rep->c16a.clear();

_convertAndAppend(str, _rep->c16a, n, 0);

return *this;

}

Line 280

Line 372

// Returns the number of Char16 elements in the string: the element count of
// the backing array minus one (the assign/append routines in this file store
// a trailing '\0' element after the characters).
//
// Historical note: a VMS-specific variant that returned 0 when the backing
// array was empty (to avoid wrapping below zero on first use by the XML
// parser) was sketched here but is disabled.
Uint32 String::size() const
{
    const Uint32 stored = _rep->c16a.size();
    return stored - 1;
}

const Char16* String::getChar16Data() const

Line 288

Line 401

return _rep->c16a.getData();

}

// Returns a CString built by narrowing every Char16 element (including the
// trailing terminator) to a single char. The high byte of each element is
// discarded without any indication to the caller.
CString String::getCString() const
{
    const Uint32 count = size() + 1;
    char* narrow = new char[count];
    const Char16* wide = getChar16Data();

    Uint32 i = 0;
    while (i < count)
    {
        const Uint16 unit = wide[i];
        narrow[i] = char(unit);
        ++i;
    }

    return CString(narrow);
}

Char16& String::operator[](Uint32 index)

{

if (index > size())

Line 331

Line 425

String& String::append(const Char16* str, Uint32 n)

{

Uint32 m = _strnlen(str, n);

if (str == 0)

_rep->c16a.reserveCapacity(_rep->c16a.size() + m);

{

throw NullPointer();

}

_rep->c16a.reserveCapacity(_rep->c16a.size() + n);

_rep->c16a.remove(_rep->c16a.size() - 1);

_rep->c16a.append(str, m);

_rep->c16a.append(str, n);

_rep->c16a.append('\0');

return *this;

}

Line 365

Line 463

return String(getChar16Data() + index, length);

}

else

return String();

}

Line 420

Line 518

Uint32 i = 1;

for (; i < subStrLen; i++)

if (*pStr++ != *p++ )

{pStr--; break;} // break from loop

{pStr-=i; break;} // break from loop

if (i == subStrLen)

return loc;

}

Line 442

Line 540

return PEG_NOT_FOUND;

}

// ATTN-RK-P3-20020509: Define case-sensitivity for non-English characters

// Converts this String to lower case in place.
//
// When ICU is compiled in and initialized, the conversion is delegated to
// u_strToLower; the result may differ in length from the original, in which
// case the contents are replaced via assign(). Otherwise only elements not
// greater than PEGASUS_MAX_PRINTABLE_CHAR are passed through tolower().
//
// Throws Exception (carrying the ICU error name) when ICU reports a failure.
void String::toLower()
{
#ifdef PEGASUS_HAS_ICU
    if (InitializeICU::initICUSuccessful())
    {
        // This will do a locale-insensitive, but context-sensitive convert.
        // Context-sensitive prevents any optimizations that try to
        // convert just the ascii before calling ICU.
        // The string may shrink or expand after the convert.

        int32_t sz = size();
        UChar* destbuf = new UChar[sz + 1];
        const UChar* srcbuf = (const UChar *)getChar16Data();
        UErrorCode err = U_ZERO_ERROR;

        // First attempt with a buffer the size of the source; ICU reports
        // the required length when that is not enough.
        int32_t needed = u_strToLower(destbuf, sz + 1, srcbuf, sz, NULL, &err);
        if (err == U_BUFFER_OVERFLOW_ERROR)
        {
            delete [] destbuf;
            destbuf = new UChar[needed + 1];
            err = U_ZERO_ERROR;
            u_strToLower(destbuf, needed + 1, srcbuf, sz, NULL, &err);
        }

        if (U_FAILURE(err))
        {
            delete [] destbuf;
            throw Exception(u_errorName(err));
        }

        if (needed == sz)
        {
            // Same length: overwrite the existing elements in place.
            Char16* from = (Char16*)destbuf;
            for (Char16* to = &_rep->c16a[0]; *to; to++, from++)
            {
                *to = *from;
            }
        }
        else
        {
            // Length changed: replace the contents. Free the scratch buffer
            // even if the assignment throws.
            try
            {
                assign((Char16 *)destbuf, needed);
            }
            catch (...)
            {
                delete [] destbuf;
                throw;
            }
        }

        delete [] destbuf;
    }
    else
#endif
    {
        for (Char16* p = &_rep->c16a[0]; *p; p++)
        {
            if (*p <= PEGASUS_MAX_PRINTABLE_CHAR)
                *p = tolower(*p);
        }
    }
}

// Converts this String to upper case in place.
//
// When ICU is compiled in and initialized, the conversion is delegated to
// u_strToUpper; the result may differ in length from the original, in which
// case the contents are replaced via assign(). Otherwise only elements not
// greater than PEGASUS_MAX_PRINTABLE_CHAR are passed through toupper().
//
// Throws Exception (carrying the ICU error name) when ICU reports a failure.
void String::toUpper()
{
#ifdef PEGASUS_HAS_ICU
    if (InitializeICU::initICUSuccessful())
    {
        // This will do a locale-insensitive, but context-sensitive convert.
        // Context-sensitive prevents any optimizations that try to
        // convert just the ascii before calling ICU.
        // The string may shrink or expand after the convert.

        int32_t sz = size();
        UChar* destbuf = new UChar[sz + 1];
        const UChar* srcbuf = (const UChar *)getChar16Data();
        UErrorCode err = U_ZERO_ERROR;

        // First attempt with a buffer the size of the source; ICU reports
        // the required length when that is not enough.
        int32_t needed = u_strToUpper(destbuf, sz + 1, srcbuf, sz, NULL, &err);
        if (err == U_BUFFER_OVERFLOW_ERROR)
        {
            delete [] destbuf;
            destbuf = new UChar[needed + 1];
            err = U_ZERO_ERROR;
            u_strToUpper(destbuf, needed + 1, srcbuf, sz, NULL, &err);
        }

        if (U_FAILURE(err))
        {
            delete [] destbuf;
            throw Exception(u_errorName(err));
        }

        if (needed == sz)
        {
            // Same length: overwrite the existing elements in place.
            Char16* from = (Char16*)destbuf;
            for (Char16* to = &_rep->c16a[0]; *to; to++, from++)
            {
                *to = *from;
            }
        }
        else
        {
            // Length changed: replace the contents. Free the scratch buffer
            // even if the assignment throws.
            try
            {
                assign((Char16 *)destbuf, needed);
            }
            catch (...)
            {
                delete [] destbuf;
                throw;
            }
        }

        delete [] destbuf;
    }
    else
#endif
    {
        for (Char16* p = &_rep->c16a[0]; *p; p++)
        {
            if (*p <= PEGASUS_MAX_PRINTABLE_CHAR)
                *p = toupper(*p);
        }
    }
}

int String::compare(const String& s1, const String& s2, Uint32 n)

{

Line 491

Line 689

int String::compareNoCase(const String& s1, const String& s2)

{

#ifdef PEGASUS_HAS_ICU

if (InitializeICU::initICUSuccessful())

{

return u_strcasecmp((const UChar*)s1.getChar16Data(),

(const UChar*)s2.getChar16Data(),

U_FOLD_CASE_DEFAULT);

}

#endif

const Char16* _s1 = s1.getChar16Data();

const Char16* _s2 = s2.getChar16Data();

Line 527

Line 733

Boolean String::equalNoCase(const String& str1, const String& str2)

{

#ifdef PEGASUS_HAS_ICU

return compareNoCase(str1, str2) == 0;

#else

if (str1.size() != str2.size())

return false;

Line 548

Line 757

}

return true;

#endif

}

// Returns a CString holding the UTF-8 form of this String, produced by
// UTF16toUTF8. The conversion writes into a scratch buffer sized at three
// bytes per Char16 plus one; the result is then copied into an exactly
// sized buffer whose ownership passes to the returned CString.
CString String::getCString() const
{
    Uint32 n = 3*size() + 1;
    char* str = new char[n];
    char* str1 = 0;

    // Anything below may throw (at least the second allocation can); make
    // sure the scratch buffer is released on that path too.
    try
    {
        const Char16* msg16 = getChar16Data();

        // The source range includes the terminator element (size()+1).
        const Uint16 *strsrc = (Uint16 *)msg16;
        Uint16 *endsrc = (Uint16 *)&msg16[size()+1];

        Uint8 *strtgt = (Uint8 *)str;
        Uint8 *endtgt = (Uint8 *)&str[n];

        UTF16toUTF8 (&strsrc,
                     endsrc,
                     &strtgt,
                     endtgt);

        // Measure once and copy the terminator along with the text.
        const size_t bytes = strlen(str) + 1;
        str1 = new char[bytes];
        memcpy(str1, str, bytes);
    }
    catch (...)
    {
        delete [] str;
        throw;
    }

    delete [] str;
    return CString(str1);
}

#if 0

// ATTN-RK-P3-20020603: This code is not completely correct

// Wildcard String matching function that may be useful in the future

Line 565

Line 800

special characters in the pattern: *?\[] (see the manual

entry for details on what these mean).

Side effects: None.

Line 576

Line 812

// Returns the lower-case form of ch when ch does not exceed
// PEGASUS_MAX_PRINTABLE_CHAR; any other value is returned unchanged.
inline Uint16 _ToLower(Uint16 ch)
{
    // ICU_TODO: If ICU is available we should do this the correct way.

    if (ch > PEGASUS_MAX_PRINTABLE_CHAR)
        return ch;

    // tolower() requires an argument representable as unsigned char (or EOF).
    // Going through plain char can yield a negative value where char is
    // signed, so narrow through unsigned char instead.
    return Uint16(tolower(static_cast<unsigned char>(ch)));
}

// Compares two match characters. When nocase is non-zero both sides are
// folded with _ToLower before comparing; otherwise they are compared as-is.
inline Boolean _Equal(MatchChar ch1, MatchChar ch2, int nocase)
{
    // ICU_TODO: If ICU is available we should do this the correct way.

    return nocase ? (_ToLower(ch1) == _ToLower(ch2)) : (ch1 == ch2);
}

Line 709

Line 947

@param str String containing the string to be matched.

@param pattern GLOB style pattern to use in the match.

@return Boolean true if str matches pattern

@SeeAlso match

@see match

Boolean String::matchNoCase(const String& str, const String& pattern)

{

Line 747

Line 985

PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)

{

#ifdef PEGASUS_OS_OS400

int inc = 0;

#if defined(PEGASUS_OS_OS400)

int newbuf = 0;

CString cstr = str.getCString();

char *buffer = NULL;

const char* utf8str = cstr;

char buffer1[201];

char temp[2];

os << utf8str;

if (str.size() > 200)

{

#elif defined(PEGASUS_HAS_ICU)

buffer = new char[str.size()+1];

if (InitializeICU::initICUSuccessful())

newbuf = 1;

{

char *buf = NULL;

const int size = str.size() * 6;

UnicodeString UniStr((const UChar *)str.getChar16Data(), (int32_t)str.size());

Uint32 bufsize = UniStr.extract(0,size,buf);

buf = new char[bufsize+1];

UniStr.extract(0,bufsize,buf);

os << buf;

os.flush();

delete [] buf;

}

else

buffer = buffer1;

#endif // End of PEGASUS_HAS_ICU #else leg.

#endif

{

for (Uint32 i = 0, n = str.size(); i < n; i++)

{

Uint16 code = str[i];

if (code > 0 && code <= PEGASUS_MAX_PRINTABLE_CHAR)

{

#ifdef PEGASUS_OS_OS400

// process so messages don't get displayed as one char per line on OS/400.

// Uint16 is a 2 byte character where byte 1 is '00' and byte 2 is

// the character. Also, the entire string needs to be sent to os instead

// of one "byte/Uint16" at a time. Sending one "byte/Uint16" at a time also

// causes one character per line. On OS/400 use of os << char(code) is a

// restriction and no available c/cpp alternative was available. The

// following was created to compensate for this restriction.

memcpy(temp, &code, 2);

memcpy(buffer+inc, &temp[1], 1); // do not include the '00'

if ((i+1) == n) // last character

{

memset(buffer+n, 0x00, 1); // add null terminator

os << buffer; // return 1-byte per character string

if (buffer && newbuf != 0)

delete [] buffer; // okay; this is the end of the loop

}

inc++;

#else

os << char(code);

#endif

}

else

{

Line 798

Line 1025

os << buffer;

}

return os;

}

Line 827

Line 1055

return String::compare(str1, str2) >= 0;

}

#ifndef PEGASUS_REMOVE_DEPRECATED

// Deprecated free function kept for compatibility: forwards both C strings
// to System::strcasecmp and returns its result.
int CompareNoCase(const char* s1, const char* s2)
{
    const int ordering = System::strcasecmp(s1, s2);
    return ordering;
}

#endif

PEGASUS_NAMESPACE_END

Legend:

Removed from v.1.68
changed lines
	Added in v.1.109

No CVS admin address has been configured