version 1.66, 2003/01/25 14:38:58
|
version 1.109, 2005/05/18 20:34:36
|
|
|
//%///////////////////////////////////////////////////////////////////////////// |
//%2005//////////////////////////////////////////////////////////////////////// |
// | // |
// Copyright (c) 2000, 2001, 2002 BMC Software, Hewlett-Packard Company, IBM, |
// Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development |
// The Open Group, Tivoli Systems |
// Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems. |
|
// Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.; |
|
// IBM Corp.; EMC Corporation, The Open Group. |
|
// Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.; |
|
// IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group. |
|
// Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.; |
|
// EMC Corporation; VERITAS Software Corporation; The Open Group. |
// | // |
// Permission is hereby granted, free of charge, to any person obtaining a copy | // Permission is hereby granted, free of charge, to any person obtaining a copy |
// of this software and associated documentation files (the "Software"), to | // of this software and associated documentation files (the "Software"), to |
|
|
// Author: Mike Brasher (mbrasher@bmc.com) | // Author: Mike Brasher (mbrasher@bmc.com) |
// | // |
// Modified By: Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com) | // Modified By: Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com) |
|
// Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297 |
// | // |
//%///////////////////////////////////////////////////////////////////////////// | //%///////////////////////////////////////////////////////////////////////////// |
| |
|
|
#include <cstring> | #include <cstring> |
#include "String.h" | #include "String.h" |
#include "Array.h" | #include "Array.h" |
|
#include "AutoPtr.h" |
#include "InternalException.h" | #include "InternalException.h" |
#include <iostream> | #include <iostream> |
#include <fstream> | #include <fstream> |
#ifndef PEGASUS_REMOVE_DEPRECATED |
#include <Pegasus/Common/CommonUTF.h> |
#include "System.h" // for strcasecmp |
|
|
#include "CommonUTF.h" |
|
|
|
#ifdef PEGASUS_HAS_ICU |
|
#include <unicode/ustring.h> |
|
#include <unicode/uchar.h> |
#endif | #endif |
| |
PEGASUS_USING_STD; | PEGASUS_USING_STD; |
|
|
| |
// Copy constructor: makes an independent copy of the NUL-terminated buffer
// held by cstr. When cstr holds no buffer, _rep is left at 0 and nothing is
// allocated.
CString::CString(const CString& cstr) | CString::CString(const CString& cstr) |
{ | { |
|
_rep = 0; |
|
|
|
// Only allocate and copy when the source actually holds a buffer.
if (cstr._rep) |
|
{ |
// strlen()+1 leaves room for the terminating NUL that strcpy() writes.
_rep = (void*)new char[strlen((char*)cstr._rep)+1]; | _rep = (void*)new char[strlen((char*)cstr._rep)+1]; |
strcpy((char*)_rep, (char*)cstr._rep); | strcpy((char*)_rep, (char*)cstr._rep); |
} | } |
|
} |
| |
CString::CString(char* cstr) | CString::CString(char* cstr) |
: _rep(cstr) | : _rep(cstr) |
|
|
// Destructor: frees the character buffer with delete[] when one is held.
CString::~CString() | CString::~CString() |
{ | { |
if (_rep) | if (_rep) |
|
{ |
// _rep is stored as void*, so it is cast back to char* before delete[].
delete [] (char*)_rep; | delete [] (char*)_rep; |
} | } |
|
} |
| |
// Copy assignment: replaces this object's buffer with a fresh copy of cstr's
// buffer. Self-assignment is a no-op. If cstr holds no buffer, this object
// ends up with _rep == 0.
// Returns *this.
CString& CString::operator=(const CString& cstr) | CString& CString::operator=(const CString& cstr) |
{ | { |
|
// Guard against self-assignment: the old buffer is freed before copying.
if (&cstr != this) |
|
{ |
|
if (_rep) |
|
{ |
|
delete [] (char*)_rep; |
|
// Reset so that _rep is not left dangling if cstr has no buffer.
_rep = 0; |
|
} |
|
if (cstr._rep) |
|
{ |
// strlen()+1 leaves room for the terminating NUL that strcpy() writes.
_rep = (char*)new char[strlen((char*)cstr._rep)+1]; | _rep = (char*)new char[strlen((char*)cstr._rep)+1]; |
strcpy((char*)_rep, (char*)cstr._rep); | strcpy((char*)_rep, (char*)cstr._rep); |
|
} |
|
} |
return *this; | return *this; |
} | } |
| |
|
|
| |
const String String::EMPTY = String(); | const String String::EMPTY = String(); |
| |
Uint32 _strnlen(const char* str, Uint32 n) |
inline Uint32 _StrLen(const Char16* str) |
{ | { |
if (!str) | if (!str) |
throw NullPointer(); | throw NullPointer(); |
| |
for (Uint32 i=0; i<n; i++) |
Uint32 n = 0; |
{ |
|
if (!*str) |
while (*str++) |
{ |
n++; |
return i; |
|
} |
|
} |
|
| |
return n; | return n; |
} | } |
| |
Uint32 _strnlen(const Char16* str, Uint32 n) |
// |
|
// Converts a utf-8 char buffer to utf-16 and appends the utf-16 to the Array. |
|
// n is the length of the input char *, if stopAtTerm is 0 |
|
// A terminator character is appended to the end. |
|
// Note that each input char is converted individually, which gives |
|
// the fastest performance. |
|
// |
|
void _convertAndAppend(const char* str, Array<Char16>& c16a, Uint32 n, Uint8 stopAtTerm) |
{ | { |
if (!str) |
Uint32 i = 0; |
throw NullPointer(); |
while ((stopAtTerm && *str) || (!stopAtTerm && i < n)) |
|
|
for (Uint32 i=0; i<n; i++) |
|
{ | { |
if (!*str) |
if (*(Uint8*)str <= 0x7f) |
{ | { |
return i; |
// Current byte sequence is in the us-ascii range. |
|
c16a.append(Uint8(*str++)); |
} | } |
} |
else |
|
|
return n; |
|
} |
|
|
|
inline Uint32 _StrLen(const char* str) |
|
{ | { |
if (!str) |
// |
throw NullPointer(); |
// Current byte sequence is not in the us-ascii range. |
|
// |
| |
return strlen(str); |
// Check if the byte sequence is valid utf-8, and if so, |
|
// call the converter to utf-16 |
|
Uint16 tgt[3]; |
|
tgt[1] = 0; |
|
Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*str); |
|
if ( (!stopAtTerm && i + c >= n) || |
|
(!isValid_U8((const Uint8 *)str, c+1)) ) |
|
{ |
|
// Note about error conditions. |
|
// It is possible that the last utf-8 char before the |
|
// end of input string extends past the end of the input string. |
|
// This is caught in both cases - |
|
// If counting up to n, then the test above catches it. |
|
// If converting until terminator found, then a terminator |
|
// in the middle of a multi-byte utf-8 char is invalid. |
|
MessageLoaderParms parms("Common.String.BAD_UTF8", |
|
"The byte sequence starting at index $0 is not valid UTF-8 encoding.", |
|
i); |
|
throw Exception(parms); |
} | } |
|
else |
inline Uint32 _StrLen(const Char16* str) |
|
{ | { |
if (!str) |
// str is incremented by this call to the start of the next char |
throw NullPointer(); |
Uint16 * tgtBuf = tgt; |
|
UTF8toUTF16((const Uint8 **)&str, (Uint8 *)&str[c+1], &tgtBuf, &tgtBuf[2]); |
|
c16a.append(tgt[0]); |
|
if (tgt[1]) |
|
{ |
|
// It's a UTF-16 surrogate pair (uses 2 Char16's) |
|
c16a.append(tgt[1]); |
|
} |
| |
Uint32 n = 0; |
// bump by the trailing byte count |
|
i += c; |
|
} |
|
} |
| |
while (*str++) |
i++; |
n++; |
} // end while |
| |
return n; |
c16a.append('\0'); |
} | } |
| |
class StringRep | class StringRep |
|
|
| |
// Constructs a String from a Char16 buffer by handing it to the StringRep
// constructor.
// Throws NullPointer when str is 0.
String::String(const Char16* str) | String::String(const Char16* str) |
{ | { |
|
// Reject a null pointer before any allocation takes place.
if ( str == 0 ) |
|
{ |
|
throw NullPointer(); |
|
} |
|
|
_rep = new StringRep(str); | _rep = new StringRep(str); |
} | } |
| |
// Constructs a String from a Char16 buffer and a count n, by creating an
// empty StringRep and delegating to assign(str, n).
// Throws NullPointer when str is 0.
String::String(const Char16* str, Uint32 n) | String::String(const Char16* str, Uint32 n) |
{ | { |
|
// Reject a null pointer before any allocation takes place.
if ( str == 0 ) |
|
{ |
|
throw NullPointer(); |
|
} |
|
|
_rep = new StringRep; | _rep = new StringRep; |
assign(str, n); | assign(str, n); |
} | } |
| |
String::String(const char* str) | String::String(const char* str) |
{ | { |
|
if ( str == 0 ) |
|
{ |
|
throw NullPointer(); |
|
} |
|
|
_rep = new StringRep; | _rep = new StringRep; |
assign(str); |
AutoPtr<StringRep> tempRep(_rep); |
|
// An exception can be thrown, so use a temp AutoPtr. |
|
_convertAndAppend(str, _rep->c16a, 0, 1); |
|
tempRep.release(); |
} | } |
| |
String::String(const char* str, Uint32 n) | String::String(const char* str, Uint32 n) |
{ | { |
|
if ( str == 0 ) |
|
{ |
|
throw NullPointer(); |
|
} |
|
|
_rep = new StringRep; | _rep = new StringRep; |
assign(str, n); |
AutoPtr<StringRep> tempRep(_rep); |
|
// An exception can be thrown, so use a temp AutoPtr. |
|
_convertAndAppend(str, _rep->c16a, n, 0); |
|
tempRep.release(); |
} | } |
| |
String::~String() | String::~String() |
|
|
| |
String& String::operator=(const String& str) | String& String::operator=(const String& str) |
{ | { |
return assign(str); |
if (&str != this) |
|
{ |
|
assign(str); |
|
} |
|
return *this; |
} | } |
| |
String& String::assign(const String& str) | String& String::assign(const String& str) |
|
|
| |
String& String::assign(const Char16* str) | String& String::assign(const Char16* str) |
{ | { |
|
if ( str == 0 ) |
|
{ |
|
throw NullPointer(); |
|
} |
|
|
_rep->c16a.clear(); | _rep->c16a.clear(); |
_rep->c16a.append(str, _StrLen(str) + 1); | _rep->c16a.append(str, _StrLen(str) + 1); |
return *this; | return *this; |
|
|
| |
String& String::assign(const Char16* str, Uint32 n) | String& String::assign(const Char16* str, Uint32 n) |
{ | { |
|
if ( str == 0 ) |
|
{ |
|
throw NullPointer(); |
|
} |
|
|
_rep->c16a.clear(); | _rep->c16a.clear(); |
Uint32 m = _strnlen(str, n); |
_rep->c16a.append(str, n); |
_rep->c16a.append(str, m); |
|
_rep->c16a.append('\0'); | _rep->c16a.append('\0'); |
return *this; | return *this; |
} | } |
| |
String& String::assign(const char* str) | String& String::assign(const char* str) |
{ | { |
_rep->c16a.clear(); |
if ( str == 0 ) |
|
{ |
Uint32 n = strlen(str) + 1; |
throw NullPointer(); |
_rep->c16a.reserveCapacity(n); |
} |
|
|
while (n--) |
|
_rep->c16a.append(*str++); |
|
| |
|
_rep->c16a.clear(); |
|
_convertAndAppend(str, _rep->c16a, 0, 1); |
return *this; | return *this; |
} | } |
| |
String& String::assign(const char* str, Uint32 n) | String& String::assign(const char* str, Uint32 n) |
{ | { |
_rep->c16a.clear(); |
if ( str == 0 ) |
|
{ |
Uint32 _n = _strnlen(str, n); |
throw NullPointer(); |
_rep->c16a.reserveCapacity(_n + 1); |
} |
|
|
while (_n--) |
|
_rep->c16a.append(*str++); |
|
|
|
_rep->c16a.append('\0'); |
|
| |
|
_rep->c16a.clear(); |
|
_convertAndAppend(str, _rep->c16a, n, 0); |
return *this; | return *this; |
} | } |
| |
|
|
| |
// Returns the number of elements in the internal Char16 array minus one.
// The assign()/append() code in this file ends the array with a '\0'
// element, so the subtraction presumably excludes that terminator.
// NOTE(review): the commented-out VMS block below guarded against an empty
// array (c16a.size() == 0); with that guard disabled the subtraction is
// unchecked -- confirm the array can never be empty here.
Uint32 String::size() const | Uint32 String::size() const |
{ | { |
|
//#if defined (PEGASUS_OS_VMS) |
|
// |
|
// This prevents returning a minus number. |
|
// |
|
// Seems as though the first time through |
|
// the XML parser something doesn't get |
|
// initialized and there is no check for |
|
// a negative number in the parser! |
|
// |
|
// Uint32 foo; |
|
// foo = _rep->c16a.size(); |
|
// if (foo == 0) |
|
// { |
|
// return 0; |
|
// } |
|
// else |
|
// { |
|
// return (foo -1); |
|
// } |
|
//#else |
return _rep->c16a.size() - 1; | return _rep->c16a.size() - 1; |
|
//#endif |
} | } |
| |
const Char16* String::getChar16Data() const | const Char16* String::getChar16Data() const |
|
|
return _rep->c16a.getData(); | return _rep->c16a.getData(); |
} | } |
| |
// Builds a narrow C string from this String by copying size()+1 Char16
// elements (the +1 covers the terminating element) and casting each one to
// char. The high byte of any element above 0xff is therefore dropped; the
// disabled check inside the loop shows where such truncation could be
// detected.
// The new[]-allocated buffer is passed to the CString(char*) constructor.
CString String::getCString() const |
|
{ |
|
Uint32 n = size() + 1; |
|
char* str = new char[n]; |
|
char* p = str; |
|
const Char16* q = getChar16Data(); |
|
|
|
for (Uint32 i = 0; i < n; i++) |
|
{ |
|
Uint16 c = *q++; |
|
// Narrowing cast: only the low-order bits that fit in a char survive.
*p++ = char(c); |
|
|
|
//if (c & 0xff00) |
|
// truncatedCharacters = true; |
|
} |
|
|
|
return CString(str); |
|
} |
|
|
|
Char16& String::operator[](Uint32 index) | Char16& String::operator[](Uint32 index) |
{ | { |
if (index > size()) | if (index > size()) |
|
|
| |
String& String::append(const Char16* str, Uint32 n) | String& String::append(const Char16* str, Uint32 n) |
{ | { |
Uint32 m = _strnlen(str, n); |
if (str == 0) |
_rep->c16a.reserveCapacity(_rep->c16a.size() + m); |
{ |
|
throw NullPointer(); |
|
} |
|
|
|
_rep->c16a.reserveCapacity(_rep->c16a.size() + n); |
_rep->c16a.remove(_rep->c16a.size() - 1); | _rep->c16a.remove(_rep->c16a.size() - 1); |
_rep->c16a.append(str, m); |
_rep->c16a.append(str, n); |
_rep->c16a.append('\0'); | _rep->c16a.append('\0'); |
return *this; | return *this; |
} | } |
|
|
| |
return String(getChar16Data() + index, length); | return String(getChar16Data() + index, length); |
} | } |
else |
|
return String(); | return String(); |
} | } |
| |
|
|
Uint32 i = 1; | Uint32 i = 1; |
for (; i < subStrLen; i++) | for (; i < subStrLen; i++) |
if (*pStr++ != *p++ ) | if (*pStr++ != *p++ ) |
{pStr--; break;} // break from loop |
{pStr-=i; break;} // break from loop |
if (i == subStrLen) | if (i == subStrLen) |
return loc; | return loc; |
} | } |
|
|
return PEG_NOT_FOUND; | return PEG_NOT_FOUND; |
} | } |
| |
// ATTN-RK-P3-20020509: Define case-sensitivity for non-English characters |
|
// Converts this String to lower case in place.
//
// With PEGASUS_HAS_ICU defined and ICU successfully initialized, the whole
// string is converted by u_strToLower() with a NULL locale argument. The
// result may differ in length from the input, so the conversion is retried
// with a larger buffer on U_BUFFER_OVERFLOW_ERROR. Any other ICU failure
// throws Exception carrying u_errorName(err).
//
// Otherwise each element not greater than PEGASUS_MAX_PRINTABLE_CHAR is
// passed through tolower(); other elements are left unchanged.
void String::toLower() | void String::toLower() |
{ | { |
|
#ifdef PEGASUS_HAS_ICU |
|
if (InitializeICU::initICUSuccessful()) |
|
{ |
|
// This will do a locale-insensitive, but context-sensitive convert. |
|
// Context-sensitive prevents any optimizations that try to |
|
// convert just the ascii before calling ICU. |
|
// The string may shrink or expand after the convert. |
|
|
|
int32_t sz = size(); |
|
// First attempt: assume the result is no longer than the input.
UChar* destbuf = new UChar[sz + 1]; |
|
const UChar* srcbuf = (const UChar *)getChar16Data(); |
|
UErrorCode err = U_ZERO_ERROR; |
|
|
|
int32_t needed = u_strToLower(destbuf, sz + 1 , srcbuf, sz, NULL, &err); |
|
// Result did not fit: reallocate using the length ICU reported and retry.
if (err == U_BUFFER_OVERFLOW_ERROR) |
|
{ |
|
delete [] destbuf; |
|
destbuf = new UChar[needed + 1]; |
|
err = U_ZERO_ERROR; |
|
u_strToLower(destbuf, needed + 1 , srcbuf, sz, NULL, &err); |
|
} |
|
if (U_FAILURE(err)) |
|
{ |
|
// Free the scratch buffer before propagating the ICU error.
delete [] destbuf; |
|
throw Exception(u_errorName(err)); |
|
} |
|
|
|
// Same length: overwrite the existing elements in place. The loop stops
// at the terminator already present in this String's array.
if (needed == sz) |
|
{ |
|
Char16* from = (Char16*)destbuf; |
|
for (Char16* to = &_rep->c16a[0]; *to; to++, from++) |
|
{ |
|
*to = *from; |
|
} |
|
} |
|
else |
|
{ |
|
// Length changed: rebuild the internal array from the converted buffer.
// NOTE(review): if assign() throws, destbuf is not freed -- confirm.
assign((Char16 *)destbuf, needed); |
|
} |
|
|
|
delete [] destbuf; |
|
} |
|
else |
|
#endif |
|
{ |
// Fallback path: per-element conversion limited to the printable range.
for (Char16* p = &_rep->c16a[0]; *p; p++) | for (Char16* p = &_rep->c16a[0]; *p; p++) |
{ | { |
if (*p <= PEGASUS_MAX_PRINTABLE_CHAR) | if (*p <= PEGASUS_MAX_PRINTABLE_CHAR) |
*p = tolower(*p); | *p = tolower(*p); |
} | } |
} | } |
|
} |
|
|
|
// Converts this String to upper case in place.
//
// With PEGASUS_HAS_ICU defined and ICU successfully initialized, the whole
// string is converted by u_strToUpper() with a NULL locale argument. The
// result may differ in length from the input, so the conversion is retried
// with a larger buffer on U_BUFFER_OVERFLOW_ERROR. Any other ICU failure
// throws Exception carrying u_errorName(err).
//
// Otherwise each element not greater than PEGASUS_MAX_PRINTABLE_CHAR is
// passed through toupper(); other elements are left unchanged.
void String::toUpper() |
|
{ |
|
#ifdef PEGASUS_HAS_ICU |
|
if (InitializeICU::initICUSuccessful()) |
|
{ |
|
// This will do a locale-insensitive, but context-sensitive convert. |
|
// Context-sensitive prevents any optimizations that try to |
|
// convert just the ascii before calling ICU. |
|
// The string may shrink or expand after the convert. |
|
|
|
int32_t sz = size(); |
|
// First attempt: assume the result is no longer than the input.
UChar* destbuf = new UChar[sz + 1]; |
|
const UChar* srcbuf = (const UChar *)getChar16Data(); |
|
UErrorCode err = U_ZERO_ERROR; |
|
|
|
int32_t needed = u_strToUpper(destbuf, sz + 1 , srcbuf, sz, NULL, &err); |
|
// Result did not fit: reallocate using the length ICU reported and retry.
if (err == U_BUFFER_OVERFLOW_ERROR) |
|
{ |
|
delete [] destbuf; |
|
destbuf = new UChar[needed + 1]; |
|
err = U_ZERO_ERROR; |
|
u_strToUpper(destbuf, needed + 1 , srcbuf, sz, NULL, &err); |
|
} |
|
if (U_FAILURE(err)) |
|
{ |
|
// Free the scratch buffer before propagating the ICU error.
delete [] destbuf; |
|
throw Exception(u_errorName(err)); |
|
} |
|
|
|
// Same length: overwrite the existing elements in place. The loop stops
// at the terminator already present in this String's array.
if (needed == sz) |
|
{ |
|
Char16* from = (Char16*)destbuf; |
|
for (Char16* to = &_rep->c16a[0]; *to; to++, from++) |
|
{ |
|
*to = *from; |
|
} |
|
} |
|
else |
|
{ |
|
// Length changed: rebuild the internal array from the converted buffer.
// NOTE(review): if assign() throws, destbuf is not freed -- confirm.
assign((Char16 *)destbuf, needed); |
|
} |
|
|
|
delete [] destbuf; |
|
} |
|
else |
|
#endif |
|
{ |
|
// Fallback path: per-element conversion limited to the printable range.
for (Char16* p = &_rep->c16a[0]; *p; p++) |
|
{ |
|
if (*p <= PEGASUS_MAX_PRINTABLE_CHAR) |
|
*p = toupper(*p); |
|
} |
|
} |
|
} |
| |
int String::compare(const String& s1, const String& s2, Uint32 n) | int String::compare(const String& s1, const String& s2, Uint32 n) |
{ | { |
|
|
| |
int String::compareNoCase(const String& s1, const String& s2) | int String::compareNoCase(const String& s1, const String& s2) |
{ | { |
|
#ifdef PEGASUS_HAS_ICU |
|
if (InitializeICU::initICUSuccessful()) |
|
{ |
|
return u_strcasecmp((const UChar*)s1.getChar16Data(), |
|
(const UChar*)s2.getChar16Data(), |
|
U_FOLD_CASE_DEFAULT); |
|
} |
|
#endif |
const Char16* _s1 = s1.getChar16Data(); | const Char16* _s1 = s1.getChar16Data(); |
const Char16* _s2 = s2.getChar16Data(); | const Char16* _s2 = s2.getChar16Data(); |
| |
|
|
| |
Boolean String::equalNoCase(const String& str1, const String& str2) | Boolean String::equalNoCase(const String& str1, const String& str2) |
{ | { |
|
#ifdef PEGASUS_HAS_ICU |
|
return compareNoCase(str1, str2) == 0; |
|
#else |
if (str1.size() != str2.size()) | if (str1.size() != str2.size()) |
return false; | return false; |
| |
|
|
} | } |
| |
return true; | return true; |
|
#endif |
} | } |
| |
| |
|
// Returns this String converted from UTF-16 to UTF-8 as a CString.
//
// A scratch buffer of 3*size()+1 bytes is allocated, UTF16toUTF8() converts
// size()+1 source elements (the terminator included) into it, and the result
// is then copied into a buffer of exactly strlen()+1 bytes that is passed to
// the CString(char*) constructor.
// Because the final copy uses strlen()/strcpy(), output stops at the first
// NUL byte produced by the conversion.
CString String::getCString() const |
|
{ |
|
// Scratch size: three bytes per UTF-16 element plus one for the NUL.
Uint32 n = 3*size() + 1; |
|
char* str = new char[n]; |
|
|
|
const Char16* msg16 = getChar16Data(); |
|
|
|
const Uint16 *strsrc = (Uint16 *)msg16; |
|
// End pointer is one past the terminating element, so it is converted too.
Uint16 *endsrc = (Uint16 *)&msg16[size()+1]; |
|
|
|
Uint8 *strtgt = (Uint8 *)str; |
|
Uint8 *endtgt = (Uint8 *)&str[n]; |
|
|
|
UTF16toUTF8 (&strsrc, |
|
endsrc, |
|
&strtgt, |
|
endtgt); |
|
|
|
// Shrink to fit: copy into a buffer of the exact length, free the scratch.
char* str1 = new char[strlen(str)+1]; |
|
strcpy(str1,str); |
|
delete [] str; |
|
|
|
return CString(str1); |
|
} |
|
|
#if 0 | #if 0 |
// ATTN-RK-P3-20020603: This code is not completely correct | // ATTN-RK-P3-20020603: This code is not completely correct |
// Wildcard String matching function that may be useful in the future | // Wildcard String matching function that may be useful in the future |
|
|
special characters in the pattern: *?\[] (see the manual | special characters in the pattern: *?\[] (see the manual |
entry for details on what these mean). | entry for details on what these mean). |
| |
|
|
Side effects: None. | Side effects: None. |
*/ | */ |
| |
|
|
| |
// Lower-cases a single 16-bit character. Values not greater than
// PEGASUS_MAX_PRINTABLE_CHAR are narrowed to char and passed to tolower();
// larger values are returned unchanged.
inline Uint16 _ToLower(Uint16 ch) | inline Uint16 _ToLower(Uint16 ch) |
{ | { |
|
// ICU_TODO: If ICU is available we should do this the correct way. |
return ch <= PEGASUS_MAX_PRINTABLE_CHAR ? tolower(char(ch)) : ch; | return ch <= PEGASUS_MAX_PRINTABLE_CHAR ? tolower(char(ch)) : ch; |
} | } |
| |
// Compares two match characters. When nocase is non-zero both characters
// are folded with _ToLower() before comparison; otherwise they are compared
// as-is.
inline Boolean _Equal(MatchChar ch1, MatchChar ch2, int nocase) | inline Boolean _Equal(MatchChar ch1, MatchChar ch2, int nocase) |
{ | { |
|
// ICU_TODO: If ICU is available we should do this the correct way. |
if (nocase) | if (nocase) |
return _ToLower(ch1) == _ToLower(ch2); | return _ToLower(ch1) == _ToLower(ch2); |
else |
|
return ch1 == ch2; | return ch1 == ch2; |
} | } |
| |
|
|
@param str String containing the string to be matched | @param str String containing the string to be matched |
@param pattern GLOB style pattern to use in the match. | @param pattern GLOB style pattern to use in the match. |
@return Boolean true if str matches pattern | @return Boolean true if str matches pattern |
@SeeAlso match |
@see match |
*/ | */ |
Boolean String::matchNoCase(const String& str, const String& pattern) | Boolean String::matchNoCase(const String& str, const String& pattern) |
{ | { |
|
|
| |
PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str) | PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str) |
{ | { |
|
|
|
#if defined(PEGASUS_OS_OS400) |
|
CString cstr = str.getCString(); |
|
const char* utf8str = cstr; |
|
|
|
os << utf8str; |
|
|
|
#elif defined(PEGASUS_HAS_ICU) |
|
if (InitializeICU::initICUSuccessful()) |
|
{ |
|
char *buf = NULL; |
|
const int size = str.size() * 6; |
|
UnicodeString UniStr((const UChar *)str.getChar16Data(), (int32_t)str.size()); |
|
Uint32 bufsize = UniStr.extract(0,size,buf); |
|
|
|
buf = new char[bufsize+1]; |
|
UniStr.extract(0,bufsize,buf); |
|
os << buf; |
|
os.flush(); |
|
delete [] buf; |
|
} |
|
else |
|
#endif // End of PEGASUS_HAS_ICU #else leg. |
|
{ |
for (Uint32 i = 0, n = str.size(); i < n; i++) | for (Uint32 i = 0, n = str.size(); i < n; i++) |
{ | { |
Uint16 code = str[i]; | Uint16 code = str[i]; |
|
|
os << buffer; | os << buffer; |
} | } |
} | } |
|
} |
| |
return os; | return os; |
} | } |
|
|
return String::compare(str1, str2) >= 0; | return String::compare(str1, str2) >= 0; |
} | } |
| |
#ifndef PEGASUS_REMOVE_DEPRECATED |
|
// Case-insensitive comparison of two C strings; forwards directly to
// System::strcasecmp() and returns its result.
// Compiled only when PEGASUS_REMOVE_DEPRECATED is not defined.
int CompareNoCase(const char* s1, const char* s2) |
|
{ |
|
return System::strcasecmp(s1, s2); |
|
} |
|
#endif |
|
|
|
PEGASUS_NAMESPACE_END | PEGASUS_NAMESPACE_END |