version 1.60, 2002/09/11 19:11:54
|
version 1.80, 2003/09/26 17:52:18
|
|
|
| |
| |
#include <cctype> | #include <cctype> |
|
#include <cstring> |
#include "String.h" | #include "String.h" |
#include "Array.h" | #include "Array.h" |
#include "InternalException.h" | #include "InternalException.h" |
#include <iostream> | #include <iostream> |
#ifdef PEGASUS_LEGACY_API |
#include <fstream> |
|
#ifndef PEGASUS_REMOVE_DEPRECATED |
#include "System.h" // for strcasecmp | #include "System.h" // for strcasecmp |
#endif | #endif |
| |
|
#include "CommonUTF.h" |
|
|
|
#ifdef PEGASUS_HAS_ICU |
|
#include <unicode/unistr.h> |
|
#endif |
|
|
PEGASUS_USING_STD; | PEGASUS_USING_STD; |
| |
PEGASUS_NAMESPACE_BEGIN | PEGASUS_NAMESPACE_BEGIN |
|
|
| |
/**
    Copy constructor.

    Gives this object its own StringRep: a copy of the source's
    representation when the source has one, otherwise a default-constructed
    representation.

    @param str String to copy from.
*/
String::String(const String& str)
{
    // A source without a representation still yields a valid _rep here.
    _rep = (str._rep != NULL) ? new StringRep(*str._rep) : new StringRep();
}
|
|
| |
String::String(const String& str, Uint32 n) | String::String(const String& str, Uint32 n) |
{ | { |
|
|
assign(str); | assign(str); |
} | } |
| |
|
/**
    Constructs a String from a C string whose encoding is selected by a flag.

    @param str     NUL-terminated source characters.
    @param utfFlag NUL-terminated encoding tag. When it equals
                   STRING_FLAG_UTF8 the input is decoded with assignUTF8();
                   any other value, or NULL, falls back to assign().
*/
String::String(const char* str, const char* utfFlag)
{
    _rep = new StringRep;

    // Compare as C strings instead of memcmp(): memcmp() reads
    // sizeof(STRING_FLAG_UTF8) bytes from utfFlag even when the caller's
    // flag is shorter than that, running past its terminator.
    // NOTE(review): assumes STRING_FLAG_UTF8 is a NUL-terminated string
    // constant (the original sizeof-based compare included a terminator
    // only in that case) - confirm against String.h.
    if (utfFlag != NULL && strcmp(utfFlag, STRING_FLAG_UTF8) == 0)
    {
        assignUTF8(str);
    }
    else
    {
        assign(str);
    }
}
|
|
String::String(const char* str, Uint32 n) | String::String(const char* str, Uint32 n) |
{ | { |
_rep = new StringRep; | _rep = new StringRep; |
|
|
_rep->c16a.reserveCapacity(n); | _rep->c16a.reserveCapacity(n); |
| |
while (n--) | while (n--) |
_rep->c16a.append(*str++); |
_rep->c16a.append(Uint8(*str++)); |
| |
return *this; | return *this; |
} | } |
|
|
_rep->c16a.reserveCapacity(_n + 1); | _rep->c16a.reserveCapacity(_n + 1); |
| |
while (_n--) | while (_n--) |
_rep->c16a.append(*str++); |
_rep->c16a.append(Uint8(*str++)); |
| |
_rep->c16a.append('\0'); | _rep->c16a.append('\0'); |
| |
|
|
return PEG_NOT_FOUND; | return PEG_NOT_FOUND; |
} | } |
| |
|
// ATTN-RK-P3-20020509: Define case-sensitivity for non-English characters |
|
// ATTN-CEC-20030913: ICU code added, but uses the server's locale. Look at adding |
|
// a toLower( ) with Locale parameter - like ICU's toLower( ) |
void String::toLower() | void String::toLower() |
{ | { |
|
#ifdef PEGASUS_HAS_ICU |
|
UnicodeString UniStr((const UChar *)_rep->c16a.getData()); |
|
UniStr.toLower(); |
|
UniStr.append((UChar)'\0'); |
|
|
|
assign((Char16*)UniStr.getBuffer()); |
|
#else |
for (Char16* p = &_rep->c16a[0]; *p; p++) | for (Char16* p = &_rep->c16a[0]; *p; p++) |
{ | { |
if (*p <= PEGASUS_MAX_PRINTABLE_CHAR) | if (*p <= PEGASUS_MAX_PRINTABLE_CHAR) |
*p = tolower(*p); | *p = tolower(*p); |
} | } |
|
#endif |
} | } |
| |
int String::compare(const String& s1, const String& s2, Uint32 n) | int String::compare(const String& s1, const String& s2, Uint32 n) |
|
|
| |
// Case-insensitive comparison of s1 and s2.
// With PEGASUS_HAS_ICU both strings are copied into ICU UnicodeStrings,
// lower-cased, and compared with UnicodeString::compare(). The non-ICU
// branch works on the raw Char16 data of both strings.
int String::compareNoCase(const String& s1, const String& s2) | int String::compareNoCase(const String& s1, const String& s2) |
{ | { |
|
#ifdef PEGASUS_HAS_ICU |
|
UnicodeString UniStr1((const UChar *)s1.getChar16Data(), (int32_t)s1.size()); |
|
UnicodeString UniStr2((const UChar *)s2.getChar16Data(), (int32_t)s2.size()); |
|
UniStr1.toLower(); |
|
UniStr2.toLower(); |
|
// NOTE(review): this compares UniStr2 against UniStr1, i.e. s2 against s1,
// which is the reverse of the (s1, s2) parameter order. The sign of the
// result is presumably inverted relative to the non-ICU branch - confirm
// against the ICU UnicodeString::compare() documentation and fix if so.
return (UniStr2.compare(UniStr1)); |
|
#else |
const Char16* _s1 = s1.getChar16Data(); | const Char16* _s1 = s1.getChar16Data(); |
const Char16* _s2 = s2.getChar16Data(); | const Char16* _s2 = s2.getChar16Data(); |
| |
|
|
return 1; | return 1; |
| |
return 0; | return 0; |
|
#endif |
} | } |
| |
Boolean String::equal(const String& str1, const String& str2) | Boolean String::equal(const String& str1, const String& str2) |
|
|
| |
Boolean String::equalNoCase(const String& str1, const String& str2) | Boolean String::equalNoCase(const String& str1, const String& str2) |
{ | { |
|
#ifdef PEGASUS_HAS_ICU |
|
UnicodeString UniStr1((const UChar *)str1.getChar16Data(), (int32_t)str1.size()); |
|
UnicodeString UniStr2((const UChar *)str2.getChar16Data(), (int32_t)str2.size()); |
|
UniStr1.toLower(); |
|
UniStr2.toLower(); |
|
return (UniStr1 == UniStr2); |
|
#else |
if (str1.size() != str2.size()) | if (str1.size() != str2.size()) |
return false; | return false; |
| |
|
|
} | } |
| |
return true; | return true; |
|
#endif |
|
} |
|
|
|
// UTF8 specific code: |
|
/**
    Replaces the contents of this String with the UTF-16 form of a UTF-8
    encoded C string.

    @param str NUL-terminated UTF-8 input.
    @return Reference to this String.
*/
String& String::assignUTF8(const char* str)
{
    _rep->c16a.clear();

    // Input length including its terminator. This also sizes the UTF-16
    // scratch buffer: every UTF-16 code unit consumes at least one input
    // byte, so n units are always enough.
    Uint32 n = strlen(str) + 1;

    // The source range ends at the terminator, so the terminator itself is
    // not part of the converted input.
    const Uint8 *strsrc = (Uint8 *)str;
    Uint8 *endsrc = (Uint8 *)&str[n-1];

    Char16 *msg16 = new Char16[n];

    // Clear the scratch buffer explicitly so the terminator scan below does
    // not depend on how Char16 default construction initializes elements.
    for (Uint32 i = 0; i < n; ++i)
    {
        msg16[i] = Char16(0x00);
    }

    Uint16 *strtgt = (Uint16 *)msg16;
    Uint16 *endtgt = (Uint16 *)&msg16[n];

    UTF8toUTF16(&strsrc,
                endsrc,
                &strtgt,
                endtgt);

    // Count the converted code units. The bound is tested before the element
    // is read and uses a strict '<', so msg16[n] (one past the allocation)
    // is never touched.
    Uint32 count = 0;
    while (count < n && msg16[count] != Char16(0x00))
    {
        ++count;
    }

    _rep->c16a.append(msg16, count);
    _rep->c16a.append('\0');

    delete [] msg16;

    return *this;
}
| |
|
/**
    Returns the contents of this String encoded as UTF-8.

    The text is converted into a worst-case sized scratch buffer and then
    copied into an exactly sized buffer that is handed to the returned
    CString.

    @return CString built from the UTF-8 encoded characters.
*/
CString String::getCStringUTF8() const
{
    // Worst case: three UTF-8 bytes per UTF-16 code unit, plus a terminator.
    Uint32 n = 3*size() + 1;
    char* str = new char[n];

    // Zero the scratch buffer so it is always NUL-terminated, even if the
    // conversion stops early (e.g. on malformed input) without writing a
    // terminator. Otherwise strlen()/strcpy() below would read
    // uninitialized memory.
    memset(str, 0, n);

    const Char16* msg16 = getChar16Data();

    // The source range includes the terminator (size()+1 units).
    const Uint16 *strsrc = (Uint16 *)msg16;
    Uint16 *endsrc = (Uint16 *)&msg16[size()+1];

    Uint8 *strtgt = (Uint8 *)str;
    Uint8 *endtgt = (Uint8 *)&str[n];

    UTF16toUTF8 (&strsrc,
                 endsrc,
                 &strtgt,
                 endtgt);

    // Shrink to the actual encoded length before building the CString.
    // NOTE(review): CString(char*) presumably takes ownership of str1 -
    // confirm against the CString declaration.
    char* str1 = new char[strlen(str)+1];
    strcpy(str1,str);
    delete [] str;

    return CString(str1);
}
|
|
|
/**
    Checks whether the bytes at `legal` begin with one valid UTF-8 sequence.

    The sequence length is taken from the lead byte via
    UTF_8_COUNT_TRAIL_BYTES. The check fails if the string ends before all
    expected trail bytes are present; otherwise the result of isValid_U8()
    for that sequence is returned.

    @param legal Pointer to the first byte of the sequence to check.
    @return true if the sequence is complete and isValid_U8() accepts it.
*/
Boolean String::isUTF8(const char *legal)
{
    char expected = UTF_8_COUNT_TRAIL_BYTES(*legal)+1;

    // Every trail byte must be present before the terminator. When
    // legal[0] == 0 the expected length is 1 and this loop does not run.
    char idx = 1;
    while (idx < expected)
    {
        if (legal[idx] == 0)
        {
            return false;
        }
        ++idx;
    }

    return (isValid_U8((const Uint8 *)legal, expected));
}
|
|
|
#if 0 |
// ATTN-RK-P3-20020603: This code is not completely correct | // ATTN-RK-P3-20020603: This code is not completely correct |
// Wildcard String matching function that may be useful in the future | // Wildcard String matching function that may be useful in the future |
// The following code was provided by Bob Blair. | // The following code was provided by Bob Blair. |
|
|
special characters in the pattern: *?\[] (see the manual | special characters in the pattern: *?\[] (see the manual |
entry for details on what these mean). | entry for details on what these mean). |
| |
|
|
Side effects: None. | Side effects: None. |
*/ | */ |
| |
|
|
| |
// Lower-cases a single code unit. Values above PEGASUS_MAX_PRINTABLE_CHAR
// are returned unchanged; everything else goes through tolower().
inline Uint16 _ToLower(Uint16 ch)
{
    // ICU_TODO: If ICU is available we should do this the correct way.
    if (ch > PEGASUS_MAX_PRINTABLE_CHAR)
        return ch;

    return tolower(char(ch));
}
| |
inline Boolean _Equal(MatchChar ch1, MatchChar ch2, int nocase) | inline Boolean _Equal(MatchChar ch1, MatchChar ch2, int nocase) |
{ | { |
|
// ICU_TODO: If ICU is available we should do this the correct way. |
if (nocase) | if (nocase) |
return _ToLower(ch1) == _ToLower(ch2); | return _ToLower(ch1) == _ToLower(ch2); |
else | else |
|
|
} | } |
| |
| |
|
/** match matches a string against a GLOB style pattern. |
|
Return trues if the String parameter matches the pattern. C-Shell style |
|
glob matching is used. |
|
@param str String to be matched against the pattern |
|
@param pattern Pattern to use in the match |
|
@return Boolean true if str matches pattern |
|
The pattern definition is as follows: |
|
<pre> |
|
* Matches any number of any characters |
|
? Match exactly one character |
|
[chars] Match any character in chars |
|
[chara-charb] Match any character in the range between chara and charb |
|
</pre> |
|
The literal characters *, ?, [, ] can be included in a string by |
|
escaping them with backslash "\". Ranges of characters can be concatenated. |
|
<pre> |
|
examples: |
|
Boolean result = String::match("This is a test", "*is*"); |
|
Boolean works = String::match("abcdef123", "*[0-9]"); |
|
</pre> |
|
*/ |
Boolean String::match(const String& str, const String& pattern) | Boolean String::match(const String& str, const String& pattern) |
{ | { |
return _StringMatch( | return _StringMatch( |
(Uint16*)str.getChar16Data(), (Uint16*)pattern.getChar16Data(), 0) != 0; | (Uint16*)str.getChar16Data(), (Uint16*)pattern.getChar16Data(), 0) != 0; |
} | } |
| |
|
/** matchNoCase Matches a String against a GLOB style pattern independent |
|
of case. |
|
Returns true if the str parameter matches the pattern. C-Shell style |
|
glob matching is used. Ignore case in all comparisons. Case is |
|
ignored in the match. |
|
@parm str String containing the string to be matched\ |
|
@parm pattern GLOB style patterh to use in the match. |
|
@return Boolean true if str matches patterh |
|
@SeeAlso match |
|
*/ |
Boolean String::matchNoCase(const String& str, const String& pattern) | Boolean String::matchNoCase(const String& str, const String& pattern) |
{ | { |
return _StringMatch( | return _StringMatch( |
(Uint16*)str.getChar16Data(), (Uint16*)pattern.getChar16Data(), 1) != 0; | (Uint16*)str.getChar16Data(), (Uint16*)pattern.getChar16Data(), 1) != 0; |
} | } |
|
#endif |
| |
| |
/////////////////////////////////////////////////////////////////////////////// | /////////////////////////////////////////////////////////////////////////////// |
|
|
| |
PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str) | PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str) |
{ | { |
|
|
|
#if defined(PEGASUS_OS_OS400) |
|
CString cstr = str.getCStringUTF8(); |
|
const char* utf8str = cstr; |
|
|
|
os << utf8str; |
|
|
|
#elif defined(PEGASUS_HAS_ICU) |
|
|
|
char *buf = NULL; |
|
const int size = str.size() * 6; |
|
UnicodeString UniStr((const UChar *)str.getChar16Data(), (int32_t)str.size()); |
|
Uint32 bufsize = UniStr.extract(0,size,buf); |
|
|
|
buf = new char[bufsize+1]; |
|
UniStr.extract(0,bufsize,buf); |
|
os << buf; |
|
os.flush(); |
|
delete [] buf; |
|
#else |
|
|
|
|
for (Uint32 i = 0, n = str.size(); i < n; i++) | for (Uint32 i = 0, n = str.size(); i < n; i++) |
{ | { |
Uint16 code = str[i]; | Uint16 code = str[i]; |
|
|
os << buffer; | os << buffer; |
} | } |
} | } |
|
#endif // End of PEGASUS_HAS_ICU #else leg. |
| |
return os; | return os; |
} | } |
|
|
return String::compare(str1, str2) >= 0; | return String::compare(str1, str2) >= 0; |
} | } |
| |
#ifdef PEGASUS_LEGACY_API |
#ifndef PEGASUS_REMOVE_DEPRECATED |
int CompareNoCase(const char* s1, const char* s2) | int CompareNoCase(const char* s1, const char* s2) |
{ | { |
return System::strcasecmp(s1, s2); | return System::strcasecmp(s1, s2); |