version 1.127, 2007/06/12 18:15:51
|
version 1.138, 2010/07/16 10:15:31
|
|
|
//%2006//////////////////////////////////////////////////////////////////////// |
//%LICENSE//////////////////////////////////////////////////////////////// |
// | // |
// Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development |
// Licensed to The Open Group (TOG) under one or more contributor license |
// Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems. |
// agreements. Refer to the OpenPegasusNOTICE.txt file distributed with |
// Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.; |
// this work for additional information regarding copyright ownership. |
// IBM Corp.; EMC Corporation, The Open Group. |
// Each contributor licenses this file to you under the OpenPegasus Open |
// Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.; |
// Source License; you may not use this file except in compliance with the |
// IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group. |
// License. |
// Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.; |
|
// EMC Corporation; VERITAS Software Corporation; The Open Group. |
|
// Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.; |
|
// EMC Corporation; Symantec Corporation; The Open Group. |
|
// | // |
// Permission is hereby granted, free of charge, to any person obtaining a copy |
// Permission is hereby granted, free of charge, to any person obtaining a |
// of this software and associated documentation files (the "Software"), to |
// copy of this software and associated documentation files (the "Software"), |
// deal in the Software without restriction, including without limitation the |
// to deal in the Software without restriction, including without limitation |
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
// the rights to use, copy, modify, merge, publish, distribute, sublicense, |
// sell copies of the Software, and to permit persons to whom the Software is |
// and/or sell copies of the Software, and to permit persons to whom the |
// furnished to do so, subject to the following conditions: |
// Software is furnished to do so, subject to the following conditions: |
// | // |
// THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN |
// The above copyright notice and this permission notice shall be included |
// ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED |
// in all copies or substantial portions of the Software. |
// "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT |
|
// LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR |
|
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT |
|
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
|
// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
|
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
|
// | // |
//============================================================================== |
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
|
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
|
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
|
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
|
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
|
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
|
// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
|
// |
|
////////////////////////////////////////////////////////////////////////// |
// | // |
//%///////////////////////////////////////////////////////////////////////////// | //%///////////////////////////////////////////////////////////////////////////// |
| |
#include <Pegasus/Common/PegasusAssert.h> | #include <Pegasus/Common/PegasusAssert.h> |
#include <cstring> | #include <cstring> |
#include "InternalException.h" | #include "InternalException.h" |
#include "CommonUTF.h" |
|
#include "MessageLoader.h" | #include "MessageLoader.h" |
#include "StringRep.h" | #include "StringRep.h" |
| |
#ifdef PEGASUS_HAS_ICU | #ifdef PEGASUS_HAS_ICU |
|
# include <unicode/ures.h> |
#include <unicode/ustring.h> | #include <unicode/ustring.h> |
#include <unicode/uchar.h> | #include <unicode/uchar.h> |
#endif | #endif |
|
|
return x; | return x; |
} | } |
| |
// Copies n elements from q to p. Each element is converted from Q to P by
// plain assignment, so the two element types may differ (for example, char
// source into a 16-bit destination).
//
//   p - destination; elements p[0] .. p[n-1] are written.
//   q - source; elements q[0] .. q[n-1] are read.
//   n - number of elements to copy (may be zero).
template<class P, class Q>
static void _copy(P* p, const Q* q, size_t n)
{
    // The copy is unrolled by hand for efficiency (blocks of eight, then
    // blocks of four, then single elements). Please do not eliminate the
    // unrolling.

    for (; n >= 8; n -= 8, p += 8, q += 8)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
        p[4] = q[4];
        p[5] = q[5];
        p[6] = q[6];
        p[7] = q[7];
    }

    for (; n >= 4; n -= 4, p += 4, q += 4)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
    }

    // At most three elements are left at this point.
    for (; n != 0; --n)
        *p++ = *q++;
}
|
|
|
static Uint16* _find(const Uint16* s, size_t n, Uint16 c) | static Uint16* _find(const Uint16* s, size_t n, Uint16 c) |
{ | { |
// The following employs loop unrolling for efficiency. Please do not | // The following employs loop unrolling for efficiency. Please do not |
|
|
return 0; | return 0; |
} | } |
| |
|
#ifdef PEGASUS_STRING_NO_UTF8 |
static int _compareNoUTF8(const Uint16* s1, const char* s2) | static int _compareNoUTF8(const Uint16* s1, const char* s2) |
{ | { |
Uint16 c1; | Uint16 c1; |
|
|
| |
return c1 - c2; | return c1 - c2; |
} | } |
|
#endif |
| |
static inline void _copy(Uint16* s1, const Uint16* s2, size_t n) | static inline void _copy(Uint16* s1, const Uint16* s2, size_t n) |
{ | { |
|
|
throw NullPointer(); | throw NullPointer(); |
} | } |
| |
static void _StringThrowBadUTF8(Uint32 index) |
#define BADUTF8_MAX_CLEAR_CHAR 40 |
{ |
#define BADUTF8_MAX_CHAR_TO_HEX 10 |
MessageLoaderParms parms( |
|
"Common.String.BAD_UTF8", |
|
"The byte sequence starting at index $0 " |
|
"is not valid UTF-8 encoding.", |
|
index); |
|
throw Exception(parms); |
|
} |
|
| |
static size_t _copyFromUTF8( |
static void _formatBadUTF8Chars( |
Uint16* dest, |
char* buffer, |
const char* src, |
Uint32 index, |
size_t n, |
const char* q, |
size_t& utf8_error_index) |
size_t n ) |
{ | { |
Uint16* p = dest; |
|
const Uint8* q = (const Uint8*)src; |
|
| |
// Process leading 7-bit ASCII characters (to avoid UTF8 overhead later). |
char tmp[20]; |
// Use loop-unrolling. |
const char* start; |
| |
while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0) |
size_t clearChar = |
{ |
(( index < BADUTF8_MAX_CLEAR_CHAR ) ? index : BADUTF8_MAX_CLEAR_CHAR ); |
p[0] = q[0]; |
size_t charToHex = |
p[1] = q[1]; |
((n-index-1) < BADUTF8_MAX_CHAR_TO_HEX ? |
p[2] = q[2]; |
(n-index-1) : BADUTF8_MAX_CHAR_TO_HEX ); |
p[3] = q[3]; |
|
p[4] = q[4]; |
|
p[5] = q[5]; |
|
p[6] = q[6]; |
|
p[7] = q[7]; |
|
p += 8; |
|
q += 8; |
|
n -= 8; |
|
} |
|
|
|
while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0) |
|
{ |
|
p[0] = q[0]; |
|
p[1] = q[1]; |
|
p[2] = q[2]; |
|
p[3] = q[3]; |
|
p += 4; |
|
q += 4; |
|
n -= 4; |
|
} |
|
| |
switch (n) |
if (index < BADUTF8_MAX_CLEAR_CHAR) |
{ |
|
case 0: |
|
return p - dest; |
|
case 1: |
|
if (q[0] < 128) |
|
{ | { |
p[0] = q[0]; |
start = q; |
return p + 1 - dest; |
} else |
} |
|
break; |
|
case 2: |
|
if (((q[0]|q[1]) & 0x80) == 0) |
|
{ | { |
p[0] = q[0]; |
start = &(q[ index - BADUTF8_MAX_CLEAR_CHAR]); |
p[1] = q[1]; |
|
return p + 2 - dest; |
|
} | } |
break; |
|
case 3: |
|
if (((q[0]|q[1]|q[2]) & 0x80) == 0) |
|
{ |
|
p[0] = q[0]; |
|
p[1] = q[1]; |
|
p[2] = q[2]; |
|
return p + 3 - dest; |
|
} |
|
break; |
|
} |
|
|
|
// Process remaining characters. |
|
| |
while (n) |
// Initialize the buffer with the first character as '\0' to be able to use |

// strncat() and strcat() |
|
buffer[0] = 0; |
|
// Start the buffer with the valid UTF8 chars |
|
strncat(buffer,start,clearChar); |
|
for (size_t i = clearChar, j = 0; j <= charToHex; i++,j++ ) |
{ | { |
// Optimize for 7-bit ASCII case. |
tmp[0] = 0; |
|
sprintf(&(tmp[0])," 0x%02X",(Uint8)start[i]); |
|
strncat(buffer,&(tmp[0]),5); |
|
} |
| |
if (*q < 128) |
|
{ |
|
*p++ = *q++; |
|
n--; |
|
} | } |
else |
|
{ |
|
Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1; |
|
| |
if (c > n || !isValid_U8(q, c) || |
static void _StringThrowBadUTF8(Uint32 index, const char* q, size_t n) |
UTF8toUTF16(&q, q + c, &p, p + n) != 0) |
|
{ | { |
utf8_error_index = q - (const Uint8*)src; |
char buffer[1024]; |
return size_t(-1); |
|
} |
|
| |
n -= c; |
_formatBadUTF8Chars(&(buffer[0]),index,q,n); |
} |
|
} |
MessageLoaderParms parms( |
|
"Common.String.BAD_UTF8_LONG", |
|
"The byte sequence starting at index $0 " |
|
"is not valid UTF-8 encoding: $1", |
|
index,buffer); |
| |
return p - dest; |
throw Exception(parms); |
} | } |
| |
// Note: dest must be at least three times src (plus an extra byte for | // Note: dest must be at least three times src (plus an extra byte for |
|
|
return p - (Uint8*)dest; | return p - (Uint8*)dest; |
} | } |
| |
// Converts the n bytes at q into 16-bit characters stored at p and returns
// the number of characters produced.
//
// Unless PEGASUS_STRING_NO_UTF8 is defined, the work is delegated to
// _copyFromUTF8(), and its result (including any value it stores in
// utf8_error_index) is passed straight back to the caller.
//
// When PEGASUS_STRING_NO_UTF8 is defined, no decoding takes place: the n
// source bytes are copied element by element with _copy(), n is returned,
// and utf8_error_index is left untouched.
static inline size_t _convert(
    Uint16* p, const char* q, size_t n, size_t& utf8_error_index)
{
#ifndef PEGASUS_STRING_NO_UTF8
    return _copyFromUTF8(p, q, n, utf8_error_index);
#else
    _copy(p, q, n);
    return n;
#endif
}
|
|
|
//============================================================================== | //============================================================================== |
// | // |
// class CString | // class CString |
|
|
if (rep->size == size_t(-1)) | if (rep->size == size_t(-1)) |
{ | { |
StringRep::free(rep); | StringRep::free(rep); |
_StringThrowBadUTF8((Uint32)utf8_error_index); |
_StringThrowBadUTF8((Uint32)utf8_error_index, data,size); |
} | } |
| |
rep->data[rep->size] = '\0'; | rep->data[rep->size] = '\0'; |
|
|
{ | { |
StringRep::free(_rep); | StringRep::free(_rep); |
_rep = &StringRep::_emptyRep; | _rep = &StringRep::_emptyRep; |
_StringThrowBadUTF8((Uint32)utf8_error_index); |
_StringThrowBadUTF8((Uint32)utf8_error_index,s2,n2); |
} | } |
| |
_rep->size = n1 + tmp; | _rep->size = n1 + tmp; |
|
|
{ | { |
StringRep::free(_rep); | StringRep::free(_rep); |
_rep = &StringRep::_emptyRep; | _rep = &StringRep::_emptyRep; |
_StringThrowBadUTF8((Uint32)utf8_error_index); |
_StringThrowBadUTF8((Uint32)utf8_error_index,s1,n1); |
} | } |
| |
_rep->size = n2 + tmp; | _rep->size = n2 + tmp; |
|
|
{ | { |
StringRep::free(_rep); | StringRep::free(_rep); |
_rep = &StringRep::_emptyRep; | _rep = &StringRep::_emptyRep; |
_StringThrowBadUTF8((Uint32)utf8_error_index); |
_StringThrowBadUTF8((Uint32)utf8_error_index,str,n); |
} | } |
| |
_rep->data[_rep->size] = 0; | _rep->data[_rep->size] = 0; |
|
|
{ | { |
StringRep::free(_rep); | StringRep::free(_rep); |
_rep = &StringRep::_emptyRep; | _rep = &StringRep::_emptyRep; |
_StringThrowBadUTF8((Uint32)utf8_error_index); |
_StringThrowBadUTF8((Uint32)utf8_error_index,str,size); |
} | } |
| |
_rep->size += tmp; | _rep->size += tmp; |
|
|
| |
Boolean String::equal(const String& s1, const String& s2) | Boolean String::equal(const String& s1, const String& s2) |
{ | { |
return s1._rep->size == s2._rep->size && memcmp(s1._rep->data, |
return (s1._rep == s2._rep) || |
s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0; |
((s1._rep->size == s2._rep->size) && |
|
memcmp(s1._rep->data, |
|
s2._rep->data, |
|
s1._rep->size * sizeof(Uint16)) == 0); |
} | } |
| |
Boolean String::equal(const String& s1, const char* s2) | Boolean String::equal(const String& s1, const char* s2) |
|
|
_rep = tmp; | _rep = tmp; |
} | } |
| |
|
// Replaces the contents of s with the first n characters of str. Each char
// is stored into the 16-bit string data by plain assignment via _copy(); no
// UTF-8 decoding is performed.
// NOTE(review): the name implies str holds only 7-bit ASCII; nothing here
// checks that -- confirm with callers.
//
//   s   - string to overwrite.
//   str - source characters; passed to _checkNullPointer() before use
//         (presumably rejects a null pointer -- verify against its
//         definition).
//   n   - number of characters to take from str.
void AssignASCII(String& s, const char* str, Uint32 n)
{
    // Stand-in used to reach the StringRep pointer held inside a String.
    // NOTE(review): assumes String's only data member is a StringRep* --
    // confirm against the String class declaration.
    struct StringLayout
    {
        StringRep* rep;
    };

    StringLayout* that = reinterpret_cast<StringLayout*>(&s);

    _checkNullPointer(str);

    // A new representation is needed when the current one is too small or
    // is not exclusively referenced by s.
    if (n > that->rep->cap || that->rep->refs.get() != 1)
    {
        // Allocate the replacement before releasing the old representation:
        // should the allocation fail with an exception, s must still refer
        // to a representation that has not been released.
        StringRep* fresh = StringRep::alloc(n);
        StringRep::unref(that->rep);
        that->rep = fresh;
    }

    _copy(that->rep->data, str, n);
    that->rep->size = n;

    // Keep the character data zero-terminated.
    that->rep->data[n] = 0;
}
|
|
PEGASUS_NAMESPACE_END | PEGASUS_NAMESPACE_END |
| |
/* | /* |