pegasus/src/Pegasus/Common/String.cpp - diff

Return to String.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

Diff for /pegasus/src/Pegasus/Common/String.cpp between version 1.127 and 1.138

version 1.127, 2007/06/12 18:15:51

version 1.138, 2010/07/16 10:15:31

Line 1

//%2006////////////////////////////////////////////////////////////////////////

//%LICENSE////////////////////////////////////////////////////////////////

// Licensed to The Open Group (TOG) under one or more contributor license

// Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.

// agreements. Refer to the OpenPegasusNOTICE.txt file distributed with

// this work for additional information regarding copyright ownership.

// IBM Corp.; EMC Corporation, The Open Group.

// Each contributor licenses this file to you under the OpenPegasus Open

// Source License; you may not use this file except in compliance with the

// IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.

// License.

// EMC Corporation; VERITAS Software Corporation; The Open Group.

// EMC Corporation; Symantec Corporation; The Open Group.

// Permission is hereby granted, free of charge, to any person obtaining a copy

// Permission is hereby granted, free of charge, to any person obtaining a

// of this software and associated documentation files (the "Software"), to

// copy of this software and associated documentation files (the "Software"),

// deal in the Software without restriction, including without limitation the

// to deal in the Software without restriction, including without limitation

// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or

// the rights to use, copy, modify, merge, publish, distribute, sublicense,

// sell copies of the Software, and to permit persons to whom the Software is

// and/or sell copies of the Software, and to permit persons to whom the

// furnished to do so, subject to the following conditions:

// Software is furnished to do so, subject to the following conditions:

// THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN

// The above copyright notice and this permission notice shall be included

// ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED

// in all copies or substantial portions of the Software.

// "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT

// LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR

// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT

// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN

// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION

// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

//==============================================================================

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS

// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF

// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY

// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,

// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE

// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

//////////////////////////////////////////////////////////////////////////

//%/////////////////////////////////////////////////////////////////////////////

#include <Pegasus/Common/PegasusAssert.h>

#include <cstring>

#include "InternalException.h"

#include "CommonUTF.h"

#include "MessageLoader.h"

#include "StringRep.h"

#ifdef PEGASUS_HAS_ICU

# include <unicode/ures.h>

#include <unicode/ustring.h>

#include <unicode/uchar.h>

#endif

Line 171

Line 169

return x;

}

// Copies the first n elements of q into p, converting every element from
// type Q to type P by plain assignment. No bounds checking is performed:
// both buffers must be able to hold n elements.
//
// The copy is unrolled by hand (blocks of eight, then one block of four,
// then single elements) for efficiency. Please do not collapse it into a
// simple loop.
template<class P, class Q>
static void _copy(P* p, const Q* q, size_t n)
{
    // Main part: eight elements per pass.
    for (; n >= 8; n -= 8, p += 8, q += 8)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
        p[4] = q[4];
        p[5] = q[5];
        p[6] = q[6];
        p[7] = q[7];
    }

    // Fewer than eight elements are left here, so a block of four can fit
    // at most once.
    if (n >= 4)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
        p += 4;
        q += 4;
        n -= 4;
    }

    // Tail: the remaining zero to three elements.
    for (size_t i = 0; i < n; i++)
        p[i] = q[i];
}

static Uint16* _find(const Uint16* s, size_t n, Uint16 c)

{

// The following employs loop unrolling for efficiency. Please do not

Line 268

Line 230

return 0;

}

#ifdef PEGASUS_STRING_NO_UTF8

static int _compareNoUTF8(const Uint16* s1, const char* s2)

{

Uint16 c1;

Line 285

Line 248

return c1 - c2;

}

#endif

static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)

{

Line 302

Line 266

throw NullPointer();

}

static void _StringThrowBadUTF8(Uint32 index)

#define BADUTF8_MAX_CLEAR_CHAR 40

{

#define BADUTF8_MAX_CHAR_TO_HEX 10

MessageLoaderParms parms(

"Common.String.BAD_UTF8",

"The byte sequence starting at index $0 "

"is not valid UTF-8 encoding.",

index);

throw Exception(parms);

}

static size_t _copyFromUTF8(

static void _formatBadUTF8Chars(

Uint16* dest,

char* buffer,

const char* src,

Uint32 index,

size_t n,

const char* q,

size_t& utf8_error_index)

size_t n )

{

Uint16* p = dest;

const Uint8* q = (const Uint8*)src;

// Process leading 7-bit ASCII characters (to avoid UTF8 overhead later).

char tmp[20];

// Use loop-unrolling.

const char* start;

while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0)

size_t clearChar =

{

(( index < BADUTF8_MAX_CLEAR_CHAR ) ? index : BADUTF8_MAX_CLEAR_CHAR );

p[0] = q[0];

size_t charToHex =

p[1] = q[1];

((n-index-1) < BADUTF8_MAX_CHAR_TO_HEX ?

p[2] = q[2];

(n-index-1) : BADUTF8_MAX_CHAR_TO_HEX );

p[3] = q[3];

p[4] = q[4];

p[5] = q[5];

p[6] = q[6];

p[7] = q[7];

p += 8;

q += 8;

n -= 8;

}

while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0)

{

p[0] = q[0];

p[1] = q[1];

p[2] = q[2];

p[3] = q[3];

p += 4;

q += 4;

n -= 4;

}

switch (n)

if (index < BADUTF8_MAX_CLEAR_CHAR)

{

case 0:

return p - dest;

case 1:

if (q[0] < 128)

{

p[0] = q[0];

start = q;

return p + 1 - dest;

} else

}

break;

case 2:

if (((q[0]|q[1]) & 0x80) == 0)

{

p[0] = q[0];

start = &(q[ index - BADUTF8_MAX_CLEAR_CHAR]);

p[1] = q[1];

return p + 2 - dest;

}

break;

case 3:

if (((q[0]|q[1]|q[2]) & 0x80) == 0)

{

p[0] = q[0];

p[1] = q[1];

p[2] = q[2];

return p + 3 - dest;

}

break;

}

// Process remaining characters.

while (n)

// Initialize the buffer with the first character as '\0' to be able to use

// strncat() and strcat()

buffer[0] = 0;

// Start the buffer with the valid UTF8 chars

strncat(buffer,start,clearChar);

for (size_t i = clearChar, j = 0; j <= charToHex; i++,j++ )

{

// Optimize for 7-bit ASCII case.

tmp[0] = 0;

sprintf(&(tmp[0])," 0x%02X",(Uint8)start[i]);

strncat(buffer,&(tmp[0]),5);

}

if (*q < 128)

{

*p++ = *q++;

n--;

}

else

{

Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1;

if (c > n || !isValid_U8(q, c) ||

static void _StringThrowBadUTF8(Uint32 index, const char* q, size_t n)

UTF8toUTF16(&q, q + c, &p, p + n) != 0)

{

utf8_error_index = q - (const Uint8*)src;

char buffer[1024];

return size_t(-1);

}

n -= c;

_formatBadUTF8Chars(&(buffer[0]),index,q,n);

}

MessageLoaderParms parms(

"Common.String.BAD_UTF8_LONG",

"The byte sequence starting at index $0 "

"is not valid UTF-8 encoding: $1",

index,buffer);

return p - dest;

throw Exception(parms);

}

// Note: dest must be at least three times src (plus an extra byte for

Line 467

Line 380

return p - (Uint8*)dest;

}

// Converts n bytes of the 8-bit string q into 16-bit characters stored at p
// and returns the resulting character count.
//
// The conversion strategy is chosen at compile time:
//   - PEGASUS_STRING_NO_UTF8 defined: every byte is copied to one 16-bit
//     character via _copy(); the result is always n and utf8_error_index
//     is left untouched.
//   - otherwise: the work is delegated to _copyFromUTF8(), whose return
//     value is passed through unchanged. NOTE(review): callers compare the
//     result against size_t(-1) and then read utf8_error_index, so that
//     value appears to be the failure sentinel - confirm against
//     _copyFromUTF8().
static inline size_t _convert(
    Uint16* p, const char* q, size_t n, size_t& utf8_error_index)
{
#ifndef PEGASUS_STRING_NO_UTF8
    const size_t converted = _copyFromUTF8(p, q, n, utf8_error_index);
    return converted;
#else
    _copy(p, q, n);
    return n;
#endif
}

//==============================================================================

// class CString

Line 579

Line 481

if (rep->size == size_t(-1))

{

StringRep::free(rep);

_StringThrowBadUTF8((Uint32)utf8_error_index);

_StringThrowBadUTF8((Uint32)utf8_error_index, data,size);

}

rep->data[rep->size] = '\0';

Line 669

Line 571

{

StringRep::free(_rep);

_rep = &StringRep::_emptyRep;

_StringThrowBadUTF8((Uint32)utf8_error_index);

_StringThrowBadUTF8((Uint32)utf8_error_index,s2,n2);

}

_rep->size = n1 + tmp;

Line 689

Line 591

{

StringRep::free(_rep);

_rep = &StringRep::_emptyRep;

_StringThrowBadUTF8((Uint32)utf8_error_index);

_StringThrowBadUTF8((Uint32)utf8_error_index,s1,n1);

}

_rep->size = n2 + tmp;

Line 742

Line 644

{

StringRep::free(_rep);

_rep = &StringRep::_emptyRep;

_StringThrowBadUTF8((Uint32)utf8_error_index);

_StringThrowBadUTF8((Uint32)utf8_error_index,str,n);

}

_rep->data[_rep->size] = 0;

Line 831

Line 733

{

StringRep::free(_rep);

_rep = &StringRep::_emptyRep;

_StringThrowBadUTF8((Uint32)utf8_error_index);

_StringThrowBadUTF8((Uint32)utf8_error_index,str,size);

}

_rep->size += tmp;

Line 1232

Line 1134

Boolean String::equal(const String& s1, const String& s2)

{

return s1._rep->size == s2._rep->size && memcmp(s1._rep->data,

return (s1._rep == s2._rep) ||

s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;

((s1._rep->size == s2._rep->size) &&

memcmp(s1._rep->data,

s2._rep->data,

s1._rep->size * sizeof(Uint16)) == 0);

}

Boolean String::equal(const String& s1, const char* s2)

Line 1319

Line 1224

_rep = tmp;

}

// Replaces the contents of s with the first n characters of str. Each input
// byte is stored as one character through _copy(); no UTF-8 decoding takes
// place, so the caller is presumably expected to pass 7-bit ASCII data only
// (as the function name suggests).
//
// s    - string to overwrite.
// str  - source characters; validated with _checkNullPointer() before use.
// n    - number of characters to take from str.
void AssignASCII(String& s, const char* str, Uint32 n)
{
    // Stand-in for String's memory layout, used to reach the representation
    // pointer from this free function.
    // NOTE(review): this relies on String holding a StringRep* as its first
    // data member - confirm against the String class declaration.
    class StringLayout
    {
    public:
        StringRep* rep;
    };

    StringLayout* that = reinterpret_cast<StringLayout*>(&s);

    _checkNullPointer(str);

    // A new representation is needed when the current one is too small or
    // is not exclusively ours (reference count other than 1).
    if (n > that->rep->cap || that->rep->refs.get() != 1)
    {
        // Obtain the replacement BEFORE releasing the old representation.
        // If the allocation fails by throwing (NOTE(review): confirm
        // against StringRep::alloc), s still refers to its old, valid
        // representation instead of one that has already been released.
        StringRep* fresh = StringRep::alloc(n);
        StringRep::unref(that->rep);
        that->rep = fresh;
    }

    _copy(that->rep->data, str, n);
    that->rep->size = n;

    // Keep the character data zero-terminated.
    that->rep->data[that->rep->size] = 0;
}

PEGASUS_NAMESPACE_END

Legend:

Removed from v.1.127
changed lines
	Added in v.1.138

No CVS admin address has been configured