pegasus/src/Pegasus/Common/String.cpp - diff

Return to String.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

Diff for /pegasus/src/Pegasus/Common/String.cpp between version 1.135 and 1.138

version 1.135, 2008/12/16 18:56:00

version 1.138, 2010/07/16 10:15:31

Line 32

#include <Pegasus/Common/PegasusAssert.h>

#include <cstring>

#include "InternalException.h"

#include "CommonUTF.h"

#include "MessageLoader.h"

#include "StringRep.h"

Line 170

Line 169

return x;

}

// Copies the first n elements of q into p, converting each element from
// Q to P by plain assignment. The ranges are walked front to back.
//
// The copy is deliberately unrolled by hand (blocks of eight, then at most
// one block of four, then a short element-wise tail) for efficiency.
// Please do not collapse it into a single simple loop.
template<class P, class Q>
static void _copy(P* p, const Q* q, size_t n)
{
    // Bulk phase: eight elements per iteration.
    for (; n >= 8; n -= 8, p += 8, q += 8)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
        p[4] = q[4];
        p[5] = q[5];
        p[6] = q[6];
        p[7] = q[7];
    }

    // Fewer than eight elements remain, so a block of four fits at most once.
    if (n >= 4)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
        p += 4;
        q += 4;
        n -= 4;
    }

    // Tail: zero to three leftover elements.
    for (const Q* const last = q + n; q != last; ++q, ++p)
    {
        *p = *q;
    }
}

static Uint16* _find(const Uint16* s, size_t n, Uint16 c)

{

// The following employs loop unrolling for efficiency. Please do not

Line 303

Line 266

throw NullPointer();

}

// Builds a MessageLoaderParms for message key "Common.String.BAD_UTF8",
// passing index as the message argument (presumably substituted for the $0
// placeholder -- confirm against MessageLoader), and throws it wrapped in an
// Exception. This function never returns normally.
//
// NOTE(review): this page is a two-revision diff view. The two #define lines
// below appear to belong to the other revision and are merely interleaved
// with this function's lines; elsewhere on this page they are used as upper
// bounds for clearChar and charToHex in _formatBadUTF8Chars.
static void _StringThrowBadUTF8(Uint32 index)

// Upper bound applied to clearChar in _formatBadUTF8Chars.
#define BADUTF8_MAX_CLEAR_CHAR 40

{

// Upper bound applied to charToHex in _formatBadUTF8Chars.
#define BADUTF8_MAX_CHAR_TO_HEX 10

MessageLoaderParms parms(

"Common.String.BAD_UTF8",

"The byte sequence starting at index $0 "

"is not valid UTF-8 encoding.",

index);

throw Exception(parms);

}

static size_t _copyFromUTF8(

static void _formatBadUTF8Chars(

Uint16* dest,

char* buffer,

const char* src,

Uint32 index,

size_t n,

const char* q,

size_t& utf8_error_index)

size_t n )

{

Uint16* p = dest;

const Uint8* q = (const Uint8*)src;

// Process leading 7-bit ASCII characters (to avoid UTF8 overhead later).

char tmp[20];

// Use loop-unrolling.

const char* start;

while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0)

size_t clearChar =

{

(( index < BADUTF8_MAX_CLEAR_CHAR ) ? index : BADUTF8_MAX_CLEAR_CHAR );

p[0] = q[0];

size_t charToHex =

p[1] = q[1];

((n-index-1) < BADUTF8_MAX_CHAR_TO_HEX ?

p[2] = q[2];

(n-index-1) : BADUTF8_MAX_CHAR_TO_HEX );

p[3] = q[3];

p[4] = q[4];

p[5] = q[5];

p[6] = q[6];

p[7] = q[7];

p += 8;

q += 8;

n -= 8;

}

while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0)

{

p[0] = q[0];

p[1] = q[1];

p[2] = q[2];

p[3] = q[3];

p += 4;

q += 4;

n -= 4;

}

switch (n)

if (index < BADUTF8_MAX_CLEAR_CHAR)

{

case 0:

start = q;

return p - dest;

} else

case 1:

if (q[0] < 128)

{

p[0] = q[0];

start = &(q[ index - BADUTF8_MAX_CLEAR_CHAR]);

return p + 1 - dest;

}

break;

case 2:

if (((q[0]|q[1]) & 0x80) == 0)

{

p[0] = q[0];

p[1] = q[1];

return p + 2 - dest;

}

break;

case 3:

if (((q[0]|q[1]|q[2]) & 0x80) == 0)

{

p[0] = q[0];

p[1] = q[1];

p[2] = q[2];

return p + 3 - dest;

}

break;

}

// Process remaining characters.

// Initialize the buffer with the first character as '\0' to be able to use

// strncat() and strcat()

while (n)

buffer[0] = 0;

// Start the buffer with the valid UTF8 chars

strncat(buffer,start,clearChar);

for (size_t i = clearChar, j = 0; j <= charToHex; i++,j++ )

{

// Optimize for 7-bit ASCII case.

tmp[0] = 0;

sprintf(&(tmp[0])," 0x%02X",(Uint8)start[i]);

strncat(buffer,&(tmp[0]),5);

}

if (*q < 128)

{

*p++ = *q++;

n--;

}

else

{

Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1;

if (c > n || !isValid_U8(q, c) ||

static void _StringThrowBadUTF8(Uint32 index, const char* q, size_t n)

UTF8toUTF16(&q, q + c, &p, p + n) != 0)

{

utf8_error_index = q - (const Uint8*)src;

char buffer[1024];

return size_t(-1);

}

n -= c;

_formatBadUTF8Chars(&(buffer[0]),index,q,n);

}

MessageLoaderParms parms(

"Common.String.BAD_UTF8_LONG",

"The byte sequence starting at index $0 "

"is not valid UTF-8 encoding: $1",

index,buffer);

return p - dest;

throw Exception(parms);

}

// Note: dest must be at least three times src (plus an extra byte for

Line 468

Line 380

return p - (Uint8*)dest;

}

// Converts n chars starting at q into 16-bit code units written at p and
// returns the count reported by the underlying helper.
//
// Default build: the input is handed to _copyFromUTF8(), whose return value
// is passed through unchanged; utf8_error_index is forwarded to it so that
// it can report where a conversion problem occurred.
//
// PEGASUS_STRING_NO_UTF8 build: each char is copied as-is via _copy(), n is
// returned, and utf8_error_index is left untouched.
static inline size_t _convert(
    Uint16* p, const char* q, size_t n, size_t& utf8_error_index)
{
#if !defined(PEGASUS_STRING_NO_UTF8)
    const size_t converted = _copyFromUTF8(p, q, n, utf8_error_index);
    return converted;
#else
    _copy(p, q, n);
    return n;
#endif
}

//==============================================================================

// class CString

Line 580

Line 481

if (rep->size == size_t(-1))

{

StringRep::free(rep);

_StringThrowBadUTF8((Uint32)utf8_error_index);

_StringThrowBadUTF8((Uint32)utf8_error_index, data,size);

}

rep->data[rep->size] = '\0';

Line 670

Line 571

{

StringRep::free(_rep);

_rep = &StringRep::_emptyRep;

_StringThrowBadUTF8((Uint32)utf8_error_index);

_StringThrowBadUTF8((Uint32)utf8_error_index,s2,n2);

}

_rep->size = n1 + tmp;

Line 690

Line 591

{

StringRep::free(_rep);

_rep = &StringRep::_emptyRep;

_StringThrowBadUTF8((Uint32)utf8_error_index);

_StringThrowBadUTF8((Uint32)utf8_error_index,s1,n1);

}

_rep->size = n2 + tmp;

Line 743

Line 644

{

StringRep::free(_rep);

_rep = &StringRep::_emptyRep;

_StringThrowBadUTF8((Uint32)utf8_error_index);

_StringThrowBadUTF8((Uint32)utf8_error_index,str,n);

}

_rep->data[_rep->size] = 0;

Line 832

Line 733

{

StringRep::free(_rep);

_rep = &StringRep::_emptyRep;

_StringThrowBadUTF8((Uint32)utf8_error_index);

_StringThrowBadUTF8((Uint32)utf8_error_index,str,size);

}

_rep->size += tmp;

Line 1234

Line 1135

// Returns true when s1 and s2 hold the same character data: either both
// refer to the same _rep object, or their sizes match and the first
// size * sizeof(Uint16) bytes of their data buffers compare equal with
// memcmp().
//
// NOTE(review): this page is a two-revision diff view, so the size test and
// the final memcmp line each appear twice below (one line per revision).
// The second line of each pair only adds explicit parentheses around the
// "&&" operand of "||". Because && already binds tighter than ||, the
// grouping, and therefore the result, is the same in both revisions.
Boolean String::equal(const String& s1, const String& s2)

{

// Identity shortcut: a shared representation is trivially equal.
return (s1._rep == s2._rep) ||

(s1._rep->size == s2._rep->size) &&

((s1._rep->size == s2._rep->size) &&

memcmp(s1._rep->data,

s2._rep->data,

s1._rep->size * sizeof(Uint16)) == 0;

s1._rep->size * sizeof(Uint16)) == 0);

}

Boolean String::equal(const String& s1, const char* s2)

Legend:

Removed from v.1.135
changed lines
	Added in v.1.138

No CVS admin address has been configured