pegasus/src/Pegasus/Common/String.cpp - annotate

Return to String.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

1 martin 1.133 //%LICENSE////////////////////////////////////////////////////////////////
2 martin 1.134 //
3 martin 1.133 // Licensed to The Open Group (TOG) under one or more contributor license 4 // agreements. Refer to the OpenPegasusNOTICE.txt file distributed with 5 // this work for additional information regarding copyright ownership. 6 // Each contributor licenses this file to you under the OpenPegasus Open 7 // Source License; you may not use this file except in compliance with the 8 // License.
9 martin 1.134 //
10 martin 1.133 // Permission is hereby granted, free of charge, to any person obtaining a 11 // copy of this software and associated documentation files (the "Software"), 12 // to deal in the Software without restriction, including without limitation 13 // the rights to use, copy, modify, merge, publish, distribute, sublicense, 14 // and/or sell copies of the Software, and to permit persons to whom the 15 // Software is furnished to do so, subject to the following conditions:
16 martin 1.134 //
17 martin 1.133 // The above copyright notice and this permission notice shall be included 18 // in all copies or substantial portions of the Software.
19 martin 1.134 //
20 martin 1.133 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21 martin 1.134 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 martin 1.133 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 23 // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 24 // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 25 // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 26 // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 martin 1.134 //
28 martin 1.133 //////////////////////////////////////////////////////////////////////////
29 mike 1.27 // 30 //%///////////////////////////////////////////////////////////////////////////// 31
32 jim.wunderlich 1.115 #include <Pegasus/Common/PegasusAssert.h>
33 mike 1.113 #include <cstring>
34 kumpf 1.48 #include "InternalException.h"
35 mike 1.112 #include "MessageLoader.h" 36 #include "StringRep.h"
37 karl 1.140 #include <Pegasus/Common/Pegasus_inl.h> 38 #include <cstdarg>
39 david 1.69 40 #ifdef PEGASUS_HAS_ICU
41 kumpf 1.132 # include <unicode/ures.h> 42 # include <unicode/ustring.h> 43 # include <unicode/uchar.h>
44 david 1.69 #endif 45
46 mike 1.112 PEGASUS_NAMESPACE_BEGIN
47 mike 1.28
//==============================================================================
//
// Compile-time macros (undefined by default).
//
//     PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
//
//==============================================================================

//==============================================================================
//
// File-scope definitions:
//
//==============================================================================

// Note: this table is much faster than the system toupper(). Please do not
// change.
64 kumpf 1.54
65 david.dillard 1.116 const Uint8 _toUpperTable[256] =
66 kumpf 1.54 {
67 mike 1.112 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 68 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 69 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 70 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 71 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 72 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 73 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 74 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 75 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47, 76 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, 77 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57, 78 0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, 79 0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47, 80 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, 81 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57, 82 0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F, 83 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 84 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 85 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 86 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 87 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 88 mike 1.112 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 89 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 90 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 91 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 92 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 93 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 94 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 95 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 96 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 97 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 98 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF, 99 }; 100
101 dev.meetei 1.139 // Note: this table is much faster than the system tolower(). Please do not
102 mike 1.112 // change. 103
104 david.dillard 1.116 const Uint8 _toLowerTable[256] =
105 mike 1.112 { 106 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 107 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 108 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 109 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 110 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 111 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 112 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 113 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 114 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 115 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 116 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 117 0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F, 118 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 119 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 120 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 121 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, 122 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 123 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 124 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 125 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 126 mike 1.112 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 127 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 128 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 129 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 130 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 131 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 132 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 133 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 134 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 135 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 136 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 137 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF, 138 }; 139 140 // Converts 16-bit characters to upper case. This routine is faster than the 141 // system toupper(). Please do not change. 142 inline Uint16 _toUpper(Uint16 x) 143 { 144 return (x & 0xFF00) ? x : _toUpperTable[x];
145 kumpf 1.54 } 146
147 mike 1.112 // Converts 16-bit characters to lower case. This routine is faster than the 148 // system toupper(). Please do not change. 149 inline Uint16 _toLower(Uint16 x)
150 kumpf 1.54 {
151 mike 1.112 return (x & 0xFF00) ? x : _toLowerTable[x]; 152 } 153 154 // Rounds x up to the nearest power of two (or just returns 8 if x < 8). 155 static Uint32 _roundUpToPow2(Uint32 x) 156 {
157 dave.sudlik 1.120 // Check for potential overflow in x 158 PEGASUS_CHECK_CAPACITY_OVERFLOW(x);
159 mike 1.112 160 if (x < 8) 161 return 8; 162 163 x--; 164 x \|= (x >> 1); 165 x \|= (x >> 2); 166 x \|= (x >> 4); 167 x \|= (x >> 8); 168 x \|= (x >> 16); 169 x++; 170 171 return x; 172 } 173 174 static Uint16* _find(const Uint16* s, size_t n, Uint16 c) 175 { 176 // The following employs loop unrolling for efficiency. Please do not 177 // eliminate. 178 179 while (n >= 4) 180 mike 1.112 { 181 if (s[0] == c) 182 return (Uint16)s; 183 if (s[1] == c) 184 return (Uint16)&s[1]; 185 if (s[2] == c) 186 return (Uint16)&s[2]; 187 if (s[3] == c) 188 return (Uint16)&s[3];
189 kumpf 1.82
190 mike 1.112 n -= 4; 191 s += 4; 192 } 193 194 if (n) 195 { 196 if (s == c) 197 return (Uint16)s; 198 s++; 199 n--; 200 } 201 202 if (n) 203 { 204 if (s == c) 205 return (Uint16)s; 206 s++; 207 n--; 208 } 209 210 if (n && s == c) 211 mike 1.112 return (Uint16)s; 212 213 // Not found! 214 return 0; 215 } 216 217 static int _compare(const Uint16* s1, const Uint16* s2) 218 { 219 while (s1 && s2) 220 { 221 int r = s1++ - s2++; 222 223 if (r) 224 return r; 225 } 226 227 if (s2) 228 return -1; 229 else if (s1) 230 return 1; 231 232 mike 1.112 return 0; 233 } 234
#ifdef PEGASUS_STRING_NO_UTF8
// Compares a zero-terminated 16-bit string with a zero-terminated char
// string, one element at a time, and returns the difference of the first
// pair that differs (or 0 when both end together).
// NOTE(review): *s2 is widened from plain char, so bytes >= 0x80 sign-extend
// on platforms where char is signed -- confirm this matches how the
// non-UTF8 conversion stores such bytes.
static int _compareNoUTF8(const Uint16* s1, const char* s2)
{
    Uint16 c1;
    Uint16 c2;

    do
    {
        c1 = *s1++;
        c2 = *s2++;

        if (c1 == 0)
            return c1 - c2;
    }
    while (c1 == c2);

    return c1 - c2;
}
#endif
254 mike 1.112 255 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n) 256 { 257 memcpy(s1, s2, n * sizeof(Uint16)); 258 } 259 260 void StringThrowOutOfBounds() 261 { 262 throw IndexOutOfBoundsException(); 263 } 264 265 inline void _checkNullPointer(const void* ptr) 266 { 267 if (!ptr) 268 throw NullPointer(); 269 } 270
271 thilo.boehm 1.138 #define BADUTF8_MAX_CLEAR_CHAR 40 272 #define BADUTF8_MAX_CHAR_TO_HEX 10 273 274 static void _formatBadUTF8Chars( 275 char* buffer, 276 Uint32 index, 277 const char* q, 278 size_t n )
279 mike 1.112 {
280 thilo.boehm 1.138 281 char tmp[20]; 282 const char* start; 283 284 size_t clearChar = 285 (( index < BADUTF8_MAX_CLEAR_CHAR ) ? index : BADUTF8_MAX_CLEAR_CHAR ); 286 size_t charToHex = 287 ((n-index-1) < BADUTF8_MAX_CHAR_TO_HEX ? 288 (n-index-1) : BADUTF8_MAX_CHAR_TO_HEX ); 289 290 if (index < BADUTF8_MAX_CLEAR_CHAR) 291 { 292 start = q; 293 } else 294 { 295 start = &(q[ index - BADUTF8_MAX_CLEAR_CHAR]); 296 } 297 298 // Intialize the buffer with the first character as '\0' to be able to use 299 // strnchat() and strcat() 300 buffer[0] = 0; 301 thilo.boehm 1.138 // Start the buffer with the valid UTF8 chars 302 strncat(buffer,start,clearChar); 303 for (size_t i = clearChar, j = 0; j <= charToHex; i++,j++ ) 304 { 305 tmp[0] = 0; 306 sprintf(&(tmp[0])," 0x%02X",(Uint8)start[i]); 307 strncat(buffer,&(tmp[0]),5); 308 } 309 310 } 311 312 static void _StringThrowBadUTF8(Uint32 index, const char* q, size_t n) 313 { 314 char buffer[1024]; 315 316 _formatBadUTF8Chars(&(buffer[0]),index,q,n); 317
318 mike 1.112 MessageLoaderParms parms(
319 thilo.boehm 1.138 "Common.String.BAD_UTF8_LONG",
320 mike 1.112 "The byte sequence starting at index $0 "
321 thilo.boehm 1.138 "is not valid UTF-8 encoding: $1", 322 index,buffer); 323
324 mike 1.112 throw Exception(parms); 325 } 326
327 david.dillard 1.116 // Note: dest must be at least three times src (plus an extra byte for
328 mike 1.112 // terminator). 329 static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n) 330 { 331 // The following employs loop unrolling for efficiency. Please do not 332 // eliminate. 333 334 const Uint16* q = src; 335 Uint8* p = (Uint8*)dest; 336 337 while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
338 kumpf 1.82 {
339 mike 1.112 p[0] = q[0]; 340 p[1] = q[1]; 341 p[2] = q[2]; 342 p[3] = q[3]; 343 p += 4; 344 q += 4; 345 n -= 4;
346 kumpf 1.82 }
347 mike 1.112 348 switch (n) 349 { 350 case 0: 351 return p - (Uint8)dest; 352 case 1: 353 if (q[0] < 128) 354 { 355 p[0] = q[0]; 356 return p + 1 - (Uint8)dest; 357 } 358 break; 359 case 2: 360 if (q[0] < 128 && q[1] < 128) 361 { 362 p[0] = q[0]; 363 p[1] = q[1]; 364 return p + 2 - (Uint8)dest; 365 } 366 break; 367 case 3: 368 mike 1.112 if (q[0] < 128 && q[1] < 128 && q[2] < 128) 369 { 370 p[0] = q[0]; 371 p[1] = q[1]; 372 p[2] = q[2]; 373 return p + 3 - (Uint8)dest; 374 } 375 break; 376 } 377 378 // If this line was reached, there must be characters greater than 128. 379 380 UTF16toUTF8(&q, q + n, &p, p + 3 * n); 381 382 return p - (Uint8*)dest;
383 kumpf 1.54 } 384
385 mike 1.112 //============================================================================== 386 // 387 // class CString 388 // 389 //============================================================================== 390 391 CString::CString(const CString& cstr) : _rep(0)
392 kumpf 1.54 {
393 mike 1.112 if (cstr._rep)
394 kumpf 1.82 {
395 mike 1.112 size_t n = strlen(cstr._rep) + 1; 396 _rep = (char*)operator new(n); 397 memcpy(_rep, cstr._rep, n);
398 kumpf 1.82 }
399 kumpf 1.54 } 400
401 kumpf 1.56 CString& CString::operator=(const CString& cstr) 402 {
403 kumpf 1.82 if (&cstr != this)
404 kumpf 1.81 {
405 kumpf 1.82 if (_rep) 406 {
407 mike 1.112 operator delete(_rep);
408 kumpf 1.82 _rep = 0; 409 }
410 mike 1.112
411 kumpf 1.82 if (cstr._rep) 412 {
413 mike 1.112 size_t n = strlen(cstr._rep) + 1; 414 _rep = (char*)operator new(n); 415 memcpy(_rep, cstr._rep, n);
416 kumpf 1.82 }
417 kumpf 1.81 }
418 mike 1.112
419 kumpf 1.56 return *this; 420 } 421
422 mike 1.112 //==============================================================================
423 kumpf 1.54 //
424 mike 1.112 // class StringRep
425 kumpf 1.39 //
426 mike 1.112 //==============================================================================
427 kumpf 1.39
428 mike 1.112 StringRep StringRep::_emptyRep;
429 mike 1.27
430 mike 1.112 inline StringRep* StringRep::alloc(size_t cap)
431 mike 1.27 {
432 dave.sudlik 1.120 // Check for potential overflow in cap 433 PEGASUS_CHECK_CAPACITY_OVERFLOW(cap);
434 mike 1.27
435 mike 1.112 StringRep* rep = (StringRep)::operator new( 436 sizeof(StringRep) + cap sizeof(Uint16)); 437 rep->cap = cap; 438 new(&rep->refs) AtomicInt(1); 439 440 return rep;
441 mike 1.27 } 442
443 mike 1.112 static inline void _reserve(StringRep*& rep, Uint32 cap)
444 chuck 1.102 {
445 mike 1.114 if (cap > rep->cap \|\| rep->refs.get() != 1)
446 chuck 1.102 {
447 mike 1.112 size_t n = _roundUpToPow2(cap); 448 StringRep* newRep = StringRep::alloc(n); 449 newRep->size = rep->size; 450 _copy(newRep->data, rep->data, rep->size + 1); 451 StringRep::unref(rep); 452 rep = newRep; 453 } 454 }
455 david.dillard 1.105
456 mike 1.112 StringRep* StringRep::create(const Uint16* data, size_t size) 457 { 458 StringRep* rep = StringRep::alloc(size); 459 rep->size = size; 460 _copy(rep->data, data, size); 461 rep->data[size] = '\0'; 462 return rep; 463 }
464 chuck 1.102
465 mike 1.112 StringRep* StringRep::copyOnWrite(StringRep* rep) 466 { 467 // Return a new copy of rep. Release rep.
468 chuck 1.102
469 mike 1.112 StringRep* newRep = StringRep::alloc(rep->size); 470 newRep->size = rep->size; 471 _copy(newRep->data, rep->data, rep->size); 472 newRep->data[newRep->size] = '\0'; 473 StringRep::unref(rep); 474 return newRep;
475 chuck 1.102 } 476
477 mike 1.112 StringRep* StringRep::create(const char* data, size_t size)
478 kumpf 1.43 {
479 mike 1.112 StringRep* rep = StringRep::alloc(size); 480 size_t utf8_error_index; 481 rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index); 482 483 if (rep->size == size_t(-1)) 484 { 485 StringRep::free(rep);
486 thilo.boehm 1.138 _StringThrowBadUTF8((Uint32)utf8_error_index, data,size);
487 mike 1.112 }
488 kumpf 1.43
489 mike 1.112 rep->data[rep->size] = '\0';
490 kumpf 1.43
491 mike 1.112 return rep;
492 mike 1.27 } 493
494 mike 1.112 Uint32 StringRep::length(const Uint16* str)
495 mike 1.27 {
496 mike 1.112 // Note: We could unroll this but it is rarely called. 497 498 const Uint16* end = (Uint16)str; 499 500 while (end++) 501 ; 502
503 a.dunfey 1.125 return (Uint32)(end - str - 1);
504 kumpf 1.39 }
505 tony 1.66
506 mike 1.112 //============================================================================== 507 // 508 // class String 509 // 510 //============================================================================== 511 512 const String String::EMPTY;
513 mike 1.27
514 kumpf 1.39 String::String(const String& str, Uint32 n) 515 {
516 mike 1.112 _checkBounds(n, str._rep->size); 517 _rep = StringRep::create(str._rep->data, n);
518 kumpf 1.39 } 519 520 String::String(const Char16* str) 521 {
522 mike 1.112 _checkNullPointer(str); 523 _rep = StringRep::create((Uint16)str, StringRep::length((Uint16)str));
524 mike 1.27 } 525
526 kumpf 1.39 String::String(const Char16* str, Uint32 n) 527 {
528 mike 1.112 _checkNullPointer(str); 529 _rep = StringRep::create((Uint16*)str, n);
530 kumpf 1.39 } 531 532 String::String(const char* str)
533 mike 1.27 {
534 mike 1.112 _checkNullPointer(str);
535 david.dillard 1.105
536 mike 1.112 // Set this just in case create() throws an exception. 537 _rep = &StringRep::_emptyRep; 538 _rep = StringRep::create(str, strlen(str));
539 mike 1.27 } 540
541 kumpf 1.39 String::String(const char* str, Uint32 n)
542 mike 1.27 {
543 mike 1.112 _checkNullPointer(str);
544 david.dillard 1.105
545 mike 1.112 // Set this just in case create() throws an exception. 546 _rep = &StringRep::_emptyRep; 547 _rep = StringRep::create(str, n);
548 kumpf 1.39 }
549 mike 1.27
550 mike 1.112 String::String(const String& s1, const String& s2)
551 kumpf 1.39 {
552 mike 1.112 size_t n1 = s1._rep->size; 553 size_t n2 = s2._rep->size; 554 size_t n = n1 + n2; 555 _rep = StringRep::alloc(n); 556 _copy(_rep->data, s1._rep->data, n1); 557 _copy(_rep->data + n1, s2._rep->data, n2); 558 _rep->size = n; 559 _rep->data[n] = '\0';
560 mike 1.27 } 561
562 mike 1.112 String::String(const String& s1, const char* s2)
563 mike 1.27 {
564 mike 1.112 _checkNullPointer(s2); 565 size_t n1 = s1._rep->size; 566 size_t n2 = strlen(s2); 567 _rep = StringRep::alloc(n1 + n2); 568 _copy(_rep->data, s1._rep->data, n1); 569 size_t utf8_error_index; 570 size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index); 571 572 if (tmp == size_t(-1))
573 kumpf 1.82 {
574 mike 1.112 StringRep::free(_rep); 575 _rep = &StringRep::_emptyRep;
576 thilo.boehm 1.138 _StringThrowBadUTF8((Uint32)utf8_error_index,s2,n2);
577 kumpf 1.82 }
578 mike 1.112 579 _rep->size = n1 + tmp; 580 _rep->data[_rep->size] = '\0';
581 mike 1.27 } 582
583 mike 1.112 String::String(const char* s1, const String& s2)
584 mike 1.27 {
585 mike 1.112 _checkNullPointer(s1); 586 size_t n1 = strlen(s1); 587 size_t n2 = s2._rep->size; 588 _rep = StringRep::alloc(n1 + n2); 589 size_t utf8_error_index; 590 size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index); 591 592 if (tmp == size_t(-1)) 593 { 594 StringRep::free(_rep); 595 _rep = &StringRep::_emptyRep;
596 thilo.boehm 1.138 _StringThrowBadUTF8((Uint32)utf8_error_index,s1,n1);
597 mike 1.112 } 598 599 _rep->size = n2 + tmp; 600 _copy(_rep->data + n1, s2._rep->data, n2); 601 _rep->data[_rep->size] = '\0';
602 mike 1.27 } 603
604 mike 1.112 String& String::assign(const String& str)
605 mike 1.27 {
606 mike 1.112 if (_rep != str._rep)
607 david.dillard 1.105 {
608 mike 1.112 StringRep::unref(_rep); 609 StringRep::ref(_rep = str._rep);
610 david.dillard 1.105 } 611
612 mike 1.27 return this; 613 } 614 615 String& String::assign(const Char16 str, Uint32 n) 616 {
617 mike 1.112 _checkNullPointer(str); 618
619 mike 1.114 if (n > _rep->cap \|\| _rep->refs.get() != 1)
620 david.dillard 1.105 {
621 mike 1.112 StringRep::unref(_rep); 622 _rep = StringRep::alloc(n);
623 david.dillard 1.105 } 624
625 mike 1.112 _rep->size = n; 626 _copy(_rep->data, (Uint16*)str, n); 627 _rep->data[n] = '\0'; 628
629 mike 1.27 return *this; 630 } 631
632 mike 1.112 String& String::assign(const char* str, Uint32 n)
633 chuck 1.102 {
634 mike 1.112 _checkNullPointer(str); 635
636 mike 1.114 if (n > _rep->cap \|\| _rep->refs.get() != 1)
637 david.dillard 1.105 {
638 mike 1.112 StringRep::unref(_rep); 639 _rep = StringRep::alloc(n);
640 david.dillard 1.105 } 641
642 mike 1.112 size_t utf8_error_index; 643 _rep->size = _convert(_rep->data, str, n, utf8_error_index);
644 chuck 1.102
645 mike 1.112 if (_rep->size == size_t(-1))
646 david.dillard 1.105 {
647 mike 1.112 StringRep::free(_rep); 648 _rep = &StringRep::_emptyRep;
649 thilo.boehm 1.138 _StringThrowBadUTF8((Uint32)utf8_error_index,str,n);
650 david.dillard 1.105 }
651 mike 1.112 652 _rep->data[_rep->size] = 0;
653 david.dillard 1.105
654 mike 1.27 return *this; 655 } 656
657 kumpf 1.39 void String::clear() 658 {
659 mike 1.112 if (_rep->size) 660 {
661 mike 1.114 if (_rep->refs.get() == 1)
662 mike 1.112 { 663 _rep->size = 0; 664 _rep->data[0] = '\0'; 665 } 666 else 667 { 668 StringRep::unref(_rep); 669 _rep = &StringRep::_emptyRep; 670 } 671 }
672 kumpf 1.39 } 673
674 mike 1.112 void String::reserveCapacity(Uint32 cap)
675 kumpf 1.39 {
676 mike 1.112 _reserve(_rep, cap);
677 kumpf 1.39 } 678
679 mike 1.112 CString String::getCString() const 680 {
681 david.dillard 1.116 // A UTF8 string can have three times as many characters as its UTF16 682 // counterpart, so we allocate extra memory for the worst case. In the
683 mike 1.112 // best case, we may need only one third of the memory allocated. But
684 david.dillard 1.116 // downsizing the string afterwards is expensive and unecessary since 685 // CString objects are usually short-lived (disappearing after only a few
686 mike 1.112 // instructions). CString objects are typically created on the stack as 687 // means to obtain a char* pointer. 688 689 #ifdef PEGASUS_STRING_NO_UTF8 690 char* str = (char*)operator new(_rep->size + 1); 691 _copy(str, _rep->data, _rep->size); 692 str[_rep->size] = '\0'; 693 return CString(str);
694 gs.keenan 1.110 #else
695 a.dunfey 1.125 Uint32 n = (Uint32)(3 * _rep->size);
696 mike 1.112 char* str = (char*)operator new(n + 1); 697 size_t size = _copyToUTF8(str, _rep->data, _rep->size); 698 str[size] = '\0'; 699 return CString(str);
700 gs.keenan 1.110 #endif
701 kumpf 1.39 } 702
703 mike 1.112 String& String::append(const Char16* str, Uint32 n)
704 kumpf 1.39 {
705 mike 1.112 _checkNullPointer(str); 706 707 size_t oldSize = _rep->size; 708 size_t newSize = oldSize + n;
709 a.dunfey 1.125 _reserve(_rep, (Uint32)newSize);
710 mike 1.112 _copy(_rep->data + oldSize, (Uint16)str, n); 711 _rep->size = newSize; 712 _rep->data[newSize] = '\0'; 713 714 return this;
715 kumpf 1.39 } 716
717 mike 1.112 String& String::append(const String& str)
718 mike 1.27 {
719 a.dunfey 1.125 return append((Char16*)(&(str._rep->data[0])), (Uint32)str._rep->size);
720 mike 1.27 } 721
722 mike 1.112 String& String::append(const char* str, Uint32 size)
723 mike 1.27 {
724 mike 1.112 _checkNullPointer(str); 725 726 size_t oldSize = _rep->size; 727 size_t cap = oldSize + size; 728
729 a.dunfey 1.125 _reserve(_rep, (Uint32)cap);
730 mike 1.112 size_t utf8_error_index; 731 size_t tmp = _convert( 732 (Uint16*)_rep->data + oldSize, str, size, utf8_error_index); 733 734 if (tmp == size_t(-1)) 735 { 736 StringRep::free(_rep); 737 _rep = &StringRep::_emptyRep;
738 thilo.boehm 1.138 _StringThrowBadUTF8((Uint32)utf8_error_index,str,size);
739 mike 1.112 }
740 mike 1.27
741 mike 1.112 _rep->size += tmp; 742 _rep->data[_rep->size] = '\0';
743 mike 1.27
744 kumpf 1.39 return *this; 745 } 746
747 mike 1.112 void String::remove(Uint32 index, Uint32 n)
748 mike 1.27 {
749 mike 1.112 if (n == PEG_NOT_FOUND)
750 a.dunfey 1.125 n = (Uint32)(_rep->size - index);
751 mike 1.112 752 _checkBounds(index + n, _rep->size); 753
754 mike 1.114 if (_rep->refs.get() != 1)
755 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
756 mike 1.27
757 jim.wunderlich 1.115 PEGASUS_ASSERT(index + n <= _rep->size);
758 mike 1.27
759 mike 1.112 size_t rem = _rep->size - (index + n); 760 Uint16* data = _rep->data;
761 mike 1.27
762 mike 1.112 if (rem) 763 memmove(data + index, data + index + n, rem * sizeof(Uint16));
764 mike 1.27
765 mike 1.112 _rep->size -= n; 766 data[_rep->size] = '\0';
767 mike 1.27 } 768
769 mike 1.112 String String::subString(Uint32 index, Uint32 n) const
770 mike 1.27 {
771 mike 1.112 // Note: this implementation is very permissive but used for 772 // backwards compatibility. 773 774 if (index < _rep->size)
775 mike 1.27 {
776 mike 1.112 if (n == PEG_NOT_FOUND \|\| n > _rep->size - index)
777 a.dunfey 1.125 n = (Uint32)(_rep->size - index);
778 mike 1.27
779 w.otsuka 1.121 return String((Char16*)(_rep->data + index), n);
780 mike 1.27 }
781 david.dillard 1.105 782 return String();
783 mike 1.27 } 784 785 Uint32 String::find(Char16 c) const 786 {
787 mike 1.112 Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
788 mike 1.27
789 mike 1.112 if (p)
790 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
791 mike 1.27 792 return PEG_NOT_FOUND; 793 } 794
795 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
796 mike 1.30 {
797 mike 1.112 _checkBounds(index, _rep->size); 798 799 if (index >= _rep->size) 800 return PEG_NOT_FOUND; 801 802 Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
803 mike 1.30
804 mike 1.112 if (p)
805 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
806 mike 1.30 807 return PEG_NOT_FOUND; 808 } 809
810 mike 1.112 Uint32 StringFindAux( 811 const StringRep* _rep, const Char16* s, Uint32 n)
812 mike 1.27 {
813 mike 1.112 _checkNullPointer(s);
814 mike 1.27
815 mike 1.112 const Uint16* data = _rep->data; 816 size_t rem = _rep->size; 817 818 while (n <= rem)
819 mike 1.30 {
820 mike 1.112 Uint16* p = (Uint16*)_find(data, rem, s[0]); 821 822 if (!p) 823 break;
824 mike 1.30
825 mike 1.112 if (memcmp(p, s, n * sizeof(Uint16)) == 0)
826 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
827 david.dillard 1.105
828 mike 1.112 p++; 829 rem -= p - data; 830 data = p;
831 mike 1.27 }
832 mike 1.112
833 mike 1.27 return PEG_NOT_FOUND; 834 } 835
836 mike 1.112 Uint32 String::find(const char* s) const 837 { 838 _checkNullPointer(s); 839 840 // Note: could optimize away creation of temporary, but this is rarely 841 // called. 842 return find(String(s)); 843 } 844
845 mike 1.27 Uint32 String::reverseFind(Char16 c) const 846 {
847 mike 1.112 Uint16 x = c; 848 Uint16* p = _rep->data; 849 Uint16* q = _rep->data + _rep->size;
850 mike 1.27
851 mike 1.112 while (q != p)
852 mike 1.27 {
853 mike 1.112 if (*--q == x)
854 david.dillard 1.116 return static_cast<Uint32>(q - p);
855 mike 1.27 } 856 857 return PEG_NOT_FOUND; 858 } 859 860 void String::toLower() 861 {
862 david 1.69 #ifdef PEGASUS_HAS_ICU
863 mike 1.112
864 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
865 david 1.90 {
866 mike 1.114 if (_rep->refs.get() != 1)
867 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 868
869 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
870 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this 871 // prevents optimizations where the us-ascii is converted before
872 mike 1.112 // calling ICU.
873 yi.zhou 1.108 // The string may shrink or expand after the convert. 874
875 mike 1.112 //// First calculate size of resulting string. u_strToLower() returns 876 //// only the size when zero is passed as the destination size argument. 877
878 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR; 879
880 mike 1.112 int32_t newSize = u_strToLower( 881 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
882 david.dillard 1.116
883 mike 1.112 err = U_ZERO_ERROR; 884 885 //// Reserve enough space for the result. 886 887 if ((Uint32)newSize > _rep->cap) 888 _reserve(_rep, newSize); 889 890 //// Perform the conversion (overlapping buffers are allowed).
891 chuck 1.99
892 mike 1.112 u_strToLower((UChar)_rep->data, newSize, 893 (UChar)_rep->data, _rep->size, NULL, &err);
894 yi.zhou 1.108
895 mike 1.112 _rep->size = newSize; 896 return;
897 david 1.90 }
898 mike 1.112 899 #endif /* PEGASUS_HAS_ICU */ 900
901 mike 1.114 if (_rep->refs.get() != 1)
902 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 903 904 Uint16* p = _rep->data; 905 size_t n = _rep->size; 906 907 for (; n--; p++)
908 david 1.90 {
909 mike 1.112 if (!(p & 0xFF00)) 910 p = _toLower(*p);
911 mike 1.27 }
912 kumpf 1.39 } 913
914 chuck 1.99 void String::toUpper()
915 david 1.90 { 916 #ifdef PEGASUS_HAS_ICU
917 mike 1.112
918 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
919 chuck 1.99 {
920 mike 1.114 if (_rep->refs.get() != 1)
921 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 922
923 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
924 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this 925 // prevents optimizations where the us-ascii is converted before
926 mike 1.112 // calling ICU.
927 yi.zhou 1.108 // The string may shrink or expand after the convert. 928
929 mike 1.112 //// First calculate size of resulting string. u_strToUpper() returns 930 //// only the size when zero is passed as the destination size argument. 931
932 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR; 933
934 mike 1.112 int32_t newSize = u_strToUpper( 935 NULL, 0, (UChar)_rep->data, _rep->size, NULL, &err); 936 937 err = U_ZERO_ERROR; 938 939 //// Reserve enough space for the result. 940 941 if ((Uint32)newSize > _rep->cap) 942 _reserve(_rep, newSize); 943 944 //// Perform the conversion (overlapping buffers are allowed). 945 946 u_strToUpper((UChar)_rep->data, newSize, 947 (UChar*)_rep->data, _rep->size, NULL, &err);
948 chuck 1.99
949 mike 1.112 _rep->size = newSize;
950 yi.zhou 1.108
951 mike 1.112 return;
952 david 1.91 }
953 mike 1.112 954 #endif /* PEGASUS_HAS_ICU */ 955
956 mike 1.114 if (_rep->refs.get() != 1)
957 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 958 959 Uint16* p = _rep->data; 960 size_t n = _rep->size; 961 962 for (; n--; p++) 963 p = _toUpper(p);
964 david 1.90 } 965
966 kumpf 1.43 int String::compare(const String& s1, const String& s2, Uint32 n)
967 kumpf 1.39 {
968 kumpf 1.118 const Uint16* p1 = s1._rep->data; 969 const Uint16* p2 = s2._rep->data;
970 mike 1.27
971 kumpf 1.118 while (n--) 972 { 973 int r = p1++ - p2++; 974 if (r) 975 { 976 return r; 977 } 978 else if (!p1[-1]) 979 { 980 // We must have encountered a null terminator in both s1 and s2 981 return 0; 982 } 983 } 984 return 0;
985 mike 1.27 } 986
987 kumpf 1.43 int String::compare(const String& s1, const String& s2)
988 mike 1.30 {
989 mike 1.112 return _compare(s1._rep->data, s2._rep->data); 990 }
991 kumpf 1.43
992 mike 1.112 int String::compare(const String& s1, const char* s2) 993 { 994 _checkNullPointer(s2);
995 mike 1.30
996 mike 1.112 #ifdef PEGASUS_STRING_NO_UTF8 997 return _compareNoUTF8(s1._rep->data, s2); 998 #else 999 // ATTN: optimize this! 1000 return String::compare(s1, String(s2)); 1001 #endif
1002 mike 1.30 } 1003
1004 mike 1.112 int String::compareNoCase(const String& str1, const String& str2)
1005 kumpf 1.40 {
1006 david 1.69 #ifdef PEGASUS_HAS_ICU
1007 mike 1.112
1008 yi.zhou 1.108 if (InitializeICU::initICUSuccessful()) 1009 {
1010 mike 1.112 return u_strcasecmp(
1011 dave.sudlik 1.124 (const UChar)str1._rep->data, 1012 (const UChar)str2._rep->data, 1013 U_FOLD_CASE_DEFAULT 1014 );
1015 yi.zhou 1.108 }
1016 kumpf 1.40
1017 mike 1.112 #endif /* PEGASUS_HAS_ICU / 1018 1019 const Uint16 s1 = str1._rep->data; 1020 const Uint16* s2 = str2._rep->data; 1021 1022 while (s1 && s2)
1023 kumpf 1.40 {
1024 mike 1.112 int r = _toLower(s1++) - _toLower(s2++);
1025 kumpf 1.40
1026 david.dillard 1.105 if (r) 1027 return r;
1028 kumpf 1.40 } 1029
1030 mike 1.112 if (*s2)
1031 david.dillard 1.105 return -1;
1032 mike 1.112 else if (*s1)
1033 david.dillard 1.105 return 1;
1034 kumpf 1.40 1035 return 0; 1036 } 1037
1038 mike 1.112 Boolean StringEqualNoCase(const String& s1, const String& s2)
1039 mike 1.27 {
1040 mike 1.112 #ifdef PEGASUS_HAS_ICU 1041 1042 return String::compareNoCase(s1, s2) == 0; 1043 1044 #else /* PEGASUS_HAS_ICU */
1045 mike 1.27
1046 mike 1.112 // The following employs loop unrolling for efficiency. Please do not 1047 // eliminate.
1048 kumpf 1.39
1049 mike 1.112 Uint16* p = (Uint16)s1.getChar16Data(); 1050 Uint16 q = (Uint16*)s2.getChar16Data(); 1051 Uint32 n = s2.size(); 1052 1053 while (n >= 8) 1054 { 1055 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) \|\| 1056 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) \|\| 1057 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) \|\| 1058 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) \|\| 1059 ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) \|\| 1060 ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) \|\| 1061 ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) \|\| 1062 ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7])))) 1063 { 1064 return false; 1065 }
1066 kumpf 1.39
1067 mike 1.112 n -= 8; 1068 p += 8; 1069 q += 8; 1070 }
1071 mike 1.27
1072 mike 1.112 while (n >= 4)
1073 kumpf 1.39 {
1074 mike 1.112 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) \|\| 1075 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) \|\| 1076 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) \|\| 1077 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
1078 david.dillard 1.105 {
1079 mike 1.112 return false;
1080 david.dillard 1.105 }
1081 mike 1.112 1082 n -= 4; 1083 p += 4; 1084 q += 4; 1085 } 1086 1087 while (n--) 1088 { 1089 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
1090 david.dillard 1.105 return false;
1091 mike 1.112 1092 p++; 1093 q++;
1094 kumpf 1.39 }
1095 mike 1.28
1096 kumpf 1.39 return true;
1097 mike 1.112 1098 #endif /* PEGASUS_HAS_ICU */
1099 david 1.69 } 1100
1101 mike 1.112 Boolean String::equalNoCase(const String& s1, const char* s2)
1102 david 1.69 {
1103 mike 1.112 _checkNullPointer(s2);
1104 david 1.69
1105 mike 1.112 #if defined(PEGASUS_HAS_ICU)
1106 david 1.69
1107 mike 1.112 return String::equalNoCase(s1, String(s2));
1108 david 1.69
1109 mike 1.112 #elif defined(PEGASUS_STRING_NO_UTF8)
1110 david 1.69
1111 mike 1.112 const Uint16* p1 = (Uint16)s1._rep->data; 1112 const char p2 = s2; 1113 size_t n = s1._rep->size;
1114 david.dillard 1.105
1115 mike 1.112 while (n--) 1116 { 1117 if (!*p2) 1118 return false;
1119 david 1.71
1120 mike 1.112 if (_toUpper(p1++) != _toUpperTable[int(p2++)]) 1121 return false; 1122 }
1123 kumpf 1.42
1124 mike 1.112 if (*p2) 1125 return false;
1126 david.dillard 1.116
1127 mike 1.112 return true;
1128 karl 1.36
1129 mike 1.112 #else /* PEGASUS_HAS_ICU */
1130 david.dillard 1.105
1131 mike 1.112 // ATTN: optimize this! 1132 return String::equalNoCase(s1, String(s2));
1133 david.dillard 1.105
1134 mike 1.112 #endif /* PEGASUS_HAS_ICU */ 1135 }
1136 chuck 1.78
1137 mike 1.112 Boolean String::equal(const String& s1, const String& s2)
1138 karl 1.36 {
1139 marek 1.131 return (s1._rep == s2._rep) \|\|
1140 marek 1.137 ((s1._rep->size == s2._rep->size) && 1141 memcmp(s1._rep->data, 1142 s2._rep->data, 1143 s1._rep->size * sizeof(Uint16)) == 0);
1144 karl 1.36 } 1145
1146 karl 1.140 void String::appendPrintf(const char* format, ...) 1147 { 1148 va_list ap; 1149 va_start(ap, format); 1150 1151 // Format into allocated memory 1152 ////char* rtnCharPtr = _charVPrintf(format, ap); 1153 1154 // Iniitial allocation size. This is a guess assuming that 1155 // most printfs are one or two lines long 1156 int allocSize = 256; 1157 int rtnSize; 1158 char p; 1159 1160 // initial allocate for output 1161 if ((p = (char)malloc(allocSize)) == NULL) 1162 { 1163 return; 1164 } 1165 1166 // repeat formatting with increased realloc until it works. 1167 karl 1.140 do 1168 { 1169 rtnSize = vsnprintf(p, allocSize, format, ap); 1170 1171 // return if successful; i.e. if not negative and 1172 // returns less than allocated size. 1173 if (rtnSize > -1 && rtnSize < allocSize) 1174 { 1175 break; 1176 } 1177 1178 // increment alloc size. Positive return is 1179 // expected size and negative is error. 1180 allocSize = (rtnSize > -1)? (rtnSize + 1) : allocSize * 2; 1181 1182 } while((p = (char*)peg_inln_realloc(p, allocSize)) != NULL); 1183 1184 // get here only with error in malloc. 1185 1186 va_end(ap); 1187 1188 karl 1.140 // Free allocated memory append printf output to current string 1189 append(p, rtnSize); 1190 free(p); 1191 } 1192
1193 mike 1.112 Boolean String::equal(const String& s1, const char* s2) 1194 { 1195 #ifdef PEGASUS_STRING_NO_UTF8
1196 kumpf 1.35
1197 mike 1.112 _checkNullPointer(s2);
1198 kumpf 1.39
1199 mike 1.112 const Uint16* p = (Uint16)s1._rep->data; 1200 const char q = s2;
1201 kumpf 1.39
1202 mike 1.112 while (p && q) 1203 { 1204 if (p++ != Uint16(q++)) 1205 return false; 1206 }
1207 kumpf 1.39
1208 mike 1.112 return !(p \|\| q);
1209 kumpf 1.39
1210 mike 1.112 #else /* PEGASUS_STRING_NO_UTF8 */
1211 kumpf 1.39
1212 mike 1.112 return String::equal(s1, String(s2));
1213 kumpf 1.39
1214 mike 1.112 #endif /* PEGASUS_STRING_NO_UTF8 */
1215 kumpf 1.39 } 1216
1217 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
1218 kumpf 1.39 {
1219 mike 1.112 #if defined(PEGASUS_HAS_ICU)
1220 david 1.69
1221 yi.zhou 1.108 if (InitializeICU::initICUSuccessful()) 1222 {
1223 david.dillard 1.105 char buf = NULL; 1224 const int size = str.size() 6;
1225 mike 1.112 UnicodeString UniStr( 1226 (const UChar *)str.getChar16Data(), (int32_t)str.size());
1227 david.dillard 1.105 Uint32 bufsize = UniStr.extract(0,size,buf); 1228 buf = new char[bufsize+1]; 1229 UniStr.extract(0,bufsize,buf); 1230 os << buf; 1231 os.flush(); 1232 delete [] buf;
1233 david.dillard 1.116 return os;
1234 yi.zhou 1.108 }
1235 mike 1.112
1236 david.dillard 1.116 #endif // PEGASUS_HAS_ICU
1237 mike 1.112 1238 for (Uint32 i = 0, n = str.size(); i < n; i++)
1239 yi.zhou 1.108 {
1240 mike 1.112 Uint16 code = str[i];
1241 david.dillard 1.105
1242 mike 1.112 if (code > 0 && !(code & 0xFF00)) 1243 os << char(code); 1244 else 1245 { 1246 // Print in hex format: 1247 char buffer[8]; 1248 sprintf(buffer, "\\x%04X", code); 1249 os << buffer;
1250 david.dillard 1.105 }
1251 yi.zhou 1.108 }
1252 kumpf 1.39 1253 return os; 1254 } 1255
1256 mike 1.112 void StringAppendCharAux(StringRep*& _rep)
1257 kumpf 1.39 {
1258 mike 1.112 StringRep* tmp; 1259 1260 if (_rep->cap) 1261 { 1262 tmp = StringRep::alloc(2 * _rep->cap); 1263 tmp->size = _rep->size; 1264 _copy(tmp->data, _rep->data, _rep->size); 1265 } 1266 else 1267 { 1268 tmp = StringRep::alloc(8); 1269 tmp->size = 0; 1270 } 1271 1272 StringRep::unref(_rep); 1273 _rep = tmp;
1274 kumpf 1.39 } 1275
1276 thilo.boehm 1.128 void AssignASCII(String& s, const char* str, Uint32 n) 1277 { 1278 class StringLayout 1279 { 1280 public: 1281 StringRep* rep; 1282 }; 1283
1284 kumpf 1.130 StringLayout* that = reinterpret_cast<StringLayout*>(&s);
1285 thilo.boehm 1.128 1286 _checkNullPointer(str); 1287 1288 if (n > that->rep->cap \|\| that->rep->refs.get() != 1) 1289 { 1290 StringRep::unref(that->rep); 1291 that->rep = StringRep::alloc(n); 1292 } 1293 1294 _copy(that->rep->data, str, n); 1295 that->rep->size = n; 1296 that->rep->data[that->rep->size] = 0; 1297 } 1298
1299 mike 1.112 PEGASUS_NAMESPACE_END 1300 1301 /* 1302 ================================================================================ 1303 1304 String optimizations: 1305 1306 1. Added mechanism allowing certain functions to be inlined only when 1307 used by internal Pegasus modules. External modules (i.e., providers) 1308 link to a non-inline version, which allows for binary compatibility. 1309 1310 2. Implemented copy-on-write with atomic increment/decrement. This 1311 yieled a 10% improvement for the 'gc' benchmark and a 11% improvment 1312 for the 'ni1000' benchmark. 1313 1314 3. Employed loop unrolling in several places. For example, see: 1315 1316 static Uint16* _find(const Uint16* s, size_t n, Uint16 c); 1317 1318 4. Used the "empty-rep" optimization (described in whitepaper from the 1319 GCC Developers Summit). This reduced default construction to a simple 1320 mike 1.112 pointer assignment. 1321 1322 inline String::String() : _rep(&_emptyRep) { } 1323 1324 5. Implemented Uint16 versions of toupper() and tolower() using tables. 1325 For example: 1326 1327 static const char _upper[] = 1328 { 1329 0,1,2,...255 1330 }; 1331 1332 inline Uint16 _toUpper(Uint16 x) 1333 { 1334 return (x & 0xFF00) ? x : _upper[x]; 1335 } 1336
1337 david.dillard 1.116 This outperforms the system implementation by avoiding an anding
1338 mike 1.112 operation. 1339
1340 david.dillard 1.116 6. Implemented char* versions of the following member functions to 1341 eliminate unnecessary creation of anonymous string objects
1342 mike 1.112 (temporaries). 1343 1344 String(const String& s1, const char* s2); 1345 String(const char* s1, const String& s2); 1346 String& String::operator=(const char* str); 1347 Uint32 String::find(const char* s) const; 1348 bool String::equal(const String& s1, const char* s2); 1349 static int String::compare(const String& s1, const char* s2); 1350 String& String::append(const char* str); 1351 String& String::append(const char* str, Uint32 size); 1352 static bool String::equalNoCase(const String& s1, const char* s2); 1353 String& operator=(const char* str) 1354 String& String::assign(const char* str) 1355 String& String::append(const char* str) 1356 Boolean operator==(const String& s1, const char* s2) 1357 Boolean operator==(const char* s1, const String& s2) 1358 Boolean operator!=(const String& s1, const char* s2) 1359 Boolean operator!=(const char* s1, const String& s2) 1360 Boolean operator<(const String& s1, const char* s2) 1361 Boolean operator<(const char* s1, const String& s2) 1362 Boolean operator>(const String& s1, const char* s2) 1363 mike 1.112 Boolean operator>(const char* s1, const String& s2) 1364 Boolean operator<=(const String& s1, const char* s2) 1365 Boolean operator<=(const char* s1, const String& s2) 1366 Boolean operator>=(const String& s1, const char* s2) 1367 Boolean operator>=(const char* s1, const String& s2) 1368 String operator+(const String& s1, const char* s2) 1369 String operator+(const char* s1, const String& s2) 1370
1371 david.dillard 1.116 7. Optimized _roundUpToPow2(), used in rounding the capacity to the next
1372 mike 1.112 power of two (algorithm from the book "Hacker's Delight"). 1373 1374 static Uint32 _roundUpToPow2(Uint32 x) 1375 { 1376 if (x < 8) 1377 return 8; 1378 1379 x--; 1380 x \|= (x >> 1); 1381 x \|= (x >> 2); 1382 x \|= (x >> 4); 1383 x \|= (x >> 8); 1384 x \|= (x >> 16); 1385 x++; 1386 1387 return x; 1388 } 1389 1390 8. Implemented "concatenating constructors" to eliminate temporaries
1391 david.dillard 1.116 created by operator+(). This scheme employs the "return-value
1392 mike 1.112 optimization" described by Stan Lippman. 1393 1394 inline String operator+(const String& s1, const String& s2) 1395 { 1396 return String(s1, s2, 0); 1397 } 1398 1399 9. Experimented to find the optimal initial size for a short string. 1400 Eight seems to offer the best tradeoff between space and time. 1401 1402 10. Inlined all members of the Char16 class. 1403 1404 11. Used Uint16 internally in the String class. This showed no improvement 1405 since Char16 was already fully inlined and was essentially reduced to 1406 Uint16 in any case. 1407 1408 12. Implemented conditional logic (#if) allowing error checking logic to
1409 david.dillard 1.116 be excluded to improve performance. Examples include bounds checking
1410 mike 1.112 and null-pointer checking. 1411 1412 13. Used memcpy() and memcmp() where possible. These are implemented using 1413 the rep family of instructions under Intel and are much faster. 1414
1415 david.dillard 1.116 14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
1416 mike 1.112 copy routine overhead. 1417 1418 15. Added ASCII7 form of the constructor and assign(). 1419 1420 String s("hello world", String::ASCII7); 1421 1422 s.assignASCII7("hello world"); 1423 1424 This avoids slower UTF8 processing when not needed. 1425 1426 ================================================================================ 1427 */

No CVS admin address has been configured