pegasus/src/Pegasus/Common/String.cpp - annotate

Return to String.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

1 martin 1.133 //%LICENSE////////////////////////////////////////////////////////////////
2 martin 1.134 //
3 martin 1.133 // Licensed to The Open Group (TOG) under one or more contributor license 4 // agreements. Refer to the OpenPegasusNOTICE.txt file distributed with 5 // this work for additional information regarding copyright ownership. 6 // Each contributor licenses this file to you under the OpenPegasus Open 7 // Source License; you may not use this file except in compliance with the 8 // License.
9 martin 1.134 //
10 martin 1.133 // Permission is hereby granted, free of charge, to any person obtaining a 11 // copy of this software and associated documentation files (the "Software"), 12 // to deal in the Software without restriction, including without limitation 13 // the rights to use, copy, modify, merge, publish, distribute, sublicense, 14 // and/or sell copies of the Software, and to permit persons to whom the 15 // Software is furnished to do so, subject to the following conditions:
16 martin 1.134 //
17 martin 1.133 // The above copyright notice and this permission notice shall be included 18 // in all copies or substantial portions of the Software.
19 martin 1.134 //
20 martin 1.133 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21 martin 1.134 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 martin 1.133 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 23 // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 24 // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 25 // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 26 // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 martin 1.134 //
28 martin 1.133 //////////////////////////////////////////////////////////////////////////
29 mike 1.27 // 30 //%///////////////////////////////////////////////////////////////////////////// 31
32 jim.wunderlich 1.115 #include <Pegasus/Common/PegasusAssert.h>
33 mike 1.113 #include <cstring>
34 kumpf 1.48 #include "InternalException.h"
35 mike 1.112 #include "MessageLoader.h" 36 #include "StringRep.h"
37 david 1.69 38 #ifdef PEGASUS_HAS_ICU
39 kumpf 1.132 # include <unicode/ures.h> 40 # include <unicode/ustring.h> 41 # include <unicode/uchar.h>
42 david 1.69 #endif 43
44 mike 1.112 PEGASUS_NAMESPACE_BEGIN
45 mike 1.28
46 mike 1.112 //============================================================================== 47 // 48 // Compile-time macros (undefined by default). 49 // 50 // PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code. 51 // 52 //==============================================================================
53 mike 1.27
54 mike 1.112 //==============================================================================
55 kumpf 1.39 //
56 mike 1.112 // File-scope definitions:
57 kumpf 1.54 //
58 mike 1.112 //============================================================================== 59 60 // Note: this table is much faster than the system toupper(). Please do not 61 // change.
62 kumpf 1.54
63 david.dillard 1.116 const Uint8 _toUpperTable[256] =
64 kumpf 1.54 {
65 mike 1.112 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 66 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 67 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 68 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 69 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 70 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 71 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 72 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 73 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47, 74 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, 75 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57, 76 0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, 77 0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47, 78 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, 79 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57, 80 0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F, 81 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 82 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 83 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 84 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 85 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 86 mike 1.112 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 87 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 88 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 89 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 90 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 91 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 92 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 93 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 94 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 95 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 96 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF, 97 }; 98
99 dev.meetei 1.139 // Note: this table is much faster than the system tolower(). Please do not
100 mike 1.112 // change. 101
102 david.dillard 1.116 const Uint8 _toLowerTable[256] =
103 mike 1.112 { 104 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 105 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 106 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 107 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 108 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 109 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 110 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 111 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 112 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 113 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 114 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 115 0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F, 116 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 117 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 118 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 119 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, 120 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 121 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 122 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 123 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 124 mike 1.112 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 125 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 126 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 127 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 128 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 129 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 130 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 131 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 132 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 133 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 134 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 135 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF, 136 }; 137 138 // Converts 16-bit characters to upper case. This routine is faster than the 139 // system toupper(). Please do not change. 140 inline Uint16 _toUpper(Uint16 x) 141 { 142 return (x & 0xFF00) ? x : _toUpperTable[x];
143 kumpf 1.54 } 144
145 mike 1.112 // Converts 16-bit characters to lower case. This routine is faster than the 146 // system toupper(). Please do not change. 147 inline Uint16 _toLower(Uint16 x)
148 kumpf 1.54 {
149 mike 1.112 return (x & 0xFF00) ? x : _toLowerTable[x]; 150 } 151 152 // Rounds x up to the nearest power of two (or just returns 8 if x < 8). 153 static Uint32 _roundUpToPow2(Uint32 x) 154 {
155 dave.sudlik 1.120 // Check for potential overflow in x 156 PEGASUS_CHECK_CAPACITY_OVERFLOW(x);
157 mike 1.112 158 if (x < 8) 159 return 8; 160 161 x--; 162 x \|= (x >> 1); 163 x \|= (x >> 2); 164 x \|= (x >> 4); 165 x \|= (x >> 8); 166 x \|= (x >> 16); 167 x++; 168 169 return x; 170 } 171 172 static Uint16* _find(const Uint16* s, size_t n, Uint16 c) 173 { 174 // The following employs loop unrolling for efficiency. Please do not 175 // eliminate. 176 177 while (n >= 4) 178 mike 1.112 { 179 if (s[0] == c) 180 return (Uint16)s; 181 if (s[1] == c) 182 return (Uint16)&s[1]; 183 if (s[2] == c) 184 return (Uint16)&s[2]; 185 if (s[3] == c) 186 return (Uint16)&s[3];
187 kumpf 1.82
188 mike 1.112 n -= 4; 189 s += 4; 190 } 191 192 if (n) 193 { 194 if (s == c) 195 return (Uint16)s; 196 s++; 197 n--; 198 } 199 200 if (n) 201 { 202 if (s == c) 203 return (Uint16)s; 204 s++; 205 n--; 206 } 207 208 if (n && s == c) 209 mike 1.112 return (Uint16)s; 210 211 // Not found! 212 return 0; 213 } 214 215 static int _compare(const Uint16* s1, const Uint16* s2) 216 { 217 while (s1 && s2) 218 { 219 int r = s1++ - s2++; 220 221 if (r) 222 return r; 223 } 224 225 if (s2) 226 return -1; 227 else if (s1) 228 return 1; 229 230 mike 1.112 return 0; 231 } 232
#ifdef PEGASUS_STRING_NO_UTF8
// Compares a zero-terminated 16-bit string with a zero-terminated 8-bit
// string. Returns zero when they are equal, otherwise the difference of the
// first mismatching pair (or of the terminator and its counterpart).
static int _compareNoUTF8(const Uint16* s1, const char* s2)
{
    Uint16 c1;
    Uint16 c2;

    do
    {
        c1 = *s1++;
        // Go through Uint8 so that bytes >= 0x80 are not sign-extended to
        // 0xFFxx on platforms where plain char is signed.
        c2 = (Uint8)*s2++;

        if (c1 == 0)
            return c1 - c2;
    }
    while (c1 == c2);

    return c1 - c2;
}
#endif
252 mike 1.112 253 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n) 254 { 255 memcpy(s1, s2, n * sizeof(Uint16)); 256 } 257 258 void StringThrowOutOfBounds() 259 { 260 throw IndexOutOfBoundsException(); 261 } 262 263 inline void _checkNullPointer(const void* ptr) 264 { 265 if (!ptr) 266 throw NullPointer(); 267 } 268
269 thilo.boehm 1.138 #define BADUTF8_MAX_CLEAR_CHAR 40 270 #define BADUTF8_MAX_CHAR_TO_HEX 10 271 272 static void _formatBadUTF8Chars( 273 char* buffer, 274 Uint32 index, 275 const char* q, 276 size_t n )
277 mike 1.112 {
278 thilo.boehm 1.138 279 char tmp[20]; 280 const char* start; 281 282 size_t clearChar = 283 (( index < BADUTF8_MAX_CLEAR_CHAR ) ? index : BADUTF8_MAX_CLEAR_CHAR ); 284 size_t charToHex = 285 ((n-index-1) < BADUTF8_MAX_CHAR_TO_HEX ? 286 (n-index-1) : BADUTF8_MAX_CHAR_TO_HEX ); 287 288 if (index < BADUTF8_MAX_CLEAR_CHAR) 289 { 290 start = q; 291 } else 292 { 293 start = &(q[ index - BADUTF8_MAX_CLEAR_CHAR]); 294 } 295 296 // Intialize the buffer with the first character as '\0' to be able to use 297 // strnchat() and strcat() 298 buffer[0] = 0; 299 thilo.boehm 1.138 // Start the buffer with the valid UTF8 chars 300 strncat(buffer,start,clearChar); 301 for (size_t i = clearChar, j = 0; j <= charToHex; i++,j++ ) 302 { 303 tmp[0] = 0; 304 sprintf(&(tmp[0])," 0x%02X",(Uint8)start[i]); 305 strncat(buffer,&(tmp[0]),5); 306 } 307 308 } 309 310 static void _StringThrowBadUTF8(Uint32 index, const char* q, size_t n) 311 { 312 char buffer[1024]; 313 314 _formatBadUTF8Chars(&(buffer[0]),index,q,n); 315
316 mike 1.112 MessageLoaderParms parms(
317 thilo.boehm 1.138 "Common.String.BAD_UTF8_LONG",
318 mike 1.112 "The byte sequence starting at index $0 "
319 thilo.boehm 1.138 "is not valid UTF-8 encoding: $1", 320 index,buffer); 321
322 mike 1.112 throw Exception(parms); 323 } 324
325 david.dillard 1.116 // Note: dest must be at least three times src (plus an extra byte for
326 mike 1.112 // terminator). 327 static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n) 328 { 329 // The following employs loop unrolling for efficiency. Please do not 330 // eliminate. 331 332 const Uint16* q = src; 333 Uint8* p = (Uint8*)dest; 334 335 while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
336 kumpf 1.82 {
337 mike 1.112 p[0] = q[0]; 338 p[1] = q[1]; 339 p[2] = q[2]; 340 p[3] = q[3]; 341 p += 4; 342 q += 4; 343 n -= 4;
344 kumpf 1.82 }
345 mike 1.112 346 switch (n) 347 { 348 case 0: 349 return p - (Uint8)dest; 350 case 1: 351 if (q[0] < 128) 352 { 353 p[0] = q[0]; 354 return p + 1 - (Uint8)dest; 355 } 356 break; 357 case 2: 358 if (q[0] < 128 && q[1] < 128) 359 { 360 p[0] = q[0]; 361 p[1] = q[1]; 362 return p + 2 - (Uint8)dest; 363 } 364 break; 365 case 3: 366 mike 1.112 if (q[0] < 128 && q[1] < 128 && q[2] < 128) 367 { 368 p[0] = q[0]; 369 p[1] = q[1]; 370 p[2] = q[2]; 371 return p + 3 - (Uint8)dest; 372 } 373 break; 374 } 375 376 // If this line was reached, there must be characters greater than 128. 377 378 UTF16toUTF8(&q, q + n, &p, p + 3 * n); 379 380 return p - (Uint8*)dest;
381 kumpf 1.54 } 382
383 mike 1.112 //============================================================================== 384 // 385 // class CString 386 // 387 //============================================================================== 388 389 CString::CString(const CString& cstr) : _rep(0)
390 kumpf 1.54 {
391 mike 1.112 if (cstr._rep)
392 kumpf 1.82 {
393 mike 1.112 size_t n = strlen(cstr._rep) + 1; 394 _rep = (char*)operator new(n); 395 memcpy(_rep, cstr._rep, n);
396 kumpf 1.82 }
397 kumpf 1.54 } 398
399 kumpf 1.56 CString& CString::operator=(const CString& cstr) 400 {
401 kumpf 1.82 if (&cstr != this)
402 kumpf 1.81 {
403 kumpf 1.82 if (_rep) 404 {
405 mike 1.112 operator delete(_rep);
406 kumpf 1.82 _rep = 0; 407 }
408 mike 1.112
409 kumpf 1.82 if (cstr._rep) 410 {
411 mike 1.112 size_t n = strlen(cstr._rep) + 1; 412 _rep = (char*)operator new(n); 413 memcpy(_rep, cstr._rep, n);
414 kumpf 1.82 }
415 kumpf 1.81 }
416 mike 1.112
417 kumpf 1.56 return *this; 418 } 419
420 mike 1.112 //==============================================================================
421 kumpf 1.54 //
422 mike 1.112 // class StringRep
423 kumpf 1.39 //
424 mike 1.112 //==============================================================================
425 kumpf 1.39
426 mike 1.112 StringRep StringRep::_emptyRep;
427 mike 1.27
428 mike 1.112 inline StringRep* StringRep::alloc(size_t cap)
429 mike 1.27 {
430 dave.sudlik 1.120 // Check for potential overflow in cap 431 PEGASUS_CHECK_CAPACITY_OVERFLOW(cap);
432 mike 1.27
433 mike 1.112 StringRep* rep = (StringRep)::operator new( 434 sizeof(StringRep) + cap sizeof(Uint16)); 435 rep->cap = cap; 436 new(&rep->refs) AtomicInt(1); 437 438 return rep;
439 mike 1.27 } 440
441 mike 1.112 static inline void _reserve(StringRep*& rep, Uint32 cap)
442 chuck 1.102 {
443 mike 1.114 if (cap > rep->cap \|\| rep->refs.get() != 1)
444 chuck 1.102 {
445 mike 1.112 size_t n = _roundUpToPow2(cap); 446 StringRep* newRep = StringRep::alloc(n); 447 newRep->size = rep->size; 448 _copy(newRep->data, rep->data, rep->size + 1); 449 StringRep::unref(rep); 450 rep = newRep; 451 } 452 }
453 david.dillard 1.105
454 mike 1.112 StringRep* StringRep::create(const Uint16* data, size_t size) 455 { 456 StringRep* rep = StringRep::alloc(size); 457 rep->size = size; 458 _copy(rep->data, data, size); 459 rep->data[size] = '\0'; 460 return rep; 461 }
462 chuck 1.102
463 mike 1.112 StringRep* StringRep::copyOnWrite(StringRep* rep) 464 { 465 // Return a new copy of rep. Release rep.
466 chuck 1.102
467 mike 1.112 StringRep* newRep = StringRep::alloc(rep->size); 468 newRep->size = rep->size; 469 _copy(newRep->data, rep->data, rep->size); 470 newRep->data[newRep->size] = '\0'; 471 StringRep::unref(rep); 472 return newRep;
473 chuck 1.102 } 474
475 mike 1.112 StringRep* StringRep::create(const char* data, size_t size)
476 kumpf 1.43 {
477 mike 1.112 StringRep* rep = StringRep::alloc(size); 478 size_t utf8_error_index; 479 rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index); 480 481 if (rep->size == size_t(-1)) 482 { 483 StringRep::free(rep);
484 thilo.boehm 1.138 _StringThrowBadUTF8((Uint32)utf8_error_index, data,size);
485 mike 1.112 }
486 kumpf 1.43
487 mike 1.112 rep->data[rep->size] = '\0';
488 kumpf 1.43
489 mike 1.112 return rep;
490 mike 1.27 } 491
492 mike 1.112 Uint32 StringRep::length(const Uint16* str)
493 mike 1.27 {
494 mike 1.112 // Note: We could unroll this but it is rarely called. 495 496 const Uint16* end = (Uint16)str; 497 498 while (end++) 499 ; 500
501 a.dunfey 1.125 return (Uint32)(end - str - 1);
502 kumpf 1.39 }
503 tony 1.66
504 mike 1.112 //============================================================================== 505 // 506 // class String 507 // 508 //============================================================================== 509 510 const String String::EMPTY;
511 mike 1.27
512 kumpf 1.39 String::String(const String& str, Uint32 n) 513 {
514 mike 1.112 _checkBounds(n, str._rep->size); 515 _rep = StringRep::create(str._rep->data, n);
516 kumpf 1.39 } 517 518 String::String(const Char16* str) 519 {
520 mike 1.112 _checkNullPointer(str); 521 _rep = StringRep::create((Uint16)str, StringRep::length((Uint16)str));
522 mike 1.27 } 523
524 kumpf 1.39 String::String(const Char16* str, Uint32 n) 525 {
526 mike 1.112 _checkNullPointer(str); 527 _rep = StringRep::create((Uint16*)str, n);
528 kumpf 1.39 } 529 530 String::String(const char* str)
531 mike 1.27 {
532 mike 1.112 _checkNullPointer(str);
533 david.dillard 1.105
534 mike 1.112 // Set this just in case create() throws an exception. 535 _rep = &StringRep::_emptyRep; 536 _rep = StringRep::create(str, strlen(str));
537 mike 1.27 } 538
539 kumpf 1.39 String::String(const char* str, Uint32 n)
540 mike 1.27 {
541 mike 1.112 _checkNullPointer(str);
542 david.dillard 1.105
543 mike 1.112 // Set this just in case create() throws an exception. 544 _rep = &StringRep::_emptyRep; 545 _rep = StringRep::create(str, n);
546 kumpf 1.39 }
547 mike 1.27
548 mike 1.112 String::String(const String& s1, const String& s2)
549 kumpf 1.39 {
550 mike 1.112 size_t n1 = s1._rep->size; 551 size_t n2 = s2._rep->size; 552 size_t n = n1 + n2; 553 _rep = StringRep::alloc(n); 554 _copy(_rep->data, s1._rep->data, n1); 555 _copy(_rep->data + n1, s2._rep->data, n2); 556 _rep->size = n; 557 _rep->data[n] = '\0';
558 mike 1.27 } 559
560 mike 1.112 String::String(const String& s1, const char* s2)
561 mike 1.27 {
562 mike 1.112 _checkNullPointer(s2); 563 size_t n1 = s1._rep->size; 564 size_t n2 = strlen(s2); 565 _rep = StringRep::alloc(n1 + n2); 566 _copy(_rep->data, s1._rep->data, n1); 567 size_t utf8_error_index; 568 size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index); 569 570 if (tmp == size_t(-1))
571 kumpf 1.82 {
572 mike 1.112 StringRep::free(_rep); 573 _rep = &StringRep::_emptyRep;
574 thilo.boehm 1.138 _StringThrowBadUTF8((Uint32)utf8_error_index,s2,n2);
575 kumpf 1.82 }
576 mike 1.112 577 _rep->size = n1 + tmp; 578 _rep->data[_rep->size] = '\0';
579 mike 1.27 } 580
581 mike 1.112 String::String(const char* s1, const String& s2)
582 mike 1.27 {
583 mike 1.112 _checkNullPointer(s1); 584 size_t n1 = strlen(s1); 585 size_t n2 = s2._rep->size; 586 _rep = StringRep::alloc(n1 + n2); 587 size_t utf8_error_index; 588 size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index); 589 590 if (tmp == size_t(-1)) 591 { 592 StringRep::free(_rep); 593 _rep = &StringRep::_emptyRep;
594 thilo.boehm 1.138 _StringThrowBadUTF8((Uint32)utf8_error_index,s1,n1);
595 mike 1.112 } 596 597 _rep->size = n2 + tmp; 598 _copy(_rep->data + n1, s2._rep->data, n2); 599 _rep->data[_rep->size] = '\0';
600 mike 1.27 } 601
602 mike 1.112 String& String::assign(const String& str)
603 mike 1.27 {
604 mike 1.112 if (_rep != str._rep)
605 david.dillard 1.105 {
606 mike 1.112 StringRep::unref(_rep); 607 StringRep::ref(_rep = str._rep);
608 david.dillard 1.105 } 609
610 mike 1.27 return this; 611 } 612 613 String& String::assign(const Char16 str, Uint32 n) 614 {
615 mike 1.112 _checkNullPointer(str); 616
617 mike 1.114 if (n > _rep->cap \|\| _rep->refs.get() != 1)
618 david.dillard 1.105 {
619 mike 1.112 StringRep::unref(_rep); 620 _rep = StringRep::alloc(n);
621 david.dillard 1.105 } 622
623 mike 1.112 _rep->size = n; 624 _copy(_rep->data, (Uint16*)str, n); 625 _rep->data[n] = '\0'; 626
627 mike 1.27 return *this; 628 } 629
630 mike 1.112 String& String::assign(const char* str, Uint32 n)
631 chuck 1.102 {
632 mike 1.112 _checkNullPointer(str); 633
634 mike 1.114 if (n > _rep->cap \|\| _rep->refs.get() != 1)
635 david.dillard 1.105 {
636 mike 1.112 StringRep::unref(_rep); 637 _rep = StringRep::alloc(n);
638 david.dillard 1.105 } 639
640 mike 1.112 size_t utf8_error_index; 641 _rep->size = _convert(_rep->data, str, n, utf8_error_index);
642 chuck 1.102
643 mike 1.112 if (_rep->size == size_t(-1))
644 david.dillard 1.105 {
645 mike 1.112 StringRep::free(_rep); 646 _rep = &StringRep::_emptyRep;
647 thilo.boehm 1.138 _StringThrowBadUTF8((Uint32)utf8_error_index,str,n);
648 david.dillard 1.105 }
649 mike 1.112 650 _rep->data[_rep->size] = 0;
651 david.dillard 1.105
652 mike 1.27 return *this; 653 } 654
655 kumpf 1.39 void String::clear() 656 {
657 mike 1.112 if (_rep->size) 658 {
659 mike 1.114 if (_rep->refs.get() == 1)
660 mike 1.112 { 661 _rep->size = 0; 662 _rep->data[0] = '\0'; 663 } 664 else 665 { 666 StringRep::unref(_rep); 667 _rep = &StringRep::_emptyRep; 668 } 669 }
670 kumpf 1.39 } 671
672 mike 1.112 void String::reserveCapacity(Uint32 cap)
673 kumpf 1.39 {
674 mike 1.112 _reserve(_rep, cap);
675 kumpf 1.39 } 676
677 mike 1.112 CString String::getCString() const 678 {
679 david.dillard 1.116 // A UTF8 string can have three times as many characters as its UTF16 680 // counterpart, so we allocate extra memory for the worst case. In the
681 mike 1.112 // best case, we may need only one third of the memory allocated. But
682 david.dillard 1.116 // downsizing the string afterwards is expensive and unecessary since 683 // CString objects are usually short-lived (disappearing after only a few
684 mike 1.112 // instructions). CString objects are typically created on the stack as 685 // means to obtain a char* pointer. 686 687 #ifdef PEGASUS_STRING_NO_UTF8 688 char* str = (char*)operator new(_rep->size + 1); 689 _copy(str, _rep->data, _rep->size); 690 str[_rep->size] = '\0'; 691 return CString(str);
692 gs.keenan 1.110 #else
693 a.dunfey 1.125 Uint32 n = (Uint32)(3 * _rep->size);
694 mike 1.112 char* str = (char*)operator new(n + 1); 695 size_t size = _copyToUTF8(str, _rep->data, _rep->size); 696 str[size] = '\0'; 697 return CString(str);
698 gs.keenan 1.110 #endif
699 kumpf 1.39 } 700
701 mike 1.112 String& String::append(const Char16* str, Uint32 n)
702 kumpf 1.39 {
703 mike 1.112 _checkNullPointer(str); 704 705 size_t oldSize = _rep->size; 706 size_t newSize = oldSize + n;
707 a.dunfey 1.125 _reserve(_rep, (Uint32)newSize);
708 mike 1.112 _copy(_rep->data + oldSize, (Uint16)str, n); 709 _rep->size = newSize; 710 _rep->data[newSize] = '\0'; 711 712 return this;
713 kumpf 1.39 } 714
715 mike 1.112 String& String::append(const String& str)
716 mike 1.27 {
717 a.dunfey 1.125 return append((Char16*)(&(str._rep->data[0])), (Uint32)str._rep->size);
718 mike 1.27 } 719
720 mike 1.112 String& String::append(const char* str, Uint32 size)
721 mike 1.27 {
722 mike 1.112 _checkNullPointer(str); 723 724 size_t oldSize = _rep->size; 725 size_t cap = oldSize + size; 726
727 a.dunfey 1.125 _reserve(_rep, (Uint32)cap);
728 mike 1.112 size_t utf8_error_index; 729 size_t tmp = _convert( 730 (Uint16*)_rep->data + oldSize, str, size, utf8_error_index); 731 732 if (tmp == size_t(-1)) 733 { 734 StringRep::free(_rep); 735 _rep = &StringRep::_emptyRep;
736 thilo.boehm 1.138 _StringThrowBadUTF8((Uint32)utf8_error_index,str,size);
737 mike 1.112 }
738 mike 1.27
739 mike 1.112 _rep->size += tmp; 740 _rep->data[_rep->size] = '\0';
741 mike 1.27
742 kumpf 1.39 return *this; 743 } 744
745 mike 1.112 void String::remove(Uint32 index, Uint32 n)
746 mike 1.27 {
747 mike 1.112 if (n == PEG_NOT_FOUND)
748 a.dunfey 1.125 n = (Uint32)(_rep->size - index);
749 mike 1.112 750 _checkBounds(index + n, _rep->size); 751
752 mike 1.114 if (_rep->refs.get() != 1)
753 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
754 mike 1.27
755 jim.wunderlich 1.115 PEGASUS_ASSERT(index + n <= _rep->size);
756 mike 1.27
757 mike 1.112 size_t rem = _rep->size - (index + n); 758 Uint16* data = _rep->data;
759 mike 1.27
760 mike 1.112 if (rem) 761 memmove(data + index, data + index + n, rem * sizeof(Uint16));
762 mike 1.27
763 mike 1.112 _rep->size -= n; 764 data[_rep->size] = '\0';
765 mike 1.27 } 766
767 mike 1.112 String String::subString(Uint32 index, Uint32 n) const
768 mike 1.27 {
769 mike 1.112 // Note: this implementation is very permissive but used for 770 // backwards compatibility. 771 772 if (index < _rep->size)
773 mike 1.27 {
774 mike 1.112 if (n == PEG_NOT_FOUND \|\| n > _rep->size - index)
775 a.dunfey 1.125 n = (Uint32)(_rep->size - index);
776 mike 1.27
777 w.otsuka 1.121 return String((Char16*)(_rep->data + index), n);
778 mike 1.27 }
779 david.dillard 1.105 780 return String();
781 mike 1.27 } 782 783 Uint32 String::find(Char16 c) const 784 {
785 mike 1.112 Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
786 mike 1.27
787 mike 1.112 if (p)
788 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
789 mike 1.27 790 return PEG_NOT_FOUND; 791 } 792
793 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
794 mike 1.30 {
795 mike 1.112 _checkBounds(index, _rep->size); 796 797 if (index >= _rep->size) 798 return PEG_NOT_FOUND; 799 800 Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
801 mike 1.30
802 mike 1.112 if (p)
803 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
804 mike 1.30 805 return PEG_NOT_FOUND; 806 } 807
808 mike 1.112 Uint32 StringFindAux( 809 const StringRep* _rep, const Char16* s, Uint32 n)
810 mike 1.27 {
811 mike 1.112 _checkNullPointer(s);
812 mike 1.27
813 mike 1.112 const Uint16* data = _rep->data; 814 size_t rem = _rep->size; 815 816 while (n <= rem)
817 mike 1.30 {
818 mike 1.112 Uint16* p = (Uint16*)_find(data, rem, s[0]); 819 820 if (!p) 821 break;
822 mike 1.30
823 mike 1.112 if (memcmp(p, s, n * sizeof(Uint16)) == 0)
824 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
825 david.dillard 1.105
826 mike 1.112 p++; 827 rem -= p - data; 828 data = p;
829 mike 1.27 }
830 mike 1.112
831 mike 1.27 return PEG_NOT_FOUND; 832 } 833
834 mike 1.112 Uint32 String::find(const char* s) const 835 { 836 _checkNullPointer(s); 837 838 // Note: could optimize away creation of temporary, but this is rarely 839 // called. 840 return find(String(s)); 841 } 842
843 mike 1.27 Uint32 String::reverseFind(Char16 c) const 844 {
845 mike 1.112 Uint16 x = c; 846 Uint16* p = _rep->data; 847 Uint16* q = _rep->data + _rep->size;
848 mike 1.27
849 mike 1.112 while (q != p)
850 mike 1.27 {
851 mike 1.112 if (*--q == x)
852 david.dillard 1.116 return static_cast<Uint32>(q - p);
853 mike 1.27 } 854 855 return PEG_NOT_FOUND; 856 } 857 858 void String::toLower() 859 {
860 david 1.69 #ifdef PEGASUS_HAS_ICU
861 mike 1.112
862 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
863 david 1.90 {
864 mike 1.114 if (_rep->refs.get() != 1)
865 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 866
867 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
868 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this 869 // prevents optimizations where the us-ascii is converted before
870 mike 1.112 // calling ICU.
871 yi.zhou 1.108 // The string may shrink or expand after the convert. 872
873 mike 1.112 //// First calculate size of resulting string. u_strToLower() returns 874 //// only the size when zero is passed as the destination size argument. 875
876 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR; 877
878 mike 1.112 int32_t newSize = u_strToLower( 879 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
880 david.dillard 1.116
881 mike 1.112 err = U_ZERO_ERROR; 882 883 //// Reserve enough space for the result. 884 885 if ((Uint32)newSize > _rep->cap) 886 _reserve(_rep, newSize); 887 888 //// Perform the conversion (overlapping buffers are allowed).
889 chuck 1.99
890 mike 1.112 u_strToLower((UChar)_rep->data, newSize, 891 (UChar)_rep->data, _rep->size, NULL, &err);
892 yi.zhou 1.108
893 mike 1.112 _rep->size = newSize; 894 return;
895 david 1.90 }
896 mike 1.112 897 #endif /* PEGASUS_HAS_ICU */ 898
899 mike 1.114 if (_rep->refs.get() != 1)
900 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 901 902 Uint16* p = _rep->data; 903 size_t n = _rep->size; 904 905 for (; n--; p++)
906 david 1.90 {
907 mike 1.112 if (!(p & 0xFF00)) 908 p = _toLower(*p);
909 mike 1.27 }
910 kumpf 1.39 } 911
912 chuck 1.99 void String::toUpper()
913 david 1.90 { 914 #ifdef PEGASUS_HAS_ICU
915 mike 1.112
916 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
917 chuck 1.99 {
918 mike 1.114 if (_rep->refs.get() != 1)
919 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 920
921 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
922 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this 923 // prevents optimizations where the us-ascii is converted before
924 mike 1.112 // calling ICU.
925 yi.zhou 1.108 // The string may shrink or expand after the convert. 926
927 mike 1.112 //// First calculate size of resulting string. u_strToUpper() returns 928 //// only the size when zero is passed as the destination size argument. 929
930 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR; 931
932 mike 1.112 int32_t newSize = u_strToUpper( 933 NULL, 0, (UChar)_rep->data, _rep->size, NULL, &err); 934 935 err = U_ZERO_ERROR; 936 937 //// Reserve enough space for the result. 938 939 if ((Uint32)newSize > _rep->cap) 940 _reserve(_rep, newSize); 941 942 //// Perform the conversion (overlapping buffers are allowed). 943 944 u_strToUpper((UChar)_rep->data, newSize, 945 (UChar*)_rep->data, _rep->size, NULL, &err);
946 chuck 1.99
947 mike 1.112 _rep->size = newSize;
948 yi.zhou 1.108
949 mike 1.112 return;
950 david 1.91 }
951 mike 1.112 952 #endif /* PEGASUS_HAS_ICU */ 953
954 mike 1.114 if (_rep->refs.get() != 1)
955 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 956 957 Uint16* p = _rep->data; 958 size_t n = _rep->size; 959 960 for (; n--; p++) 961 p = _toUpper(p);
962 david 1.90 } 963
964 kumpf 1.43 int String::compare(const String& s1, const String& s2, Uint32 n)
965 kumpf 1.39 {
966 kumpf 1.118 const Uint16* p1 = s1._rep->data; 967 const Uint16* p2 = s2._rep->data;
968 mike 1.27
969 kumpf 1.118 while (n--) 970 { 971 int r = p1++ - p2++; 972 if (r) 973 { 974 return r; 975 } 976 else if (!p1[-1]) 977 { 978 // We must have encountered a null terminator in both s1 and s2 979 return 0; 980 } 981 } 982 return 0;
983 mike 1.27 } 984
985 kumpf 1.43 int String::compare(const String& s1, const String& s2)
986 mike 1.30 {
987 mike 1.112 return _compare(s1._rep->data, s2._rep->data); 988 }
989 kumpf 1.43
990 mike 1.112 int String::compare(const String& s1, const char* s2) 991 { 992 _checkNullPointer(s2);
993 mike 1.30
994 mike 1.112 #ifdef PEGASUS_STRING_NO_UTF8 995 return _compareNoUTF8(s1._rep->data, s2); 996 #else 997 // ATTN: optimize this! 998 return String::compare(s1, String(s2)); 999 #endif
1000 mike 1.30 } 1001
1002 mike 1.112 int String::compareNoCase(const String& str1, const String& str2)
1003 kumpf 1.40 {
1004 david 1.69 #ifdef PEGASUS_HAS_ICU
1005 mike 1.112
1006 yi.zhou 1.108 if (InitializeICU::initICUSuccessful()) 1007 {
1008 mike 1.112 return u_strcasecmp(
1009 dave.sudlik 1.124 (const UChar)str1._rep->data, 1010 (const UChar)str2._rep->data, 1011 U_FOLD_CASE_DEFAULT 1012 );
1013 yi.zhou 1.108 }
1014 kumpf 1.40
1015 mike 1.112 #endif /* PEGASUS_HAS_ICU / 1016 1017 const Uint16 s1 = str1._rep->data; 1018 const Uint16* s2 = str2._rep->data; 1019 1020 while (s1 && s2)
1021 kumpf 1.40 {
1022 mike 1.112 int r = _toLower(s1++) - _toLower(s2++);
1023 kumpf 1.40
1024 david.dillard 1.105 if (r) 1025 return r;
1026 kumpf 1.40 } 1027
1028 mike 1.112 if (*s2)
1029 david.dillard 1.105 return -1;
1030 mike 1.112 else if (*s1)
1031 david.dillard 1.105 return 1;
1032 kumpf 1.40 1033 return 0; 1034 } 1035
1036 mike 1.112 Boolean StringEqualNoCase(const String& s1, const String& s2)
1037 mike 1.27 {
1038 mike 1.112 #ifdef PEGASUS_HAS_ICU 1039 1040 return String::compareNoCase(s1, s2) == 0; 1041 1042 #else /* PEGASUS_HAS_ICU */
1043 mike 1.27
1044 mike 1.112 // The following employs loop unrolling for efficiency. Please do not 1045 // eliminate.
1046 kumpf 1.39
1047 mike 1.112 Uint16* p = (Uint16)s1.getChar16Data(); 1048 Uint16 q = (Uint16*)s2.getChar16Data(); 1049 Uint32 n = s2.size(); 1050 1051 while (n >= 8) 1052 { 1053 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) \|\| 1054 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) \|\| 1055 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) \|\| 1056 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) \|\| 1057 ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) \|\| 1058 ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) \|\| 1059 ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) \|\| 1060 ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7])))) 1061 { 1062 return false; 1063 }
1064 kumpf 1.39
1065 mike 1.112 n -= 8; 1066 p += 8; 1067 q += 8; 1068 }
1069 mike 1.27
1070 mike 1.112 while (n >= 4)
1071 kumpf 1.39 {
1072 mike 1.112 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) \|\| 1073 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) \|\| 1074 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) \|\| 1075 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
1076 david.dillard 1.105 {
1077 mike 1.112 return false;
1078 david.dillard 1.105 }
1079 mike 1.112 1080 n -= 4; 1081 p += 4; 1082 q += 4; 1083 } 1084 1085 while (n--) 1086 { 1087 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
1088 david.dillard 1.105 return false;
1089 mike 1.112 1090 p++; 1091 q++;
1092 kumpf 1.39 }
1093 mike 1.28
1094 kumpf 1.39 return true;
1095 mike 1.112 1096 #endif /* PEGASUS_HAS_ICU */
1097 david 1.69 } 1098
1099 mike 1.112 Boolean String::equalNoCase(const String& s1, const char* s2)
1100 david 1.69 {
1101 mike 1.112 _checkNullPointer(s2);
1102 david 1.69
1103 mike 1.112 #if defined(PEGASUS_HAS_ICU)
1104 david 1.69
1105 mike 1.112 return String::equalNoCase(s1, String(s2));
1106 david 1.69
1107 mike 1.112 #elif defined(PEGASUS_STRING_NO_UTF8)
1108 david 1.69
1109 mike 1.112 const Uint16* p1 = (Uint16)s1._rep->data; 1110 const char p2 = s2; 1111 size_t n = s1._rep->size;
1112 david.dillard 1.105
1113 mike 1.112 while (n--) 1114 { 1115 if (!*p2) 1116 return false;
1117 david 1.71
1118 mike 1.112 if (_toUpper(p1++) != _toUpperTable[int(p2++)]) 1119 return false; 1120 }
1121 kumpf 1.42
1122 mike 1.112 if (*p2) 1123 return false;
1124 david.dillard 1.116
1125 mike 1.112 return true;
1126 karl 1.36
1127 mike 1.112 #else /* PEGASUS_HAS_ICU */
1128 david.dillard 1.105
1129 mike 1.112 // ATTN: optimize this! 1130 return String::equalNoCase(s1, String(s2));
1131 david.dillard 1.105
1132 mike 1.112 #endif /* PEGASUS_HAS_ICU */ 1133 }
1134 chuck 1.78
1135 mike 1.112 Boolean String::equal(const String& s1, const String& s2)
1136 karl 1.36 {
1137 marek 1.131 return (s1._rep == s2._rep) \|\|
1138 marek 1.137 ((s1._rep->size == s2._rep->size) && 1139 memcmp(s1._rep->data, 1140 s2._rep->data, 1141 s1._rep->size * sizeof(Uint16)) == 0);
1142 karl 1.36 } 1143
1144 mike 1.112 Boolean String::equal(const String& s1, const char* s2) 1145 { 1146 #ifdef PEGASUS_STRING_NO_UTF8
1147 kumpf 1.35
1148 mike 1.112 _checkNullPointer(s2);
1149 kumpf 1.39
1150 mike 1.112 const Uint16* p = (Uint16)s1._rep->data; 1151 const char q = s2;
1152 kumpf 1.39
1153 mike 1.112 while (p && q) 1154 { 1155 if (p++ != Uint16(q++)) 1156 return false; 1157 }
1158 kumpf 1.39
1159 mike 1.112 return !(p \|\| q);
1160 kumpf 1.39
1161 mike 1.112 #else /* PEGASUS_STRING_NO_UTF8 */
1162 kumpf 1.39
1163 mike 1.112 return String::equal(s1, String(s2));
1164 kumpf 1.39
1165 mike 1.112 #endif /* PEGASUS_STRING_NO_UTF8 */
1166 kumpf 1.39 } 1167
1168 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
1169 kumpf 1.39 {
1170 mike 1.112 #if defined(PEGASUS_HAS_ICU)
1171 david 1.69
1172 yi.zhou 1.108 if (InitializeICU::initICUSuccessful()) 1173 {
1174 david.dillard 1.105 char buf = NULL; 1175 const int size = str.size() 6;
1176 mike 1.112 UnicodeString UniStr( 1177 (const UChar *)str.getChar16Data(), (int32_t)str.size());
1178 david.dillard 1.105 Uint32 bufsize = UniStr.extract(0,size,buf); 1179 buf = new char[bufsize+1]; 1180 UniStr.extract(0,bufsize,buf); 1181 os << buf; 1182 os.flush(); 1183 delete [] buf;
1184 david.dillard 1.116 return os;
1185 yi.zhou 1.108 }
1186 mike 1.112
1187 david.dillard 1.116 #endif // PEGASUS_HAS_ICU
1188 mike 1.112 1189 for (Uint32 i = 0, n = str.size(); i < n; i++)
1190 yi.zhou 1.108 {
1191 mike 1.112 Uint16 code = str[i];
1192 david.dillard 1.105
1193 mike 1.112 if (code > 0 && !(code & 0xFF00)) 1194 os << char(code); 1195 else 1196 { 1197 // Print in hex format: 1198 char buffer[8]; 1199 sprintf(buffer, "\\x%04X", code); 1200 os << buffer;
1201 david.dillard 1.105 }
1202 yi.zhou 1.108 }
1203 kumpf 1.39 1204 return os; 1205 } 1206
1207 mike 1.112 void StringAppendCharAux(StringRep*& _rep)
1208 kumpf 1.39 {
1209 mike 1.112 StringRep* tmp; 1210 1211 if (_rep->cap) 1212 { 1213 tmp = StringRep::alloc(2 * _rep->cap); 1214 tmp->size = _rep->size; 1215 _copy(tmp->data, _rep->data, _rep->size); 1216 } 1217 else 1218 { 1219 tmp = StringRep::alloc(8); 1220 tmp->size = 0; 1221 } 1222 1223 StringRep::unref(_rep); 1224 _rep = tmp;
1225 kumpf 1.39 } 1226
1227 thilo.boehm 1.128 void AssignASCII(String& s, const char* str, Uint32 n) 1228 { 1229 class StringLayout 1230 { 1231 public: 1232 StringRep* rep; 1233 }; 1234
1235 kumpf 1.130 StringLayout* that = reinterpret_cast<StringLayout*>(&s);
1236 thilo.boehm 1.128 1237 _checkNullPointer(str); 1238 1239 if (n > that->rep->cap \|\| that->rep->refs.get() != 1) 1240 { 1241 StringRep::unref(that->rep); 1242 that->rep = StringRep::alloc(n); 1243 } 1244 1245 _copy(that->rep->data, str, n); 1246 that->rep->size = n; 1247 that->rep->data[that->rep->size] = 0; 1248 } 1249
1250 mike 1.112 PEGASUS_NAMESPACE_END 1251 1252 /* 1253 ================================================================================ 1254 1255 String optimizations: 1256 1257 1. Added mechanism allowing certain functions to be inlined only when 1258 used by internal Pegasus modules. External modules (i.e., providers) 1259 link to a non-inline version, which allows for binary compatibility. 1260 1261 2. Implemented copy-on-write with atomic increment/decrement. This 1262 yielded a 10% improvement for the 'gc' benchmark and an 11% improvement 1263 for the 'ni1000' benchmark. 1264 1265 3. Employed loop unrolling in several places. For example, see: 1266 1267 static Uint16* _find(const Uint16* s, size_t n, Uint16 c); 1268 1269 4. Used the "empty-rep" optimization (described in whitepaper from the 1270 GCC Developers Summit). This reduced default construction to a simple 1271 mike 1.112 pointer assignment. 1272 1273 inline String::String() : _rep(&_emptyRep) { } 1274 1275 5. Implemented Uint16 versions of toupper() and tolower() using tables. 1276 For example: 1277 1278 static const char _upper[] = 1279 { 1280 0,1,2,...255 1281 }; 1282 1283 inline Uint16 _toUpper(Uint16 x) 1284 { 1285 return (x & 0xFF00) ? x : _upper[x]; 1286 } 1287
1288 david.dillard 1.116 This outperforms the system implementation by avoiding an anding
1289 mike 1.112 operation. 1290
1291 david.dillard 1.116 6. Implemented char* version of the following member functions to 1292 eliminate unnecessary creation of anonymous string objects
1293 mike 1.112 (temporaries). 1294 1295 String(const String& s1, const char* s2); 1296 String(const char* s1, const String& s2); 1297 String& String::operator=(const char* str); 1298 Uint32 String::find(const char* s) const; 1299 bool String::equal(const String& s1, const char* s2); 1300 static int String::compare(const String& s1, const char* s2); 1301 String& String::append(const char* str); 1302 String& String::append(const char* str, Uint32 size); 1303 static bool String::equalNoCase(const String& s1, const char* s2); 1304 String& operator=(const char* str) 1305 String& String::assign(const char* str) 1306 String& String::append(const char* str) 1307 Boolean operator==(const String& s1, const char* s2) 1308 Boolean operator==(const char* s1, const String& s2) 1309 Boolean operator!=(const String& s1, const char* s2) 1310 Boolean operator!=(const char* s1, const String& s2) 1311 Boolean operator<(const String& s1, const char* s2) 1312 Boolean operator<(const char* s1, const String& s2) 1313 Boolean operator>(const String& s1, const char* s2) 1314 mike 1.112 Boolean operator>(const char* s1, const String& s2) 1315 Boolean operator<=(const String& s1, const char* s2) 1316 Boolean operator<=(const char* s1, const String& s2) 1317 Boolean operator>=(const String& s1, const char* s2) 1318 Boolean operator>=(const char* s1, const String& s2) 1319 String operator+(const String& s1, const char* s2) 1320 String operator+(const char* s1, const String& s2) 1321
1322 david.dillard 1.116 7. Optimized _roundUpToPow2(), used in rounding the capacity to the next
1323 mike 1.112 power of two (algorithm from the book "Hacker's Delight"). 1324 1325 static Uint32 _roundUpToPow2(Uint32 x) 1326 { 1327 if (x < 8) 1328 return 8; 1329 1330 x--; 1331 x \|= (x >> 1); 1332 x \|= (x >> 2); 1333 x \|= (x >> 4); 1334 x \|= (x >> 8); 1335 x \|= (x >> 16); 1336 x++; 1337 1338 return x; 1339 } 1340 1341 8. Implemented "concatenating constructors" to eliminate temporaries
1342 david.dillard 1.116 created by operator+(). This scheme employs the "return-value
1343 mike 1.112 optimization" described by Stan Lippman. 1344 1345 inline String operator+(const String& s1, const String& s2) 1346 { 1347 return String(s1, s2, 0); 1348 } 1349 1350 9. Experimented to find the optimal initial size for a short string. 1351 Eight seems to offer the best tradeoff between space and time. 1352 1353 10. Inlined all members of the Char16 class. 1354 1355 11. Used Uint16 internally in the String class. This showed no improvement 1356 since Char16 was already fully inlined and was essentially reduced to 1357 Uint16 in any case. 1358 1359 12. Implemented conditional logic (#if) allowing error checking logic to
1360 david.dillard 1.116 be excluded to better performance. Examples include bounds checking
1361 mike 1.112 and null-pointer checking. 1362 1363 13. Used memcpy() and memcmp() where possible. These are implemented using 1364 the rep family of instructions under Intel and are much faster. 1365
1366 david.dillard 1.116 14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
1367 mike 1.112 copy routine overhead. 1368 1369 15. Added ASCII7 form of the constructor and assign(). 1370 1371 String s("hello world", String::ASCII7); 1372 1373 s.assignASCII7("hello world"); 1374 1375 This avoids slower UTF8 processing when not needed. 1376 1377 ================================================================================ 1378 */

No CVS admin address has been configured