pegasus/src/Pegasus/Common/String.cpp - annotate

Return to String.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

1 martin 1.133 //%LICENSE////////////////////////////////////////////////////////////////
2 martin 1.134 //
3 martin 1.133 // Licensed to The Open Group (TOG) under one or more contributor license 4 // agreements. Refer to the OpenPegasusNOTICE.txt file distributed with 5 // this work for additional information regarding copyright ownership. 6 // Each contributor licenses this file to you under the OpenPegasus Open 7 // Source License; you may not use this file except in compliance with the 8 // License.
9 martin 1.134 //
10 martin 1.133 // Permission is hereby granted, free of charge, to any person obtaining a 11 // copy of this software and associated documentation files (the "Software"), 12 // to deal in the Software without restriction, including without limitation 13 // the rights to use, copy, modify, merge, publish, distribute, sublicense, 14 // and/or sell copies of the Software, and to permit persons to whom the 15 // Software is furnished to do so, subject to the following conditions:
16 martin 1.134 //
17 martin 1.133 // The above copyright notice and this permission notice shall be included 18 // in all copies or substantial portions of the Software.
19 martin 1.134 //
20 martin 1.133 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21 martin 1.134 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 martin 1.133 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 23 // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 24 // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 25 // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 26 // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 martin 1.134 //
28 martin 1.133 //////////////////////////////////////////////////////////////////////////
29 mike 1.27 // 30 //%///////////////////////////////////////////////////////////////////////////// 31
32 jim.wunderlich 1.115 #include <Pegasus/Common/PegasusAssert.h>
33 mike 1.113 #include <cstring>
34 kumpf 1.48 #include "InternalException.h"
35 mike 1.112 #include "MessageLoader.h" 36 #include "StringRep.h"
37 david 1.69 38 #ifdef PEGASUS_HAS_ICU
39 kumpf 1.132 # include <unicode/ures.h> 40 # include <unicode/ustring.h> 41 # include <unicode/uchar.h>
42 david 1.69 #endif 43
44 mike 1.112 PEGASUS_NAMESPACE_BEGIN
45 mike 1.28
46 mike 1.112 //============================================================================== 47 // 48 // Compile-time macros (undefined by default). 49 // 50 // PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code. 51 // 52 //==============================================================================
53 mike 1.27
54 mike 1.112 //==============================================================================
55 kumpf 1.39 //
56 mike 1.112 // File-scope definitions:
57 kumpf 1.54 //
58 mike 1.112 //============================================================================== 59 60 // Note: this table is much faster than the system toupper(). Please do not 61 // change.
62 kumpf 1.54
63 david.dillard 1.116 const Uint8 _toUpperTable[256] =
64 kumpf 1.54 {
65 mike 1.112 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 66 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 67 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 68 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 69 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 70 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 71 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 72 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 73 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47, 74 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, 75 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57, 76 0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, 77 0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47, 78 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, 79 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57, 80 0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F, 81 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 82 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 83 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 84 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 85 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 86 mike 1.112 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 87 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 88 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 89 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 90 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 91 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 92 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 93 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 94 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 95 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 96 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF, 97 }; 98 99 // Note: this table is much faster than the system tulower(). Please do not 100 // change. 101
102 david.dillard 1.116 const Uint8 _toLowerTable[256] =
103 mike 1.112 { 104 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 105 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 106 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 107 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 108 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 109 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 110 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 111 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 112 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 113 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 114 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 115 0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F, 116 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 117 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 118 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 119 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, 120 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 121 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 122 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 123 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 124 mike 1.112 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 125 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 126 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 127 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 128 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 129 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 130 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 131 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 132 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 133 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 134 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 135 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF, 136 }; 137 138 // Converts 16-bit characters to upper case. This routine is faster than the 139 // system toupper(). Please do not change. 140 inline Uint16 _toUpper(Uint16 x) 141 { 142 return (x & 0xFF00) ? x : _toUpperTable[x];
143 kumpf 1.54 } 144
145 mike 1.112 // Converts 16-bit characters to lower case. This routine is faster than the 146 // system toupper(). Please do not change. 147 inline Uint16 _toLower(Uint16 x)
148 kumpf 1.54 {
149 mike 1.112 return (x & 0xFF00) ? x : _toLowerTable[x]; 150 } 151 152 // Rounds x up to the nearest power of two (or just returns 8 if x < 8). 153 static Uint32 _roundUpToPow2(Uint32 x) 154 {
155 dave.sudlik 1.120 // Check for potential overflow in x 156 PEGASUS_CHECK_CAPACITY_OVERFLOW(x);
157 mike 1.112 158 if (x < 8) 159 return 8; 160 161 x--; 162 x \|= (x >> 1); 163 x \|= (x >> 2); 164 x \|= (x >> 4); 165 x \|= (x >> 8); 166 x \|= (x >> 16); 167 x++; 168 169 return x; 170 } 171 172 static Uint16* _find(const Uint16* s, size_t n, Uint16 c) 173 { 174 // The following employs loop unrolling for efficiency. Please do not 175 // eliminate. 176 177 while (n >= 4) 178 mike 1.112 { 179 if (s[0] == c) 180 return (Uint16)s; 181 if (s[1] == c) 182 return (Uint16)&s[1]; 183 if (s[2] == c) 184 return (Uint16)&s[2]; 185 if (s[3] == c) 186 return (Uint16)&s[3];
187 kumpf 1.82
188 mike 1.112 n -= 4; 189 s += 4; 190 } 191 192 if (n) 193 { 194 if (s == c) 195 return (Uint16)s; 196 s++; 197 n--; 198 } 199 200 if (n) 201 { 202 if (s == c) 203 return (Uint16)s; 204 s++; 205 n--; 206 } 207 208 if (n && s == c) 209 mike 1.112 return (Uint16)s; 210 211 // Not found! 212 return 0; 213 } 214 215 static int _compare(const Uint16* s1, const Uint16* s2) 216 { 217 while (s1 && s2) 218 { 219 int r = s1++ - s2++; 220 221 if (r) 222 return r; 223 } 224 225 if (s2) 226 return -1; 227 else if (s1) 228 return 1; 229 230 mike 1.112 return 0; 231 } 232
#ifdef PEGASUS_STRING_NO_UTF8
// Compares a zero-terminated 16-bit string with a zero-terminated char
// string, one element at a time. Returns the difference of the first pair
// that differs, or 0 when both strings end together.
static int _compareNoUTF8(const Uint16* s1, const char* s2)
{
    Uint16 c1;
    Uint16 c2;

    do
    {
        c1 = *s1++;
        c2 = *s2++;

        // s1 is exhausted: the result is decided by whatever s2 holds here.
        if (c1 == 0)
            return c1 - c2;
    }
    while (c1 == c2);

    return c1 - c2;
}
#endif
252 mike 1.112 253 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n) 254 { 255 memcpy(s1, s2, n * sizeof(Uint16)); 256 } 257 258 void StringThrowOutOfBounds() 259 { 260 throw IndexOutOfBoundsException(); 261 } 262 263 inline void _checkNullPointer(const void* ptr) 264 { 265 if (!ptr) 266 throw NullPointer(); 267 } 268 269 static void _StringThrowBadUTF8(Uint32 index) 270 { 271 MessageLoaderParms parms( 272 "Common.String.BAD_UTF8", 273 mike 1.112 "The byte sequence starting at index $0 " 274 "is not valid UTF-8 encoding.", 275 index); 276 throw Exception(parms); 277 } 278
279 david.dillard 1.116 // Note: dest must be at least three times src (plus an extra byte for
280 mike 1.112 // terminator). 281 static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n) 282 { 283 // The following employs loop unrolling for efficiency. Please do not 284 // eliminate. 285 286 const Uint16* q = src; 287 Uint8* p = (Uint8*)dest; 288 289 while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
290 kumpf 1.82 {
291 mike 1.112 p[0] = q[0]; 292 p[1] = q[1]; 293 p[2] = q[2]; 294 p[3] = q[3]; 295 p += 4; 296 q += 4; 297 n -= 4;
298 kumpf 1.82 }
299 mike 1.112 300 switch (n) 301 { 302 case 0: 303 return p - (Uint8)dest; 304 case 1: 305 if (q[0] < 128) 306 { 307 p[0] = q[0]; 308 return p + 1 - (Uint8)dest; 309 } 310 break; 311 case 2: 312 if (q[0] < 128 && q[1] < 128) 313 { 314 p[0] = q[0]; 315 p[1] = q[1]; 316 return p + 2 - (Uint8)dest; 317 } 318 break; 319 case 3: 320 mike 1.112 if (q[0] < 128 && q[1] < 128 && q[2] < 128) 321 { 322 p[0] = q[0]; 323 p[1] = q[1]; 324 p[2] = q[2]; 325 return p + 3 - (Uint8)dest; 326 } 327 break; 328 } 329 330 // If this line was reached, there must be characters greater than 128. 331 332 UTF16toUTF8(&q, q + n, &p, p + 3 * n); 333 334 return p - (Uint8*)dest;
335 kumpf 1.54 } 336
337 mike 1.112 //============================================================================== 338 // 339 // class CString 340 // 341 //============================================================================== 342 343 CString::CString(const CString& cstr) : _rep(0)
344 kumpf 1.54 {
345 mike 1.112 if (cstr._rep)
346 kumpf 1.82 {
347 mike 1.112 size_t n = strlen(cstr._rep) + 1; 348 _rep = (char*)operator new(n); 349 memcpy(_rep, cstr._rep, n);
350 kumpf 1.82 }
351 kumpf 1.54 } 352
353 kumpf 1.56 CString& CString::operator=(const CString& cstr) 354 {
355 kumpf 1.82 if (&cstr != this)
356 kumpf 1.81 {
357 kumpf 1.82 if (_rep) 358 {
359 mike 1.112 operator delete(_rep);
360 kumpf 1.82 _rep = 0; 361 }
362 mike 1.112
363 kumpf 1.82 if (cstr._rep) 364 {
365 mike 1.112 size_t n = strlen(cstr._rep) + 1; 366 _rep = (char*)operator new(n); 367 memcpy(_rep, cstr._rep, n);
368 kumpf 1.82 }
369 kumpf 1.81 }
370 mike 1.112
371 kumpf 1.56 return *this; 372 } 373
374 mike 1.112 //==============================================================================
375 kumpf 1.54 //
376 mike 1.112 // class StringRep
377 kumpf 1.39 //
378 mike 1.112 //==============================================================================
379 kumpf 1.39
380 mike 1.112 StringRep StringRep::_emptyRep;
381 mike 1.27
382 mike 1.112 inline StringRep* StringRep::alloc(size_t cap)
383 mike 1.27 {
384 dave.sudlik 1.120 // Check for potential overflow in cap 385 PEGASUS_CHECK_CAPACITY_OVERFLOW(cap);
386 mike 1.27
387 mike 1.112 StringRep* rep = (StringRep)::operator new( 388 sizeof(StringRep) + cap sizeof(Uint16)); 389 rep->cap = cap; 390 new(&rep->refs) AtomicInt(1); 391 392 return rep;
393 mike 1.27 } 394
395 mike 1.112 static inline void _reserve(StringRep*& rep, Uint32 cap)
396 chuck 1.102 {
397 mike 1.114 if (cap > rep->cap \|\| rep->refs.get() != 1)
398 chuck 1.102 {
399 mike 1.112 size_t n = _roundUpToPow2(cap); 400 StringRep* newRep = StringRep::alloc(n); 401 newRep->size = rep->size; 402 _copy(newRep->data, rep->data, rep->size + 1); 403 StringRep::unref(rep); 404 rep = newRep; 405 } 406 }
407 david.dillard 1.105
408 mike 1.112 StringRep* StringRep::create(const Uint16* data, size_t size) 409 { 410 StringRep* rep = StringRep::alloc(size); 411 rep->size = size; 412 _copy(rep->data, data, size); 413 rep->data[size] = '\0'; 414 return rep; 415 }
416 chuck 1.102
417 mike 1.112 StringRep* StringRep::copyOnWrite(StringRep* rep) 418 { 419 // Return a new copy of rep. Release rep.
420 chuck 1.102
421 mike 1.112 StringRep* newRep = StringRep::alloc(rep->size); 422 newRep->size = rep->size; 423 _copy(newRep->data, rep->data, rep->size); 424 newRep->data[newRep->size] = '\0'; 425 StringRep::unref(rep); 426 return newRep;
427 chuck 1.102 } 428
429 mike 1.112 StringRep* StringRep::create(const char* data, size_t size)
430 kumpf 1.43 {
431 mike 1.112 StringRep* rep = StringRep::alloc(size); 432 size_t utf8_error_index; 433 rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index); 434 435 if (rep->size == size_t(-1)) 436 { 437 StringRep::free(rep);
438 a.dunfey 1.125 _StringThrowBadUTF8((Uint32)utf8_error_index);
439 mike 1.112 }
440 kumpf 1.43
441 mike 1.112 rep->data[rep->size] = '\0';
442 kumpf 1.43
443 mike 1.112 return rep;
444 mike 1.27 } 445
446 mike 1.112 Uint32 StringRep::length(const Uint16* str)
447 mike 1.27 {
448 mike 1.112 // Note: We could unroll this but it is rarely called. 449 450 const Uint16* end = (Uint16)str; 451 452 while (end++) 453 ; 454
455 a.dunfey 1.125 return (Uint32)(end - str - 1);
456 kumpf 1.39 }
457 tony 1.66
458 mike 1.112 //============================================================================== 459 // 460 // class String 461 // 462 //============================================================================== 463 464 const String String::EMPTY;
465 mike 1.27
466 kumpf 1.39 String::String(const String& str, Uint32 n) 467 {
468 mike 1.112 _checkBounds(n, str._rep->size); 469 _rep = StringRep::create(str._rep->data, n);
470 kumpf 1.39 } 471 472 String::String(const Char16* str) 473 {
474 mike 1.112 _checkNullPointer(str); 475 _rep = StringRep::create((Uint16)str, StringRep::length((Uint16)str));
476 mike 1.27 } 477
478 kumpf 1.39 String::String(const Char16* str, Uint32 n) 479 {
480 mike 1.112 _checkNullPointer(str); 481 _rep = StringRep::create((Uint16*)str, n);
482 kumpf 1.39 } 483 484 String::String(const char* str)
485 mike 1.27 {
486 mike 1.112 _checkNullPointer(str);
487 david.dillard 1.105
488 mike 1.112 // Set this just in case create() throws an exception. 489 _rep = &StringRep::_emptyRep; 490 _rep = StringRep::create(str, strlen(str));
491 mike 1.27 } 492
493 kumpf 1.39 String::String(const char* str, Uint32 n)
494 mike 1.27 {
495 mike 1.112 _checkNullPointer(str);
496 david.dillard 1.105
497 mike 1.112 // Set this just in case create() throws an exception. 498 _rep = &StringRep::_emptyRep; 499 _rep = StringRep::create(str, n);
500 kumpf 1.39 }
501 mike 1.27
502 mike 1.112 String::String(const String& s1, const String& s2)
503 kumpf 1.39 {
504 mike 1.112 size_t n1 = s1._rep->size; 505 size_t n2 = s2._rep->size; 506 size_t n = n1 + n2; 507 _rep = StringRep::alloc(n); 508 _copy(_rep->data, s1._rep->data, n1); 509 _copy(_rep->data + n1, s2._rep->data, n2); 510 _rep->size = n; 511 _rep->data[n] = '\0';
512 mike 1.27 } 513
514 mike 1.112 String::String(const String& s1, const char* s2)
515 mike 1.27 {
516 mike 1.112 _checkNullPointer(s2); 517 size_t n1 = s1._rep->size; 518 size_t n2 = strlen(s2); 519 _rep = StringRep::alloc(n1 + n2); 520 _copy(_rep->data, s1._rep->data, n1); 521 size_t utf8_error_index; 522 size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index); 523 524 if (tmp == size_t(-1))
525 kumpf 1.82 {
526 mike 1.112 StringRep::free(_rep); 527 _rep = &StringRep::_emptyRep;
528 a.dunfey 1.125 _StringThrowBadUTF8((Uint32)utf8_error_index);
529 kumpf 1.82 }
530 mike 1.112 531 _rep->size = n1 + tmp; 532 _rep->data[_rep->size] = '\0';
533 mike 1.27 } 534
535 mike 1.112 String::String(const char* s1, const String& s2)
536 mike 1.27 {
537 mike 1.112 _checkNullPointer(s1); 538 size_t n1 = strlen(s1); 539 size_t n2 = s2._rep->size; 540 _rep = StringRep::alloc(n1 + n2); 541 size_t utf8_error_index; 542 size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index); 543 544 if (tmp == size_t(-1)) 545 { 546 StringRep::free(_rep); 547 _rep = &StringRep::_emptyRep;
548 a.dunfey 1.125 _StringThrowBadUTF8((Uint32)utf8_error_index);
549 mike 1.112 } 550 551 _rep->size = n2 + tmp; 552 _copy(_rep->data + n1, s2._rep->data, n2); 553 _rep->data[_rep->size] = '\0';
554 mike 1.27 } 555
556 mike 1.112 String& String::assign(const String& str)
557 mike 1.27 {
558 mike 1.112 if (_rep != str._rep)
559 david.dillard 1.105 {
560 mike 1.112 StringRep::unref(_rep); 561 StringRep::ref(_rep = str._rep);
562 david.dillard 1.105 } 563
564 mike 1.27 return this; 565 } 566 567 String& String::assign(const Char16 str, Uint32 n) 568 {
569 mike 1.112 _checkNullPointer(str); 570
571 mike 1.114 if (n > _rep->cap \|\| _rep->refs.get() != 1)
572 david.dillard 1.105 {
573 mike 1.112 StringRep::unref(_rep); 574 _rep = StringRep::alloc(n);
575 david.dillard 1.105 } 576
577 mike 1.112 _rep->size = n; 578 _copy(_rep->data, (Uint16*)str, n); 579 _rep->data[n] = '\0'; 580
581 mike 1.27 return *this; 582 } 583
584 mike 1.112 String& String::assign(const char* str, Uint32 n)
585 chuck 1.102 {
586 mike 1.112 _checkNullPointer(str); 587
588 mike 1.114 if (n > _rep->cap \|\| _rep->refs.get() != 1)
589 david.dillard 1.105 {
590 mike 1.112 StringRep::unref(_rep); 591 _rep = StringRep::alloc(n);
592 david.dillard 1.105 } 593
594 mike 1.112 size_t utf8_error_index; 595 _rep->size = _convert(_rep->data, str, n, utf8_error_index);
596 chuck 1.102
597 mike 1.112 if (_rep->size == size_t(-1))
598 david.dillard 1.105 {
599 mike 1.112 StringRep::free(_rep); 600 _rep = &StringRep::_emptyRep;
601 a.dunfey 1.125 _StringThrowBadUTF8((Uint32)utf8_error_index);
602 david.dillard 1.105 }
603 mike 1.112 604 _rep->data[_rep->size] = 0;
605 david.dillard 1.105
606 mike 1.27 return *this; 607 } 608
609 kumpf 1.39 void String::clear() 610 {
611 mike 1.112 if (_rep->size) 612 {
613 mike 1.114 if (_rep->refs.get() == 1)
614 mike 1.112 { 615 _rep->size = 0; 616 _rep->data[0] = '\0'; 617 } 618 else 619 { 620 StringRep::unref(_rep); 621 _rep = &StringRep::_emptyRep; 622 } 623 }
624 kumpf 1.39 } 625
626 mike 1.112 void String::reserveCapacity(Uint32 cap)
627 kumpf 1.39 {
628 mike 1.112 _reserve(_rep, cap);
629 kumpf 1.39 } 630
631 mike 1.112 CString String::getCString() const 632 {
633 david.dillard 1.116 // A UTF8 string can have three times as many characters as its UTF16 634 // counterpart, so we allocate extra memory for the worst case. In the
635 mike 1.112 // best case, we may need only one third of the memory allocated. But
636 david.dillard 1.116 // downsizing the string afterwards is expensive and unecessary since 637 // CString objects are usually short-lived (disappearing after only a few
638 mike 1.112 // instructions). CString objects are typically created on the stack as 639 // means to obtain a char* pointer. 640 641 #ifdef PEGASUS_STRING_NO_UTF8 642 char* str = (char*)operator new(_rep->size + 1); 643 _copy(str, _rep->data, _rep->size); 644 str[_rep->size] = '\0'; 645 return CString(str);
646 gs.keenan 1.110 #else
647 a.dunfey 1.125 Uint32 n = (Uint32)(3 * _rep->size);
648 mike 1.112 char* str = (char*)operator new(n + 1); 649 size_t size = _copyToUTF8(str, _rep->data, _rep->size); 650 str[size] = '\0'; 651 return CString(str);
652 gs.keenan 1.110 #endif
653 kumpf 1.39 } 654
655 mike 1.112 String& String::append(const Char16* str, Uint32 n)
656 kumpf 1.39 {
657 mike 1.112 _checkNullPointer(str); 658 659 size_t oldSize = _rep->size; 660 size_t newSize = oldSize + n;
661 a.dunfey 1.125 _reserve(_rep, (Uint32)newSize);
662 mike 1.112 _copy(_rep->data + oldSize, (Uint16)str, n); 663 _rep->size = newSize; 664 _rep->data[newSize] = '\0'; 665 666 return this;
667 kumpf 1.39 } 668
669 mike 1.112 String& String::append(const String& str)
670 mike 1.27 {
671 a.dunfey 1.125 return append((Char16*)(&(str._rep->data[0])), (Uint32)str._rep->size);
672 mike 1.27 } 673
674 mike 1.112 String& String::append(const char* str, Uint32 size)
675 mike 1.27 {
676 mike 1.112 _checkNullPointer(str); 677 678 size_t oldSize = _rep->size; 679 size_t cap = oldSize + size; 680
681 a.dunfey 1.125 _reserve(_rep, (Uint32)cap);
682 mike 1.112 size_t utf8_error_index; 683 size_t tmp = _convert( 684 (Uint16*)_rep->data + oldSize, str, size, utf8_error_index); 685 686 if (tmp == size_t(-1)) 687 { 688 StringRep::free(_rep); 689 _rep = &StringRep::_emptyRep;
690 a.dunfey 1.125 _StringThrowBadUTF8((Uint32)utf8_error_index);
691 mike 1.112 }
692 mike 1.27
693 mike 1.112 _rep->size += tmp; 694 _rep->data[_rep->size] = '\0';
695 mike 1.27
696 kumpf 1.39 return *this; 697 } 698
699 mike 1.112 void String::remove(Uint32 index, Uint32 n)
700 mike 1.27 {
701 mike 1.112 if (n == PEG_NOT_FOUND)
702 a.dunfey 1.125 n = (Uint32)(_rep->size - index);
703 mike 1.112 704 _checkBounds(index + n, _rep->size); 705
706 mike 1.114 if (_rep->refs.get() != 1)
707 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
708 mike 1.27
709 jim.wunderlich 1.115 PEGASUS_ASSERT(index + n <= _rep->size);
710 mike 1.27
711 mike 1.112 size_t rem = _rep->size - (index + n); 712 Uint16* data = _rep->data;
713 mike 1.27
714 mike 1.112 if (rem) 715 memmove(data + index, data + index + n, rem * sizeof(Uint16));
716 mike 1.27
717 mike 1.112 _rep->size -= n; 718 data[_rep->size] = '\0';
719 mike 1.27 } 720
721 mike 1.112 String String::subString(Uint32 index, Uint32 n) const
722 mike 1.27 {
723 mike 1.112 // Note: this implementation is very permissive but used for 724 // backwards compatibility. 725 726 if (index < _rep->size)
727 mike 1.27 {
728 mike 1.112 if (n == PEG_NOT_FOUND \|\| n > _rep->size - index)
729 a.dunfey 1.125 n = (Uint32)(_rep->size - index);
730 mike 1.27
731 w.otsuka 1.121 return String((Char16*)(_rep->data + index), n);
732 mike 1.27 }
733 david.dillard 1.105 734 return String();
735 mike 1.27 } 736 737 Uint32 String::find(Char16 c) const 738 {
739 mike 1.112 Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
740 mike 1.27
741 mike 1.112 if (p)
742 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
743 mike 1.27 744 return PEG_NOT_FOUND; 745 } 746
747 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
748 mike 1.30 {
749 mike 1.112 _checkBounds(index, _rep->size); 750 751 if (index >= _rep->size) 752 return PEG_NOT_FOUND; 753 754 Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
755 mike 1.30
756 mike 1.112 if (p)
757 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
758 mike 1.30 759 return PEG_NOT_FOUND; 760 } 761
762 mike 1.112 Uint32 StringFindAux( 763 const StringRep* _rep, const Char16* s, Uint32 n)
764 mike 1.27 {
765 mike 1.112 _checkNullPointer(s);
766 mike 1.27
767 mike 1.112 const Uint16* data = _rep->data; 768 size_t rem = _rep->size; 769 770 while (n <= rem)
771 mike 1.30 {
772 mike 1.112 Uint16* p = (Uint16*)_find(data, rem, s[0]); 773 774 if (!p) 775 break;
776 mike 1.30
777 mike 1.112 if (memcmp(p, s, n * sizeof(Uint16)) == 0)
778 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
779 david.dillard 1.105
780 mike 1.112 p++; 781 rem -= p - data; 782 data = p;
783 mike 1.27 }
784 mike 1.112
785 mike 1.27 return PEG_NOT_FOUND; 786 } 787
788 mike 1.112 Uint32 String::find(const char* s) const 789 { 790 _checkNullPointer(s); 791 792 // Note: could optimize away creation of temporary, but this is rarely 793 // called. 794 return find(String(s)); 795 } 796
797 mike 1.27 Uint32 String::reverseFind(Char16 c) const 798 {
799 mike 1.112 Uint16 x = c; 800 Uint16* p = _rep->data; 801 Uint16* q = _rep->data + _rep->size;
802 mike 1.27
803 mike 1.112 while (q != p)
804 mike 1.27 {
805 mike 1.112 if (*--q == x)
806 david.dillard 1.116 return static_cast<Uint32>(q - p);
807 mike 1.27 } 808 809 return PEG_NOT_FOUND; 810 } 811 812 void String::toLower() 813 {
814 david 1.69 #ifdef PEGASUS_HAS_ICU
815 mike 1.112
816 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
817 david 1.90 {
818 mike 1.114 if (_rep->refs.get() != 1)
819 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 820
821 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
822 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this 823 // prevents optimizations where the us-ascii is converted before
824 mike 1.112 // calling ICU.
825 yi.zhou 1.108 // The string may shrink or expand after the convert. 826
827 mike 1.112 //// First calculate size of resulting string. u_strToLower() returns 828 //// only the size when zero is passed as the destination size argument. 829
830 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR; 831
832 mike 1.112 int32_t newSize = u_strToLower( 833 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
834 david.dillard 1.116
835 mike 1.112 err = U_ZERO_ERROR; 836 837 //// Reserve enough space for the result. 838 839 if ((Uint32)newSize > _rep->cap) 840 _reserve(_rep, newSize); 841 842 //// Perform the conversion (overlapping buffers are allowed).
843 chuck 1.99
844 mike 1.112 u_strToLower((UChar)_rep->data, newSize, 845 (UChar)_rep->data, _rep->size, NULL, &err);
846 yi.zhou 1.108
847 mike 1.112 _rep->size = newSize; 848 return;
849 david 1.90 }
850 mike 1.112 851 #endif /* PEGASUS_HAS_ICU */ 852
853 mike 1.114 if (_rep->refs.get() != 1)
854 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 855 856 Uint16* p = _rep->data; 857 size_t n = _rep->size; 858 859 for (; n--; p++)
860 david 1.90 {
861 mike 1.112 if (!(p & 0xFF00)) 862 p = _toLower(*p);
863 mike 1.27 }
864 kumpf 1.39 } 865
866 chuck 1.99 void String::toUpper()
867 david 1.90 { 868 #ifdef PEGASUS_HAS_ICU
869 mike 1.112
870 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
871 chuck 1.99 {
872 mike 1.114 if (_rep->refs.get() != 1)
873 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 874
875 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
876 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this 877 // prevents optimizations where the us-ascii is converted before
878 mike 1.112 // calling ICU.
879 yi.zhou 1.108 // The string may shrink or expand after the convert. 880
881 mike 1.112 //// First calculate size of resulting string. u_strToUpper() returns 882 //// only the size when zero is passed as the destination size argument. 883
884 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR; 885
886 mike 1.112 int32_t newSize = u_strToUpper( 887 NULL, 0, (UChar)_rep->data, _rep->size, NULL, &err); 888 889 err = U_ZERO_ERROR; 890 891 //// Reserve enough space for the result. 892 893 if ((Uint32)newSize > _rep->cap) 894 _reserve(_rep, newSize); 895 896 //// Perform the conversion (overlapping buffers are allowed). 897 898 u_strToUpper((UChar)_rep->data, newSize, 899 (UChar*)_rep->data, _rep->size, NULL, &err);
900 chuck 1.99
901 mike 1.112 _rep->size = newSize;
902 yi.zhou 1.108
903 mike 1.112 return;
904 david 1.91 }
905 mike 1.112 906 #endif /* PEGASUS_HAS_ICU */ 907
908 mike 1.114 if (_rep->refs.get() != 1)
909 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 910 911 Uint16* p = _rep->data; 912 size_t n = _rep->size; 913 914 for (; n--; p++) 915 p = _toUpper(p);
916 david 1.90 } 917
918 kumpf 1.43 int String::compare(const String& s1, const String& s2, Uint32 n)
919 kumpf 1.39 {
920 kumpf 1.118 const Uint16* p1 = s1._rep->data; 921 const Uint16* p2 = s2._rep->data;
922 mike 1.27
923 kumpf 1.118 while (n--) 924 { 925 int r = p1++ - p2++; 926 if (r) 927 { 928 return r; 929 } 930 else if (!p1[-1]) 931 { 932 // We must have encountered a null terminator in both s1 and s2 933 return 0; 934 } 935 } 936 return 0;
937 mike 1.27 } 938
939 kumpf 1.43 int String::compare(const String& s1, const String& s2)
940 mike 1.30 {
941 mike 1.112 return _compare(s1._rep->data, s2._rep->data); 942 }
943 kumpf 1.43
944 mike 1.112 int String::compare(const String& s1, const char* s2) 945 { 946 _checkNullPointer(s2);
947 mike 1.30
948 mike 1.112 #ifdef PEGASUS_STRING_NO_UTF8 949 return _compareNoUTF8(s1._rep->data, s2); 950 #else 951 // ATTN: optimize this! 952 return String::compare(s1, String(s2)); 953 #endif
954 mike 1.30 } 955
956 mike 1.112 int String::compareNoCase(const String& str1, const String& str2)
957 kumpf 1.40 {
958 david 1.69 #ifdef PEGASUS_HAS_ICU
959 mike 1.112
960 yi.zhou 1.108 if (InitializeICU::initICUSuccessful()) 961 {
962 mike 1.112 return u_strcasecmp(
963 dave.sudlik 1.124 (const UChar)str1._rep->data, 964 (const UChar)str2._rep->data, 965 U_FOLD_CASE_DEFAULT 966 );
967 yi.zhou 1.108 }
968 kumpf 1.40
969 mike 1.112 #endif /* PEGASUS_HAS_ICU / 970 971 const Uint16 s1 = str1._rep->data; 972 const Uint16* s2 = str2._rep->data; 973 974 while (s1 && s2)
975 kumpf 1.40 {
976 mike 1.112 int r = _toLower(s1++) - _toLower(s2++);
977 kumpf 1.40
978 david.dillard 1.105 if (r) 979 return r;
980 kumpf 1.40 } 981
982 mike 1.112 if (*s2)
983 david.dillard 1.105 return -1;
984 mike 1.112 else if (*s1)
985 david.dillard 1.105 return 1;
986 kumpf 1.40 987 return 0; 988 } 989
990 mike 1.112 Boolean StringEqualNoCase(const String& s1, const String& s2)
991 mike 1.27 {
992 mike 1.112 #ifdef PEGASUS_HAS_ICU 993 994 return String::compareNoCase(s1, s2) == 0; 995 996 #else /* PEGASUS_HAS_ICU */
997 mike 1.27
998 mike 1.112 // The following employs loop unrolling for efficiency. Please do not 999 // eliminate.
1000 kumpf 1.39
1001 mike 1.112 Uint16* p = (Uint16)s1.getChar16Data(); 1002 Uint16 q = (Uint16*)s2.getChar16Data(); 1003 Uint32 n = s2.size(); 1004 1005 while (n >= 8) 1006 { 1007 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) \|\| 1008 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) \|\| 1009 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) \|\| 1010 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) \|\| 1011 ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) \|\| 1012 ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) \|\| 1013 ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) \|\| 1014 ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7])))) 1015 { 1016 return false; 1017 }
1018 kumpf 1.39
1019 mike 1.112 n -= 8; 1020 p += 8; 1021 q += 8; 1022 }
1023 mike 1.27
1024 mike 1.112 while (n >= 4)
1025 kumpf 1.39 {
1026 mike 1.112 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) \|\| 1027 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) \|\| 1028 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) \|\| 1029 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
1030 david.dillard 1.105 {
1031 mike 1.112 return false;
1032 david.dillard 1.105 }
1033 mike 1.112 1034 n -= 4; 1035 p += 4; 1036 q += 4; 1037 } 1038 1039 while (n--) 1040 { 1041 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
1042 david.dillard 1.105 return false;
1043 mike 1.112 1044 p++; 1045 q++;
1046 kumpf 1.39 }
1047 mike 1.28
1048 kumpf 1.39 return true;
1049 mike 1.112 1050 #endif /* PEGASUS_HAS_ICU */
1051 david 1.69 } 1052
1053 mike 1.112 Boolean String::equalNoCase(const String& s1, const char* s2)
1054 david 1.69 {
1055 mike 1.112 _checkNullPointer(s2);
1056 david 1.69
1057 mike 1.112 #if defined(PEGASUS_HAS_ICU)
1058 david 1.69
1059 mike 1.112 return String::equalNoCase(s1, String(s2));
1060 david 1.69
1061 mike 1.112 #elif defined(PEGASUS_STRING_NO_UTF8)
1062 david 1.69
1063 mike 1.112 const Uint16* p1 = (Uint16)s1._rep->data; 1064 const char p2 = s2; 1065 size_t n = s1._rep->size;
1066 david.dillard 1.105
1067 mike 1.112 while (n--) 1068 { 1069 if (!*p2) 1070 return false;
1071 david 1.71
1072 mike 1.112 if (_toUpper(p1++) != _toUpperTable[int(p2++)]) 1073 return false; 1074 }
1075 kumpf 1.42
1076 mike 1.112 if (*p2) 1077 return false;
1078 david.dillard 1.116
1079 mike 1.112 return true;
1080 karl 1.36
1081 mike 1.112 #else /* PEGASUS_HAS_ICU */
1082 david.dillard 1.105
1083 mike 1.112 // ATTN: optimize this! 1084 return String::equalNoCase(s1, String(s2));
1085 david.dillard 1.105
1086 mike 1.112 #endif /* PEGASUS_HAS_ICU */ 1087 }
1088 chuck 1.78
1089 mike 1.112 Boolean String::equal(const String& s1, const String& s2)
1090 karl 1.36 {
1091 marek 1.131 return (s1._rep == s2._rep) \|\|
1092 marek 1.137 ((s1._rep->size == s2._rep->size) && 1093 memcmp(s1._rep->data, 1094 s2._rep->data, 1095 s1._rep->size * sizeof(Uint16)) == 0);
1096 karl 1.36 } 1097
1098 mike 1.112 Boolean String::equal(const String& s1, const char* s2) 1099 { 1100 #ifdef PEGASUS_STRING_NO_UTF8
1101 kumpf 1.35
1102 mike 1.112 _checkNullPointer(s2);
1103 kumpf 1.39
1104 mike 1.112 const Uint16* p = (Uint16)s1._rep->data; 1105 const char q = s2;
1106 kumpf 1.39
1107 mike 1.112 while (p && q) 1108 { 1109 if (p++ != Uint16(q++)) 1110 return false; 1111 }
1112 kumpf 1.39
1113 mike 1.112 return !(p \|\| q);
1114 kumpf 1.39
1115 mike 1.112 #else /* PEGASUS_STRING_NO_UTF8 */
1116 kumpf 1.39
1117 mike 1.112 return String::equal(s1, String(s2));
1118 kumpf 1.39
1119 mike 1.112 #endif /* PEGASUS_STRING_NO_UTF8 */
1120 kumpf 1.39 } 1121
1122 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
1123 kumpf 1.39 {
1124 mike 1.112 #if defined(PEGASUS_HAS_ICU)
1125 david 1.69
1126 yi.zhou 1.108 if (InitializeICU::initICUSuccessful()) 1127 {
1128 david.dillard 1.105 char buf = NULL; 1129 const int size = str.size() 6;
1130 mike 1.112 UnicodeString UniStr( 1131 (const UChar *)str.getChar16Data(), (int32_t)str.size());
1132 david.dillard 1.105 Uint32 bufsize = UniStr.extract(0,size,buf); 1133 buf = new char[bufsize+1]; 1134 UniStr.extract(0,bufsize,buf); 1135 os << buf; 1136 os.flush(); 1137 delete [] buf;
1138 david.dillard 1.116 return os;
1139 yi.zhou 1.108 }
1140 mike 1.112
1141 david.dillard 1.116 #endif // PEGASUS_HAS_ICU
1142 mike 1.112 1143 for (Uint32 i = 0, n = str.size(); i < n; i++)
1144 yi.zhou 1.108 {
1145 mike 1.112 Uint16 code = str[i];
1146 david.dillard 1.105
1147 mike 1.112 if (code > 0 && !(code & 0xFF00)) 1148 os << char(code); 1149 else 1150 { 1151 // Print in hex format: 1152 char buffer[8]; 1153 sprintf(buffer, "\\x%04X", code); 1154 os << buffer;
1155 david.dillard 1.105 }
1156 yi.zhou 1.108 }
1157 kumpf 1.39 1158 return os; 1159 } 1160
1161 mike 1.112 void StringAppendCharAux(StringRep*& _rep)
1162 kumpf 1.39 {
1163 mike 1.112 StringRep* tmp; 1164 1165 if (_rep->cap) 1166 { 1167 tmp = StringRep::alloc(2 * _rep->cap); 1168 tmp->size = _rep->size; 1169 _copy(tmp->data, _rep->data, _rep->size); 1170 } 1171 else 1172 { 1173 tmp = StringRep::alloc(8); 1174 tmp->size = 0; 1175 } 1176 1177 StringRep::unref(_rep); 1178 _rep = tmp;
1179 kumpf 1.39 } 1180
1181 thilo.boehm 1.128 void AssignASCII(String& s, const char* str, Uint32 n) 1182 { 1183 class StringLayout 1184 { 1185 public: 1186 StringRep* rep; 1187 }; 1188
1189 kumpf 1.130 StringLayout* that = reinterpret_cast<StringLayout*>(&s);
1190 thilo.boehm 1.128 1191 _checkNullPointer(str); 1192 1193 if (n > that->rep->cap \|\| that->rep->refs.get() != 1) 1194 { 1195 StringRep::unref(that->rep); 1196 that->rep = StringRep::alloc(n); 1197 } 1198 1199 _copy(that->rep->data, str, n); 1200 that->rep->size = n; 1201 that->rep->data[that->rep->size] = 0; 1202 } 1203
1204 mike 1.112 PEGASUS_NAMESPACE_END 1205 1206 /* 1207 ================================================================================ 1208 1209 String optimizations: 1210 1211 1. Added mechanism allowing certain functions to be inlined only when 1212 used by internal Pegasus modules. External modules (i.e., providers) 1213 link to a non-inline version, which allows for binary compatibility. 1214 1215 2. Implemented copy-on-write with atomic increment/decrement. This 1216 yielded a 10% improvement for the 'gc' benchmark and an 11% improvement 1217 for the 'ni1000' benchmark. 1218 1219 3. Employed loop unrolling in several places. For example, see: 1220 1221 static Uint16* _find(const Uint16* s, size_t n, Uint16 c); 1222 1223 4. Used the "empty-rep" optimization (described in whitepaper from the 1224 GCC Developers Summit). This reduced default construction to a simple 1225 mike 1.112 pointer assignment. 1226 1227 inline String::String() : _rep(&_emptyRep) { } 1228 1229 5. Implemented Uint16 versions of toupper() and tolower() using tables. 1230 For example: 1231 1232 static const char _upper[] = 1233 { 1234 0,1,2,...255 1235 }; 1236 1237 inline Uint16 _toUpper(Uint16 x) 1238 { 1239 return (x & 0xFF00) ? x : _upper[x]; 1240 } 1241
1242 david.dillard 1.116 This outperforms the system implementation by avoiding an anding
1243 mike 1.112 operation. 1244
1245 david.dillard 1.116 6. Implemented char* version of the following member functions to 1246 eliminate unnecessary creation of anonymous string objects
1247 mike 1.112 (temporaries). 1248 1249 String(const String& s1, const char* s2); 1250 String(const char* s1, const String& s2); 1251 String& String::operator=(const char* str); 1252 Uint32 String::find(const char* s) const; 1253 bool String::equal(const String& s1, const char* s2); 1254 static int String::compare(const String& s1, const char* s2); 1255 String& String::append(const char* str); 1256 String& String::append(const char* str, Uint32 size); 1257 static bool String::equalNoCase(const String& s1, const char* s2); 1258 String& operator=(const char* str) 1259 String& String::assign(const char* str) 1260 String& String::append(const char* str) 1261 Boolean operator==(const String& s1, const char* s2) 1262 Boolean operator==(const char* s1, const String& s2) 1263 Boolean operator!=(const String& s1, const char* s2) 1264 Boolean operator!=(const char* s1, const String& s2) 1265 Boolean operator<(const String& s1, const char* s2) 1266 Boolean operator<(const char* s1, const String& s2) 1267 Boolean operator>(const String& s1, const char* s2) 1268 mike 1.112 Boolean operator>(const char* s1, const String& s2) 1269 Boolean operator<=(const String& s1, const char* s2) 1270 Boolean operator<=(const char* s1, const String& s2) 1271 Boolean operator>=(const String& s1, const char* s2) 1272 Boolean operator>=(const char* s1, const String& s2) 1273 String operator+(const String& s1, const char* s2) 1274 String operator+(const char* s1, const String& s2) 1275
1276 david.dillard 1.116 7. Optimized _roundUpToPow2(), used in rounding the capacity to the next
1277 mike 1.112 power of two (algorithm from the book "Hacker's Delight"). 1278 1279 static Uint32 _roundUpToPow2(Uint32 x) 1280 { 1281 if (x < 8) 1282 return 8; 1283 1284 x--; 1285 x \|= (x >> 1); 1286 x \|= (x >> 2); 1287 x \|= (x >> 4); 1288 x \|= (x >> 8); 1289 x \|= (x >> 16); 1290 x++; 1291 1292 return x; 1293 } 1294 1295 8. Implemented "concatenating constructors" to eliminate temporaries
1296 david.dillard 1.116 created by operator+(). This scheme employs the "return-value
1297 mike 1.112 optimization" described by Stan Lippman. 1298 1299 inline String operator+(const String& s1, const String& s2) 1300 { 1301 return String(s1, s2, 0); 1302 } 1303 1304 9. Experimented to find the optimal initial size for a short string. 1305 Eight seems to offer the best tradeoff between space and time. 1306 1307 10. Inlined all members of the Char16 class. 1308 1309 11. Used Uint16 internally in the String class. This showed no improvement 1310 since Char16 was already fully inlined and was essentially reduced to 1311 Uint16 in any case. 1312 1313 12. Implemented conditional logic (#if) allowing error checking logic to
1314 david.dillard 1.116 be excluded to better performance. Examples include bounds checking
1315 mike 1.112 and null-pointer checking. 1316 1317 13. Used memcpy() and memcmp() where possible. These are implemented using 1318 the rep family of instructions under Intel and are much faster. 1319
1320 david.dillard 1.116 14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
1321 mike 1.112 copy routine overhead. 1322 1323 15. Added ASCII7 form of the constructor and assign(). 1324 1325 String s("hello world", String::ASCII7); 1326 1327 s.assignASCII7("hello world"); 1328 1329 This avoids slower UTF8 processing when not needed. 1330 1331 ================================================================================ 1332 */

No CVS admin address has been configured