pegasus/src/Pegasus/Common/String.cpp - annotate

Return to String.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

1 karl 1.119 //%2006////////////////////////////////////////////////////////////////////////
2 mike 1.27 //
3 karl 1.97 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development 4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems. 5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
6 karl 1.85 // IBM Corp.; EMC Corporation, The Open Group.
7 karl 1.97 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.; 8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
9 karl 1.98 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.; 10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
11 karl 1.119 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.; 12 // EMC Corporation; Symantec Corporation; The Open Group.
13 mike 1.27 // 14 // Permission is hereby granted, free of charge, to any person obtaining a copy
15 kumpf 1.41 // of this software and associated documentation files (the "Software"), to 16 // deal in the Software without restriction, including without limitation the 17 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
18 mike 1.27 // sell copies of the Software, and to permit persons to whom the Software is 19 // furnished to do so, subject to the following conditions:
20 karl 1.119 //
21 kumpf 1.41 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
22 mike 1.27 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED 23 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
24 kumpf 1.41 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 25 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 26 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
27 mike 1.27 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 29 // 30 //============================================================================== 31 // 32 //%///////////////////////////////////////////////////////////////////////////// 33
34 jim.wunderlich 1.115 #include <Pegasus/Common/PegasusAssert.h>
35 mike 1.113 #include <cstring>
36 kumpf 1.48 #include "InternalException.h"
37 david 1.69 #include "CommonUTF.h"
38 mike 1.112 #include "MessageLoader.h" 39 #include "StringRep.h"
40 david 1.69 41 #ifdef PEGASUS_HAS_ICU
42 chuck 1.99 #include <unicode/ustring.h> 43 #include <unicode/uchar.h>
44 david 1.69 #endif 45
46 mike 1.112 PEGASUS_NAMESPACE_BEGIN
47 mike 1.28
48 mike 1.112 //============================================================================== 49 // 50 // Compile-time macros (undefined by default). 51 // 52 // PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code. 53 // 54 //==============================================================================
55 mike 1.27
56 mike 1.112 //==============================================================================
57 kumpf 1.39 //
58 mike 1.112 // File-scope definitions:
59 kumpf 1.54 //
60 mike 1.112 //============================================================================== 61 62 // Note: this table is much faster than the system toupper(). Please do not 63 // change.
64 kumpf 1.54
65 david.dillard 1.116 const Uint8 _toUpperTable[256] =
66 kumpf 1.54 {
67 mike 1.112 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 68 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 69 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 70 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 71 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 72 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 73 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 74 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 75 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47, 76 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, 77 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57, 78 0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, 79 0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47, 80 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, 81 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57, 82 0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F, 83 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 84 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 85 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 86 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 87 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 88 mike 1.112 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 89 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 90 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 91 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 92 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 93 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 94 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 95 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 96 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 97 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 98 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF, 99 }; 100 101 // Note: this table is much faster than the system tulower(). Please do not 102 // change. 103
104 david.dillard 1.116 const Uint8 _toLowerTable[256] =
105 mike 1.112 { 106 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 107 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 108 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 109 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 110 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 111 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 112 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 113 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 114 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 115 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 116 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 117 0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F, 118 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 119 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 120 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 121 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, 122 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 123 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 124 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 125 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 126 mike 1.112 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 127 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 128 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 129 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 130 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 131 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 132 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 133 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 134 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 135 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 136 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 137 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF, 138 }; 139 140 // Converts 16-bit characters to upper case. This routine is faster than the 141 // system toupper(). Please do not change. 142 inline Uint16 _toUpper(Uint16 x) 143 { 144 return (x & 0xFF00) ? x : _toUpperTable[x];
145 kumpf 1.54 } 146
147 mike 1.112 // Converts 16-bit characters to lower case. This routine is faster than the 148 // system toupper(). Please do not change. 149 inline Uint16 _toLower(Uint16 x)
150 kumpf 1.54 {
151 mike 1.112 return (x & 0xFF00) ? x : _toLowerTable[x]; 152 } 153 154 // Rounds x up to the nearest power of two (or just returns 8 if x < 8). 155 static Uint32 _roundUpToPow2(Uint32 x) 156 {
157 dave.sudlik 1.120 // Check for potential overflow in x 158 PEGASUS_CHECK_CAPACITY_OVERFLOW(x);
159 mike 1.112 160 if (x < 8) 161 return 8; 162 163 x--; 164 x \|= (x >> 1); 165 x \|= (x >> 2); 166 x \|= (x >> 4); 167 x \|= (x >> 8); 168 x \|= (x >> 16); 169 x++; 170 171 return x; 172 } 173 174 template<class P, class Q> 175 static void _copy(P* p, const Q* q, size_t n) 176 { 177 // The following employs loop unrolling for efficiency. Please do not 178 // eliminate. 179 180 mike 1.112 while (n >= 8) 181 { 182 p[0] = q[0]; 183 p[1] = q[1]; 184 p[2] = q[2]; 185 p[3] = q[3]; 186 p[4] = q[4]; 187 p[5] = q[5]; 188 p[6] = q[6]; 189 p[7] = q[7]; 190 p += 8; 191 q += 8; 192 n -= 8; 193 } 194 195 while (n >= 4) 196 { 197 p[0] = q[0]; 198 p[1] = q[1]; 199 p[2] = q[2]; 200 p[3] = q[3]; 201 mike 1.112 p += 4; 202 q += 4; 203 n -= 4; 204 } 205 206 while (n--) 207 p++ = q++; 208 } 209 210 static Uint16* _find(const Uint16* s, size_t n, Uint16 c) 211 { 212 // The following employs loop unrolling for efficiency. Please do not 213 // eliminate. 214 215 while (n >= 4) 216 { 217 if (s[0] == c) 218 return (Uint16)s; 219 if (s[1] == c) 220 return (Uint16)&s[1]; 221 if (s[2] == c) 222 mike 1.112 return (Uint16)&s[2]; 223 if (s[3] == c) 224 return (Uint16)&s[3];
225 kumpf 1.82
226 mike 1.112 n -= 4; 227 s += 4; 228 } 229 230 if (n) 231 { 232 if (s == c) 233 return (Uint16)s; 234 s++; 235 n--; 236 } 237 238 if (n) 239 { 240 if (s == c) 241 return (Uint16)s; 242 s++; 243 n--; 244 } 245 246 if (n && s == c) 247 mike 1.112 return (Uint16)s; 248 249 // Not found! 250 return 0; 251 } 252 253 static int _compare(const Uint16* s1, const Uint16* s2) 254 { 255 while (s1 && s2) 256 { 257 int r = s1++ - s2++; 258 259 if (r) 260 return r; 261 } 262 263 if (s2) 264 return -1; 265 else if (s1) 266 return 1; 267 268 mike 1.112 return 0; 269 } 270 271 static int _compareNoUTF8(const Uint16* s1, const char* s2) 272 { 273 Uint16 c1; 274 Uint16 c2; 275 276 do 277 { 278 c1 = s1++; 279 c2 = s2++; 280 281 if (c1 == 0) 282 return c1 - c2; 283 } 284 while (c1 == c2); 285 286 return c1 - c2; 287 } 288 289 mike 1.112 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n) 290 { 291 memcpy(s1, s2, n * sizeof(Uint16)); 292 } 293 294 void StringThrowOutOfBounds() 295 { 296 throw IndexOutOfBoundsException(); 297 } 298 299 inline void _checkNullPointer(const void* ptr) 300 { 301 if (!ptr) 302 throw NullPointer(); 303 } 304 305 static void _StringThrowBadUTF8(Uint32 index) 306 { 307 MessageLoaderParms parms( 308 "Common.String.BAD_UTF8", 309 "The byte sequence starting at index $0 " 310 mike 1.112 "is not valid UTF-8 encoding.", 311 index); 312 throw Exception(parms); 313 } 314 315 static size_t _copyFromUTF8(
316 david.dillard 1.116 Uint16* dest, 317 const char* src,
318 mike 1.112 size_t n, 319 size_t& utf8_error_index) 320 { 321 Uint16* p = dest; 322 const Uint8* q = (const Uint8)src; 323 324 // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later). 325 // Use loop-unrolling. 326 327 while (n >=8 && ((q[0]\|q[1]\|q[2]\|q[3]\|q[4]\|q[5]\|q[6]\|q[7]) & 0x80) == 0) 328 { 329 p[0] = q[0]; 330 p[1] = q[1]; 331 p[2] = q[2]; 332 p[3] = q[3]; 333 p[4] = q[4]; 334 p[5] = q[5]; 335 p[6] = q[6]; 336 p[7] = q[7]; 337 p += 8; 338 q += 8; 339 mike 1.112 n -= 8; 340 } 341 342 while (n >=4 && ((q[0]\|q[1]\|q[2]\|q[3]) & 0x80) == 0) 343 { 344 p[0] = q[0]; 345 p[1] = q[1]; 346 p[2] = q[2]; 347 p[3] = q[3]; 348 p += 4; 349 q += 4; 350 n -= 4; 351 } 352 353 switch (n) 354 { 355 case 0: 356 return p - dest; 357 case 1: 358 if (q[0] < 128) 359 { 360 mike 1.112 p[0] = q[0]; 361 return p + 1 - dest; 362 } 363 break; 364 case 2: 365 if (((q[0]\|q[1]) & 0x80) == 0) 366 { 367 p[0] = q[0]; 368 p[1] = q[1]; 369 return p + 2 - dest; 370 } 371 break; 372 case 3: 373 if (((q[0]\|q[1]\|q[2]) & 0x80) == 0) 374 { 375 p[0] = q[0]; 376 p[1] = q[1]; 377 p[2] = q[2]; 378 return p + 3 - dest; 379 } 380 break; 381 mike 1.112 } 382 383 // Process remaining characters. 384 385 while (n) 386 { 387 // Optimize for 7-bit ASCII case. 388 389 if (q < 128) 390 { 391 p++ = q++; 392 n--; 393 } 394 else 395 { 396 Uint8 c = UTF_8_COUNT_TRAIL_BYTES(q) + 1; 397 398 if (c > n \|\| !isValid_U8(q, c) \|\| 399 UTF8toUTF16(&q, q + c, &p, p + n) != 0) 400 { 401 utf8_error_index = q - (const Uint8)src; 402 mike 1.112 return size_t(-1); 403 } 404 405 n -= c; 406 } 407 } 408 409 return p - dest; 410 } 411
412 david.dillard 1.116 // Note: dest must be at least three times src (plus an extra byte for
413 mike 1.112 // terminator). 414 static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n) 415 { 416 // The following employs loop unrolling for efficiency. Please do not 417 // eliminate. 418 419 const Uint16* q = src; 420 Uint8* p = (Uint8*)dest; 421 422 while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
423 kumpf 1.82 {
424 mike 1.112 p[0] = q[0]; 425 p[1] = q[1]; 426 p[2] = q[2]; 427 p[3] = q[3]; 428 p += 4; 429 q += 4; 430 n -= 4;
431 kumpf 1.82 }
432 mike 1.112 433 switch (n) 434 { 435 case 0: 436 return p - (Uint8)dest; 437 case 1: 438 if (q[0] < 128) 439 { 440 p[0] = q[0]; 441 return p + 1 - (Uint8)dest; 442 } 443 break; 444 case 2: 445 if (q[0] < 128 && q[1] < 128) 446 { 447 p[0] = q[0]; 448 p[1] = q[1]; 449 return p + 2 - (Uint8)dest; 450 } 451 break; 452 case 3: 453 mike 1.112 if (q[0] < 128 && q[1] < 128 && q[2] < 128) 454 { 455 p[0] = q[0]; 456 p[1] = q[1]; 457 p[2] = q[2]; 458 return p + 3 - (Uint8)dest; 459 } 460 break; 461 } 462 463 // If this line was reached, there must be characters greater than 128. 464 465 UTF16toUTF8(&q, q + n, &p, p + 3 * n); 466 467 return p - (Uint8*)dest;
468 kumpf 1.54 } 469
470 mike 1.112 static inline size_t _convert( 471 Uint16* p, const char* q, size_t n, size_t& utf8_error_index)
472 kumpf 1.54 {
473 mike 1.112 #ifdef PEGASUS_STRING_NO_UTF8 474 _copy(p, q, n); 475 return n; 476 #else 477 return _copyFromUTF8(p, q, n, utf8_error_index); 478 #endif
479 kumpf 1.54 } 480
481 mike 1.112 //============================================================================== 482 // 483 // class CString 484 // 485 //============================================================================== 486 487 CString::CString(const CString& cstr) : _rep(0)
488 kumpf 1.54 {
489 mike 1.112 if (cstr._rep)
490 kumpf 1.82 {
491 mike 1.112 size_t n = strlen(cstr._rep) + 1; 492 _rep = (char*)operator new(n); 493 memcpy(_rep, cstr._rep, n);
494 kumpf 1.82 }
495 kumpf 1.54 } 496
497 kumpf 1.56 CString& CString::operator=(const CString& cstr) 498 {
499 kumpf 1.82 if (&cstr != this)
500 kumpf 1.81 {
501 kumpf 1.82 if (_rep) 502 {
503 mike 1.112 operator delete(_rep);
504 kumpf 1.82 _rep = 0; 505 }
506 mike 1.112
507 kumpf 1.82 if (cstr._rep) 508 {
509 mike 1.112 size_t n = strlen(cstr._rep) + 1; 510 _rep = (char*)operator new(n); 511 memcpy(_rep, cstr._rep, n);
512 kumpf 1.82 }
513 kumpf 1.81 }
514 mike 1.112
515 kumpf 1.56 return *this; 516 } 517
518 mike 1.112 //==============================================================================
519 kumpf 1.54 //
520 mike 1.112 // class StringRep
521 kumpf 1.39 //
522 mike 1.112 //==============================================================================
523 kumpf 1.39
524 mike 1.112 StringRep StringRep::_emptyRep;
525 mike 1.27
526 mike 1.112 inline StringRep* StringRep::alloc(size_t cap)
527 mike 1.27 {
528 dave.sudlik 1.120 // Check for potential overflow in cap 529 PEGASUS_CHECK_CAPACITY_OVERFLOW(cap);
530 mike 1.27
531 mike 1.112 StringRep* rep = (StringRep)::operator new( 532 sizeof(StringRep) + cap sizeof(Uint16)); 533 rep->cap = cap; 534 new(&rep->refs) AtomicInt(1); 535 536 return rep;
537 mike 1.27 } 538
539 mike 1.112 static inline void _reserve(StringRep*& rep, Uint32 cap)
540 chuck 1.102 {
541 mike 1.114 if (cap > rep->cap \|\| rep->refs.get() != 1)
542 chuck 1.102 {
543 mike 1.112 size_t n = _roundUpToPow2(cap); 544 StringRep* newRep = StringRep::alloc(n); 545 newRep->size = rep->size; 546 _copy(newRep->data, rep->data, rep->size + 1); 547 StringRep::unref(rep); 548 rep = newRep; 549 } 550 }
551 david.dillard 1.105
552 mike 1.112 StringRep* StringRep::create(const Uint16* data, size_t size) 553 { 554 StringRep* rep = StringRep::alloc(size); 555 rep->size = size; 556 _copy(rep->data, data, size); 557 rep->data[size] = '\0'; 558 return rep; 559 }
560 chuck 1.102
561 mike 1.112 StringRep* StringRep::copyOnWrite(StringRep* rep) 562 { 563 // Return a new copy of rep. Release rep.
564 chuck 1.102
565 mike 1.112 StringRep* newRep = StringRep::alloc(rep->size); 566 newRep->size = rep->size; 567 _copy(newRep->data, rep->data, rep->size); 568 newRep->data[newRep->size] = '\0'; 569 StringRep::unref(rep); 570 return newRep;
571 chuck 1.102 } 572
573 mike 1.112 StringRep* StringRep::create(const char* data, size_t size)
574 kumpf 1.43 {
575 mike 1.112 StringRep* rep = StringRep::alloc(size); 576 size_t utf8_error_index; 577 rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index); 578 579 if (rep->size == size_t(-1)) 580 { 581 StringRep::free(rep);
582 a.dunfey 1.125 _StringThrowBadUTF8((Uint32)utf8_error_index);
583 mike 1.112 }
584 kumpf 1.43
585 mike 1.112 rep->data[rep->size] = '\0';
586 kumpf 1.43
587 mike 1.112 return rep;
588 mike 1.27 } 589
590 mike 1.112 Uint32 StringRep::length(const Uint16* str)
591 mike 1.27 {
592 mike 1.112 // Note: We could unroll this but it is rarely called. 593 594 const Uint16* end = (Uint16)str; 595 596 while (end++) 597 ; 598
599 a.dunfey 1.125 return (Uint32)(end - str - 1);
600 kumpf 1.39 }
601 tony 1.66
602 mike 1.112 //============================================================================== 603 // 604 // class String 605 // 606 //============================================================================== 607 608 const String String::EMPTY;
609 mike 1.27
610 kumpf 1.39 String::String(const String& str, Uint32 n) 611 {
612 mike 1.112 _checkBounds(n, str._rep->size); 613 _rep = StringRep::create(str._rep->data, n);
614 kumpf 1.39 } 615 616 String::String(const Char16* str) 617 {
618 mike 1.112 _checkNullPointer(str); 619 _rep = StringRep::create((Uint16)str, StringRep::length((Uint16)str));
620 mike 1.27 } 621
622 kumpf 1.39 String::String(const Char16* str, Uint32 n) 623 {
624 mike 1.112 _checkNullPointer(str); 625 _rep = StringRep::create((Uint16*)str, n);
626 kumpf 1.39 } 627 628 String::String(const char* str)
629 mike 1.27 {
630 mike 1.112 _checkNullPointer(str);
631 david.dillard 1.105
632 mike 1.112 // Set this just in case create() throws an exception. 633 _rep = &StringRep::_emptyRep; 634 _rep = StringRep::create(str, strlen(str));
635 mike 1.27 } 636
637 kumpf 1.39 String::String(const char* str, Uint32 n)
638 mike 1.27 {
639 mike 1.112 _checkNullPointer(str);
640 david.dillard 1.105
641 mike 1.112 // Set this just in case create() throws an exception. 642 _rep = &StringRep::_emptyRep; 643 _rep = StringRep::create(str, n);
644 kumpf 1.39 }
645 mike 1.27
646 mike 1.112 String::String(const String& s1, const String& s2)
647 kumpf 1.39 {
648 mike 1.112 size_t n1 = s1._rep->size; 649 size_t n2 = s2._rep->size; 650 size_t n = n1 + n2; 651 _rep = StringRep::alloc(n); 652 _copy(_rep->data, s1._rep->data, n1); 653 _copy(_rep->data + n1, s2._rep->data, n2); 654 _rep->size = n; 655 _rep->data[n] = '\0';
656 mike 1.27 } 657
658 mike 1.112 String::String(const String& s1, const char* s2)
659 mike 1.27 {
660 mike 1.112 _checkNullPointer(s2); 661 size_t n1 = s1._rep->size; 662 size_t n2 = strlen(s2); 663 _rep = StringRep::alloc(n1 + n2); 664 _copy(_rep->data, s1._rep->data, n1); 665 size_t utf8_error_index; 666 size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index); 667 668 if (tmp == size_t(-1))
669 kumpf 1.82 {
670 mike 1.112 StringRep::free(_rep); 671 _rep = &StringRep::_emptyRep;
672 a.dunfey 1.125 _StringThrowBadUTF8((Uint32)utf8_error_index);
673 kumpf 1.82 }
674 mike 1.112 675 _rep->size = n1 + tmp; 676 _rep->data[_rep->size] = '\0';
677 mike 1.27 } 678
679 mike 1.112 String::String(const char* s1, const String& s2)
680 mike 1.27 {
681 mike 1.112 _checkNullPointer(s1); 682 size_t n1 = strlen(s1); 683 size_t n2 = s2._rep->size; 684 _rep = StringRep::alloc(n1 + n2); 685 size_t utf8_error_index; 686 size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index); 687 688 if (tmp == size_t(-1)) 689 { 690 StringRep::free(_rep); 691 _rep = &StringRep::_emptyRep;
692 a.dunfey 1.125 _StringThrowBadUTF8((Uint32)utf8_error_index);
693 mike 1.112 } 694 695 _rep->size = n2 + tmp; 696 _copy(_rep->data + n1, s2._rep->data, n2); 697 _rep->data[_rep->size] = '\0';
698 mike 1.27 } 699
700 mike 1.112 String& String::assign(const String& str)
701 mike 1.27 {
702 mike 1.112 if (_rep != str._rep)
703 david.dillard 1.105 {
704 mike 1.112 StringRep::unref(_rep); 705 StringRep::ref(_rep = str._rep);
706 david.dillard 1.105 } 707
708 mike 1.27 return this; 709 } 710 711 String& String::assign(const Char16 str, Uint32 n) 712 {
713 mike 1.112 _checkNullPointer(str); 714
715 mike 1.114 if (n > _rep->cap \|\| _rep->refs.get() != 1)
716 david.dillard 1.105 {
717 mike 1.112 StringRep::unref(_rep); 718 _rep = StringRep::alloc(n);
719 david.dillard 1.105 } 720
721 mike 1.112 _rep->size = n; 722 _copy(_rep->data, (Uint16*)str, n); 723 _rep->data[n] = '\0'; 724
725 mike 1.27 return *this; 726 } 727
728 mike 1.112 String& String::assign(const char* str, Uint32 n)
729 chuck 1.102 {
730 mike 1.112 _checkNullPointer(str); 731
732 mike 1.114 if (n > _rep->cap \|\| _rep->refs.get() != 1)
733 david.dillard 1.105 {
734 mike 1.112 StringRep::unref(_rep); 735 _rep = StringRep::alloc(n);
736 david.dillard 1.105 } 737
738 mike 1.112 size_t utf8_error_index; 739 _rep->size = _convert(_rep->data, str, n, utf8_error_index);
740 chuck 1.102
741 mike 1.112 if (_rep->size == size_t(-1))
742 david.dillard 1.105 {
743 mike 1.112 StringRep::free(_rep); 744 _rep = &StringRep::_emptyRep;
745 a.dunfey 1.125 _StringThrowBadUTF8((Uint32)utf8_error_index);
746 david.dillard 1.105 }
747 mike 1.112 748 _rep->data[_rep->size] = 0;
749 david.dillard 1.105
750 mike 1.27 return *this; 751 } 752
753 kumpf 1.39 void String::clear() 754 {
755 mike 1.112 if (_rep->size) 756 {
757 mike 1.114 if (_rep->refs.get() == 1)
758 mike 1.112 { 759 _rep->size = 0; 760 _rep->data[0] = '\0'; 761 } 762 else 763 { 764 StringRep::unref(_rep); 765 _rep = &StringRep::_emptyRep; 766 } 767 }
768 kumpf 1.39 } 769
770 mike 1.112 void String::reserveCapacity(Uint32 cap)
771 kumpf 1.39 {
772 mike 1.112 _reserve(_rep, cap);
773 kumpf 1.39 } 774
775 mike 1.112 CString String::getCString() const 776 {
777 david.dillard 1.116 // A UTF8 string can have three times as many characters as its UTF16 778 // counterpart, so we allocate extra memory for the worst case. In the
779 mike 1.112 // best case, we may need only one third of the memory allocated. But
780 david.dillard 1.116 // downsizing the string afterwards is expensive and unecessary since 781 // CString objects are usually short-lived (disappearing after only a few
782 mike 1.112 // instructions). CString objects are typically created on the stack as 783 // means to obtain a char* pointer. 784 785 #ifdef PEGASUS_STRING_NO_UTF8 786 char* str = (char*)operator new(_rep->size + 1); 787 _copy(str, _rep->data, _rep->size); 788 str[_rep->size] = '\0'; 789 return CString(str);
790 gs.keenan 1.110 #else
791 a.dunfey 1.125 Uint32 n = (Uint32)(3 * _rep->size);
792 mike 1.112 char* str = (char*)operator new(n + 1); 793 size_t size = _copyToUTF8(str, _rep->data, _rep->size); 794 str[size] = '\0'; 795 return CString(str);
796 gs.keenan 1.110 #endif
797 kumpf 1.39 } 798
799 mike 1.112 String& String::append(const Char16* str, Uint32 n)
800 kumpf 1.39 {
801 mike 1.112 _checkNullPointer(str); 802 803 size_t oldSize = _rep->size; 804 size_t newSize = oldSize + n;
805 a.dunfey 1.125 _reserve(_rep, (Uint32)newSize);
806 mike 1.112 _copy(_rep->data + oldSize, (Uint16)str, n); 807 _rep->size = newSize; 808 _rep->data[newSize] = '\0'; 809 810 return this;
811 kumpf 1.39 } 812
813 mike 1.112 String& String::append(const String& str)
814 mike 1.27 {
815 a.dunfey 1.125 return append((Char16*)(&(str._rep->data[0])), (Uint32)str._rep->size);
816 mike 1.27 } 817
818 mike 1.112 String& String::append(const char* str, Uint32 size)
819 mike 1.27 {
820 mike 1.112 _checkNullPointer(str); 821 822 size_t oldSize = _rep->size; 823 size_t cap = oldSize + size; 824
825 a.dunfey 1.125 _reserve(_rep, (Uint32)cap);
826 mike 1.112 size_t utf8_error_index; 827 size_t tmp = _convert( 828 (Uint16*)_rep->data + oldSize, str, size, utf8_error_index); 829 830 if (tmp == size_t(-1)) 831 { 832 StringRep::free(_rep); 833 _rep = &StringRep::_emptyRep;
834 a.dunfey 1.125 _StringThrowBadUTF8((Uint32)utf8_error_index);
835 mike 1.112 }
836 mike 1.27
837 mike 1.112 _rep->size += tmp; 838 _rep->data[_rep->size] = '\0';
839 mike 1.27
840 kumpf 1.39 return *this; 841 } 842
843 mike 1.112 void String::remove(Uint32 index, Uint32 n)
844 mike 1.27 {
845 mike 1.112 if (n == PEG_NOT_FOUND)
846 a.dunfey 1.125 n = (Uint32)(_rep->size - index);
847 mike 1.112 848 _checkBounds(index + n, _rep->size); 849
850 mike 1.114 if (_rep->refs.get() != 1)
851 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
852 mike 1.27
853 jim.wunderlich 1.115 PEGASUS_ASSERT(index + n <= _rep->size);
854 mike 1.27
855 mike 1.112 size_t rem = _rep->size - (index + n); 856 Uint16* data = _rep->data;
857 mike 1.27
858 mike 1.112 if (rem) 859 memmove(data + index, data + index + n, rem * sizeof(Uint16));
860 mike 1.27
861 mike 1.112 _rep->size -= n; 862 data[_rep->size] = '\0';
863 mike 1.27 } 864
865 mike 1.112 String String::subString(Uint32 index, Uint32 n) const
866 mike 1.27 {
867 mike 1.112 // Note: this implementation is very permissive but used for 868 // backwards compatibility. 869 870 if (index < _rep->size)
871 mike 1.27 {
872 mike 1.112 if (n == PEG_NOT_FOUND \|\| n > _rep->size - index)
873 a.dunfey 1.125 n = (Uint32)(_rep->size - index);
874 mike 1.27
875 w.otsuka 1.121 return String((Char16*)(_rep->data + index), n);
876 mike 1.27 }
877 david.dillard 1.105 878 return String();
879 mike 1.27 } 880 881 Uint32 String::find(Char16 c) const 882 {
883 mike 1.112 Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
884 mike 1.27
885 mike 1.112 if (p)
886 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
887 mike 1.27 888 return PEG_NOT_FOUND; 889 } 890
891 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
892 mike 1.30 {
893 mike 1.112 _checkBounds(index, _rep->size); 894 895 if (index >= _rep->size) 896 return PEG_NOT_FOUND; 897 898 Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
899 mike 1.30
900 mike 1.112 if (p)
901 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
902 mike 1.30 903 return PEG_NOT_FOUND; 904 } 905
906 mike 1.112 Uint32 StringFindAux( 907 const StringRep* _rep, const Char16* s, Uint32 n)
908 mike 1.27 {
909 mike 1.112 _checkNullPointer(s);
910 mike 1.27
911 mike 1.112 const Uint16* data = _rep->data; 912 size_t rem = _rep->size; 913 914 while (n <= rem)
915 mike 1.30 {
916 mike 1.112 Uint16* p = (Uint16*)_find(data, rem, s[0]); 917 918 if (!p) 919 break;
920 mike 1.30
921 mike 1.112 if (memcmp(p, s, n * sizeof(Uint16)) == 0)
922 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
923 david.dillard 1.105
924 mike 1.112 p++; 925 rem -= p - data; 926 data = p;
927 mike 1.27 }
928 mike 1.112
929 mike 1.27 return PEG_NOT_FOUND; 930 } 931
932 mike 1.112 Uint32 String::find(const char* s) const 933 { 934 _checkNullPointer(s); 935 936 // Note: could optimize away creation of temporary, but this is rarely 937 // called. 938 return find(String(s)); 939 } 940
941 mike 1.27 Uint32 String::reverseFind(Char16 c) const 942 {
943 mike 1.112 Uint16 x = c; 944 Uint16* p = _rep->data; 945 Uint16* q = _rep->data + _rep->size;
946 mike 1.27
947 mike 1.112 while (q != p)
948 mike 1.27 {
949 mike 1.112 if (*--q == x)
950 david.dillard 1.116 return static_cast<Uint32>(q - p);
951 mike 1.27 } 952 953 return PEG_NOT_FOUND; 954 } 955 956 void String::toLower() 957 {
958 david 1.69 #ifdef PEGASUS_HAS_ICU
959 mike 1.112
960 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
961 david 1.90 {
962 mike 1.114 if (_rep->refs.get() != 1)
963 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 964
965 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
966 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this 967 // prevents optimizations where the us-ascii is converted before
968 mike 1.112 // calling ICU.
969 yi.zhou 1.108 // The string may shrink or expand after the convert. 970
971 mike 1.112 //// First calculate size of resulting string. u_strToLower() returns 972 //// only the size when zero is passed as the destination size argument. 973
974 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR; 975
976 mike 1.112 int32_t newSize = u_strToLower( 977 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
978 david.dillard 1.116
979 mike 1.112 err = U_ZERO_ERROR; 980 981 //// Reserve enough space for the result. 982 983 if ((Uint32)newSize > _rep->cap) 984 _reserve(_rep, newSize); 985 986 //// Perform the conversion (overlapping buffers are allowed).
987 chuck 1.99
988 mike 1.112 u_strToLower((UChar)_rep->data, newSize, 989 (UChar)_rep->data, _rep->size, NULL, &err);
990 yi.zhou 1.108
991 mike 1.112 _rep->size = newSize; 992 return;
993 david 1.90 }
994 mike 1.112 995 #endif /* PEGASUS_HAS_ICU */ 996
997 mike 1.114 if (_rep->refs.get() != 1)
998 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 999 1000 Uint16* p = _rep->data; 1001 size_t n = _rep->size; 1002 1003 for (; n--; p++)
1004 david 1.90 {
1005 mike 1.112 if (!(p & 0xFF00)) 1006 p = _toLower(*p);
1007 mike 1.27 }
1008 kumpf 1.39 } 1009
1010 chuck 1.99 void String::toUpper()
1011 david 1.90 { 1012 #ifdef PEGASUS_HAS_ICU
1013 mike 1.112
1014 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1015 chuck 1.99 {
1016 mike 1.114 if (_rep->refs.get() != 1)
1017 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 1018
1019 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
1020 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this 1021 // prevents optimizations where the us-ascii is converted before
1022 mike 1.112 // calling ICU.
1023 yi.zhou 1.108 // The string may shrink or expand after the convert. 1024
1025 mike 1.112 //// First calculate size of resulting string. u_strToUpper() returns 1026 //// only the size when zero is passed as the destination size argument. 1027
1028 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR; 1029
1030 mike 1.112 int32_t newSize = u_strToUpper( 1031 NULL, 0, (UChar)_rep->data, _rep->size, NULL, &err); 1032 1033 err = U_ZERO_ERROR; 1034 1035 //// Reserve enough space for the result. 1036 1037 if ((Uint32)newSize > _rep->cap) 1038 _reserve(_rep, newSize); 1039 1040 //// Perform the conversion (overlapping buffers are allowed). 1041 1042 u_strToUpper((UChar)_rep->data, newSize, 1043 (UChar*)_rep->data, _rep->size, NULL, &err);
1044 chuck 1.99
1045 mike 1.112 _rep->size = newSize;
1046 yi.zhou 1.108
1047 mike 1.112 return;
1048 david 1.91 }
1049 mike 1.112 1050 #endif /* PEGASUS_HAS_ICU */ 1051
1052 mike 1.114 if (_rep->refs.get() != 1)
1053 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 1054 1055 Uint16* p = _rep->data; 1056 size_t n = _rep->size; 1057 1058 for (; n--; p++) 1059 p = _toUpper(p);
1060 david 1.90 } 1061
1062 kumpf 1.43 int String::compare(const String& s1, const String& s2, Uint32 n)
1063 kumpf 1.39 {
1064 kumpf 1.118 const Uint16* p1 = s1._rep->data; 1065 const Uint16* p2 = s2._rep->data;
1066 mike 1.27
1067 kumpf 1.118 while (n--) 1068 { 1069 int r = p1++ - p2++; 1070 if (r) 1071 { 1072 return r; 1073 } 1074 else if (!p1[-1]) 1075 { 1076 // We must have encountered a null terminator in both s1 and s2 1077 return 0; 1078 } 1079 } 1080 return 0;
1081 mike 1.27 } 1082
1083 kumpf 1.43 int String::compare(const String& s1, const String& s2)
1084 mike 1.30 {
1085 mike 1.112 return _compare(s1._rep->data, s2._rep->data); 1086 }
1087 kumpf 1.43
1088 mike 1.112 int String::compare(const String& s1, const char* s2) 1089 { 1090 _checkNullPointer(s2);
1091 mike 1.30
1092 mike 1.112 #ifdef PEGASUS_STRING_NO_UTF8 1093 return _compareNoUTF8(s1._rep->data, s2); 1094 #else 1095 // ATTN: optimize this! 1096 return String::compare(s1, String(s2)); 1097 #endif
1098 mike 1.30 } 1099
1100 mike 1.112 int String::compareNoCase(const String& str1, const String& str2)
1101 kumpf 1.40 {
1102 david 1.69 #ifdef PEGASUS_HAS_ICU
1103 mike 1.112
1104 yi.zhou 1.108 if (InitializeICU::initICUSuccessful()) 1105 {
1106 mike 1.112 return u_strcasecmp(
1107 dave.sudlik 1.124 (const UChar)str1._rep->data, 1108 (const UChar)str2._rep->data, 1109 U_FOLD_CASE_DEFAULT 1110 );
1111 yi.zhou 1.108 }
1112 kumpf 1.40
1113 mike 1.112 #endif /* PEGASUS_HAS_ICU / 1114 1115 const Uint16 s1 = str1._rep->data; 1116 const Uint16* s2 = str2._rep->data; 1117 1118 while (s1 && s2)
1119 kumpf 1.40 {
1120 mike 1.112 int r = _toLower(s1++) - _toLower(s2++);
1121 kumpf 1.40
1122 david.dillard 1.105 if (r) 1123 return r;
1124 kumpf 1.40 } 1125
1126 mike 1.112 if (*s2)
1127 david.dillard 1.105 return -1;
1128 mike 1.112 else if (*s1)
1129 david.dillard 1.105 return 1;
1130 kumpf 1.40 1131 return 0; 1132 } 1133
1134 mike 1.112 Boolean StringEqualNoCase(const String& s1, const String& s2)
1135 mike 1.27 {
1136 mike 1.112 #ifdef PEGASUS_HAS_ICU 1137 1138 return String::compareNoCase(s1, s2) == 0; 1139 1140 #else /* PEGASUS_HAS_ICU */
1141 mike 1.27
1142 mike 1.112 // The following employs loop unrolling for efficiency. Please do not 1143 // eliminate.
1144 kumpf 1.39
1145 mike 1.112 Uint16* p = (Uint16)s1.getChar16Data(); 1146 Uint16 q = (Uint16*)s2.getChar16Data(); 1147 Uint32 n = s2.size(); 1148 1149 while (n >= 8) 1150 { 1151 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) \|\| 1152 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) \|\| 1153 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) \|\| 1154 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) \|\| 1155 ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) \|\| 1156 ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) \|\| 1157 ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) \|\| 1158 ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7])))) 1159 { 1160 return false; 1161 }
1162 kumpf 1.39
1163 mike 1.112 n -= 8; 1164 p += 8; 1165 q += 8; 1166 }
1167 mike 1.27
1168 mike 1.112 while (n >= 4)
1169 kumpf 1.39 {
1170 mike 1.112 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) \|\| 1171 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) \|\| 1172 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) \|\| 1173 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
1174 david.dillard 1.105 {
1175 mike 1.112 return false;
1176 david.dillard 1.105 }
1177 mike 1.112 1178 n -= 4; 1179 p += 4; 1180 q += 4; 1181 } 1182 1183 while (n--) 1184 { 1185 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
1186 david.dillard 1.105 return false;
1187 mike 1.112 1188 p++; 1189 q++;
1190 kumpf 1.39 }
1191 mike 1.28
1192 kumpf 1.39 return true;
1193 mike 1.112 1194 #endif /* PEGASUS_HAS_ICU */
1195 david 1.69 } 1196
1197 mike 1.112 Boolean String::equalNoCase(const String& s1, const char* s2)
1198 david 1.69 {
1199 mike 1.112 _checkNullPointer(s2);
1200 david 1.69
1201 mike 1.112 #if defined(PEGASUS_HAS_ICU)
1202 david 1.69
1203 mike 1.112 return String::equalNoCase(s1, String(s2));
1204 david 1.69
1205 mike 1.112 #elif defined(PEGASUS_STRING_NO_UTF8)
1206 david 1.69
1207 mike 1.112 const Uint16* p1 = (Uint16)s1._rep->data; 1208 const char p2 = s2; 1209 size_t n = s1._rep->size;
1210 david.dillard 1.105
1211 mike 1.112 while (n--) 1212 { 1213 if (!*p2) 1214 return false;
1215 david 1.71
1216 mike 1.112 if (_toUpper(p1++) != _toUpperTable[int(p2++)]) 1217 return false; 1218 }
1219 kumpf 1.42
1220 mike 1.112 if (*p2) 1221 return false;
1222 david.dillard 1.116
1223 mike 1.112 return true;
1224 karl 1.36
1225 mike 1.112 #else /* PEGASUS_HAS_ICU */
1226 david.dillard 1.105
1227 mike 1.112 // ATTN: optimize this! 1228 return String::equalNoCase(s1, String(s2));
1229 david.dillard 1.105
1230 mike 1.112 #endif /* PEGASUS_HAS_ICU */ 1231 }
1232 chuck 1.78
1233 mike 1.112 Boolean String::equal(const String& s1, const String& s2)
1234 karl 1.36 {
1235 david.dillard 1.116 return s1._rep->size == s2._rep->size && memcmp(s1._rep->data,
1236 mike 1.112 s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;
1237 karl 1.36 } 1238
1239 mike 1.112 Boolean String::equal(const String& s1, const char* s2) 1240 { 1241 #ifdef PEGASUS_STRING_NO_UTF8
1242 kumpf 1.35
1243 mike 1.112 _checkNullPointer(s2);
1244 kumpf 1.39
1245 mike 1.112 const Uint16* p = (Uint16)s1._rep->data; 1246 const char q = s2;
1247 kumpf 1.39
1248 mike 1.112 while (p && q) 1249 { 1250 if (p++ != Uint16(q++)) 1251 return false; 1252 }
1253 kumpf 1.39
1254 mike 1.112 return !(p \|\| q);
1255 kumpf 1.39
1256 mike 1.112 #else /* PEGASUS_STRING_NO_UTF8 */
1257 kumpf 1.39
1258 mike 1.112 return String::equal(s1, String(s2));
1259 kumpf 1.39
1260 mike 1.112 #endif /* PEGASUS_STRING_NO_UTF8 */
1261 kumpf 1.39 } 1262
1263 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
1264 kumpf 1.39 {
1265 mike 1.112 #if defined(PEGASUS_HAS_ICU)
1266 david 1.69
1267 yi.zhou 1.108 if (InitializeICU::initICUSuccessful()) 1268 {
1269 david.dillard 1.105 char buf = NULL; 1270 const int size = str.size() 6;
1271 mike 1.112 UnicodeString UniStr( 1272 (const UChar *)str.getChar16Data(), (int32_t)str.size());
1273 david.dillard 1.105 Uint32 bufsize = UniStr.extract(0,size,buf); 1274 buf = new char[bufsize+1]; 1275 UniStr.extract(0,bufsize,buf); 1276 os << buf; 1277 os.flush(); 1278 delete [] buf;
1279 david.dillard 1.116 return os;
1280 yi.zhou 1.108 }
1281 mike 1.112
1282 david.dillard 1.116 #endif // PEGASUS_HAS_ICU
1283 mike 1.112 1284 for (Uint32 i = 0, n = str.size(); i < n; i++)
1285 yi.zhou 1.108 {
1286 mike 1.112 Uint16 code = str[i];
1287 david.dillard 1.105
1288 mike 1.112 if (code > 0 && !(code & 0xFF00)) 1289 os << char(code); 1290 else 1291 { 1292 // Print in hex format: 1293 char buffer[8]; 1294 sprintf(buffer, "\\x%04X", code); 1295 os << buffer;
1296 david.dillard 1.105 }
1297 yi.zhou 1.108 }
1298 kumpf 1.39 1299 return os; 1300 } 1301
1302 mike 1.112 void StringAppendCharAux(StringRep*& _rep)
1303 kumpf 1.39 {
1304 mike 1.112 StringRep* tmp; 1305 1306 if (_rep->cap) 1307 { 1308 tmp = StringRep::alloc(2 * _rep->cap); 1309 tmp->size = _rep->size; 1310 _copy(tmp->data, _rep->data, _rep->size); 1311 } 1312 else 1313 { 1314 tmp = StringRep::alloc(8); 1315 tmp->size = 0; 1316 } 1317 1318 StringRep::unref(_rep); 1319 _rep = tmp;
1320 kumpf 1.39 } 1321
1322 mike 1.112 PEGASUS_NAMESPACE_END 1323 1324 /* 1325 ================================================================================ 1326 1327 String optimizations: 1328 1329 1. Added mechanism allowing certain functions to be inlined only when 1330 used by internal Pegasus modules. External modules (i.e., providers) 1331 link to a non-inline version, which allows for binary compatibility. 1332 1333 2. Implemented copy-on-write with atomic increment/decrement. This 1334 yielded a 10% improvement for the 'gc' benchmark and a 11% improvement 1335 for the 'ni1000' benchmark. 1336 1337 3. Employed loop unrolling in several places. For example, see: 1338 1339 static Uint16* _find(const Uint16* s, size_t n, Uint16 c); 1340 1341 4. Used the "empty-rep" optimization (described in whitepaper from the 1342 GCC Developers Summit). This reduced default construction to a simple 1343 mike 1.112 pointer assignment. 1344 1345 inline String::String() : _rep(&_emptyRep) { } 1346 1347 5. Implemented Uint16 versions of toupper() and tolower() using tables. 1348 For example: 1349 1350 static const char _upper[] = 1351 { 1352 0,1,2,...255 1353 }; 1354 1355 inline Uint16 _toUpper(Uint16 x) 1356 { 1357 return (x & 0xFF00) ? x : _upper[x]; 1358 } 1359
1360 david.dillard 1.116 This outperforms the system implementation by avoiding an anding
1361 mike 1.112 operation. 1362
1363 david.dillard 1.116 6. Implemented char* version of the following member functions to 1364 eliminate unnecessary creation of anonymous string objects
1365 mike 1.112 (temporaries). 1366 1367 String(const String& s1, const char* s2); 1368 String(const char* s1, const String& s2); 1369 String& String::operator=(const char* str); 1370 Uint32 String::find(const char* s) const; 1371 bool String::equal(const String& s1, const char* s2); 1372 static int String::compare(const String& s1, const char* s2); 1373 String& String::append(const char* str); 1374 String& String::append(const char* str, Uint32 size); 1375 static bool String::equalNoCase(const String& s1, const char* s2); 1376 String& operator=(const char* str) 1377 String& String::assign(const char* str) 1378 String& String::append(const char* str) 1379 Boolean operator==(const String& s1, const char* s2) 1380 Boolean operator==(const char* s1, const String& s2) 1381 Boolean operator!=(const String& s1, const char* s2) 1382 Boolean operator!=(const char* s1, const String& s2) 1383 Boolean operator<(const String& s1, const char* s2) 1384 Boolean operator<(const char* s1, const String& s2) 1385 Boolean operator>(const String& s1, const char* s2) 1386 mike 1.112 Boolean operator>(const char* s1, const String& s2) 1387 Boolean operator<=(const String& s1, const char* s2) 1388 Boolean operator<=(const char* s1, const String& s2) 1389 Boolean operator>=(const String& s1, const char* s2) 1390 Boolean operator>=(const char* s1, const String& s2) 1391 String operator+(const String& s1, const char* s2) 1392 String operator+(const char* s1, const String& s2) 1393
1394 david.dillard 1.116 7. Optimized _roundUpToPow2(), used in rounding the capacity to the next
1395 mike 1.112 power of two (algorithm from the book "Hacker's Delight"). 1396 1397 static Uint32 _roundUpToPow2(Uint32 x) 1398 { 1399 if (x < 8) 1400 return 8; 1401 1402 x--; 1403 x \|= (x >> 1); 1404 x \|= (x >> 2); 1405 x \|= (x >> 4); 1406 x \|= (x >> 8); 1407 x \|= (x >> 16); 1408 x++; 1409 1410 return x; 1411 } 1412 1413 8. Implemented "concatenating constructors" to eliminate temporaries
1414 david.dillard 1.116 created by operator+(). This scheme employs the "return-value
1415 mike 1.112 optimization" described by Stan Lippman. 1416 1417 inline String operator+(const String& s1, const String& s2) 1418 { 1419 return String(s1, s2, 0); 1420 } 1421 1422 9. Experimented to find the optimal initial size for a short string. 1423 Eight seems to offer the best tradeoff between space and time. 1424 1425 10. Inlined all members of the Char16 class. 1426 1427 11. Used Uint16 internally in the String class. This showed no improvement 1428 since Char16 was already fully inlined and was essentially reduced to 1429 Uint16 in any case. 1430 1431 12. Implemented conditional logic (#if) allowing error checking logic to
1432 david.dillard 1.116 be excluded to improve performance. Examples include bounds checking
1433 mike 1.112 and null-pointer checking. 1434 1435 13. Used memcpy() and memcmp() where possible. These are implemented using 1436 the rep family of instructions under Intel and are much faster. 1437
1438 david.dillard 1.116 14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
1439 mike 1.112 copy routine overhead. 1440 1441 15. Added ASCII7 form of the constructor and assign(). 1442 1443 String s("hello world", String::ASCII7); 1444 1445 s.assignASCII7("hello world"); 1446 1447 This avoids slower UTF8 processing when not needed. 1448 1449 ================================================================================ 1450 */

No CVS admin address has been configured