pegasus/src/Pegasus/Common/String.cpp - annotate

Return to String.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

1 martin 1.133 //%LICENSE////////////////////////////////////////////////////////////////
2 martin 1.134 //
3 martin 1.133 // Licensed to The Open Group (TOG) under one or more contributor license 4 // agreements. Refer to the OpenPegasusNOTICE.txt file distributed with 5 // this work for additional information regarding copyright ownership. 6 // Each contributor licenses this file to you under the OpenPegasus Open 7 // Source License; you may not use this file except in compliance with the 8 // License.
9 martin 1.134 //
10 martin 1.133 // Permission is hereby granted, free of charge, to any person obtaining a 11 // copy of this software and associated documentation files (the "Software"), 12 // to deal in the Software without restriction, including without limitation 13 // the rights to use, copy, modify, merge, publish, distribute, sublicense, 14 // and/or sell copies of the Software, and to permit persons to whom the 15 // Software is furnished to do so, subject to the following conditions:
16 martin 1.134 //
17 martin 1.133 // The above copyright notice and this permission notice shall be included 18 // in all copies or substantial portions of the Software.
19 martin 1.134 //
20 martin 1.133 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21 martin 1.134 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 martin 1.133 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 23 // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 24 // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 25 // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 26 // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 martin 1.134 //
28 martin 1.133 //////////////////////////////////////////////////////////////////////////
29 mike 1.27 // 30 //%///////////////////////////////////////////////////////////////////////////// 31
32 jim.wunderlich 1.115 #include <Pegasus/Common/PegasusAssert.h>
33 mike 1.113 #include <cstring>
34 kumpf 1.48 #include "InternalException.h"
35 david 1.69 #include "CommonUTF.h"
36 mike 1.112 #include "MessageLoader.h" 37 #include "StringRep.h"
38 david 1.69 39 #ifdef PEGASUS_HAS_ICU
40 kumpf 1.132 # include <unicode/ures.h> 41 # include <unicode/ustring.h> 42 # include <unicode/uchar.h>
43 david 1.69 #endif 44
45 mike 1.112 PEGASUS_NAMESPACE_BEGIN
46 mike 1.28
47 mike 1.112 //============================================================================== 48 // 49 // Compile-time macros (undefined by default). 50 // 51 // PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code. 52 // 53 //==============================================================================
54 mike 1.27
55 mike 1.112 //==============================================================================
56 kumpf 1.39 //
57 mike 1.112 // File-scope definitions:
58 kumpf 1.54 //
59 mike 1.112 //============================================================================== 60 61 // Note: this table is much faster than the system toupper(). Please do not 62 // change.
63 kumpf 1.54
64 david.dillard 1.116 const Uint8 _toUpperTable[256] =
65 kumpf 1.54 {
66 mike 1.112 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 67 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 68 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 69 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 70 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 71 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 72 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 73 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 74 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47, 75 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, 76 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57, 77 0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, 78 0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47, 79 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, 80 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57, 81 0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F, 82 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 83 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 84 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 85 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 86 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 87 mike 1.112 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 88 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 89 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 90 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 91 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 92 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 93 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 94 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 95 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 96 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 97 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF, 98 }; 99 100 // Note: this table is much faster than the system tulower(). Please do not 101 // change. 102
103 david.dillard 1.116 const Uint8 _toLowerTable[256] =
104 mike 1.112 { 105 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 106 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 107 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 108 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 109 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 110 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 111 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 112 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 113 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 114 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 115 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 116 0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F, 117 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 118 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 119 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 120 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, 121 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 122 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 123 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 124 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 125 mike 1.112 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 126 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 127 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 128 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 129 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 130 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 131 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 132 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 133 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 134 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 135 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 136 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF, 137 }; 138 139 // Converts 16-bit characters to upper case. This routine is faster than the 140 // system toupper(). Please do not change. 141 inline Uint16 _toUpper(Uint16 x) 142 { 143 return (x & 0xFF00) ? x : _toUpperTable[x];
144 kumpf 1.54 } 145
146 mike 1.112 // Converts 16-bit characters to lower case. This routine is faster than the 147 // system toupper(). Please do not change. 148 inline Uint16 _toLower(Uint16 x)
149 kumpf 1.54 {
150 mike 1.112 return (x & 0xFF00) ? x : _toLowerTable[x]; 151 } 152 153 // Rounds x up to the nearest power of two (or just returns 8 if x < 8). 154 static Uint32 _roundUpToPow2(Uint32 x) 155 {
156 dave.sudlik 1.120 // Check for potential overflow in x 157 PEGASUS_CHECK_CAPACITY_OVERFLOW(x);
158 mike 1.112 159 if (x < 8) 160 return 8; 161 162 x--; 163 x \|= (x >> 1); 164 x \|= (x >> 2); 165 x \|= (x >> 4); 166 x \|= (x >> 8); 167 x \|= (x >> 16); 168 x++; 169 170 return x; 171 } 172 173 template<class P, class Q> 174 static void _copy(P* p, const Q* q, size_t n) 175 { 176 // The following employs loop unrolling for efficiency. Please do not 177 // eliminate. 178 179 mike 1.112 while (n >= 8) 180 { 181 p[0] = q[0]; 182 p[1] = q[1]; 183 p[2] = q[2]; 184 p[3] = q[3]; 185 p[4] = q[4]; 186 p[5] = q[5]; 187 p[6] = q[6]; 188 p[7] = q[7]; 189 p += 8; 190 q += 8; 191 n -= 8; 192 } 193 194 while (n >= 4) 195 { 196 p[0] = q[0]; 197 p[1] = q[1]; 198 p[2] = q[2]; 199 p[3] = q[3]; 200 mike 1.112 p += 4; 201 q += 4; 202 n -= 4; 203 } 204 205 while (n--) 206 p++ = q++; 207 } 208 209 static Uint16* _find(const Uint16* s, size_t n, Uint16 c) 210 { 211 // The following employs loop unrolling for efficiency. Please do not 212 // eliminate. 213 214 while (n >= 4) 215 { 216 if (s[0] == c) 217 return (Uint16)s; 218 if (s[1] == c) 219 return (Uint16)&s[1]; 220 if (s[2] == c) 221 mike 1.112 return (Uint16)&s[2]; 222 if (s[3] == c) 223 return (Uint16)&s[3];
224 kumpf 1.82
225 mike 1.112 n -= 4; 226 s += 4; 227 } 228 229 if (n) 230 { 231 if (s == c) 232 return (Uint16)s; 233 s++; 234 n--; 235 } 236 237 if (n) 238 { 239 if (s == c) 240 return (Uint16)s; 241 s++; 242 n--; 243 } 244 245 if (n && s == c) 246 mike 1.112 return (Uint16)s; 247 248 // Not found! 249 return 0; 250 } 251 252 static int _compare(const Uint16* s1, const Uint16* s2) 253 { 254 while (s1 && s2) 255 { 256 int r = s1++ - s2++; 257 258 if (r) 259 return r; 260 } 261 262 if (s2) 263 return -1; 264 else if (s1) 265 return 1; 266 267 mike 1.112 return 0; 268 } 269
#ifdef PEGASUS_STRING_NO_UTF8
// Compares a zero-terminated 16-bit string against a zero-terminated char
// string, character by character, without any UTF-8 decoding. Returns the
// difference of the first differing pair (0 when both strings end together).
static int _compareNoUTF8(const Uint16* s1, const char* s2)
{
    Uint16 c1;
    Uint16 c2;

    do
    {
        c1 = *s1++;
        c2 = *s2++;

        // Stop at the end of s1; c2 is 0 here only if s2 ended as well.
        if (c1 == 0)
            return c1 - c2;
    }
    while (c1 == c2);

    return c1 - c2;
}
#endif
289 mike 1.112 290 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n) 291 { 292 memcpy(s1, s2, n * sizeof(Uint16)); 293 } 294 295 void StringThrowOutOfBounds() 296 { 297 throw IndexOutOfBoundsException(); 298 } 299 300 inline void _checkNullPointer(const void* ptr) 301 { 302 if (!ptr) 303 throw NullPointer(); 304 } 305 306 static void _StringThrowBadUTF8(Uint32 index) 307 { 308 MessageLoaderParms parms( 309 "Common.String.BAD_UTF8", 310 mike 1.112 "The byte sequence starting at index $0 " 311 "is not valid UTF-8 encoding.", 312 index); 313 throw Exception(parms); 314 } 315 316 static size_t _copyFromUTF8(
317 david.dillard 1.116 Uint16* dest, 318 const char* src,
319 mike 1.112 size_t n, 320 size_t& utf8_error_index) 321 { 322 Uint16* p = dest; 323 const Uint8* q = (const Uint8)src; 324 325 // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later). 326 // Use loop-unrolling. 327 328 while (n >=8 && ((q[0]\|q[1]\|q[2]\|q[3]\|q[4]\|q[5]\|q[6]\|q[7]) & 0x80) == 0) 329 { 330 p[0] = q[0]; 331 p[1] = q[1]; 332 p[2] = q[2]; 333 p[3] = q[3]; 334 p[4] = q[4]; 335 p[5] = q[5]; 336 p[6] = q[6]; 337 p[7] = q[7]; 338 p += 8; 339 q += 8; 340 mike 1.112 n -= 8; 341 } 342 343 while (n >=4 && ((q[0]\|q[1]\|q[2]\|q[3]) & 0x80) == 0) 344 { 345 p[0] = q[0]; 346 p[1] = q[1]; 347 p[2] = q[2]; 348 p[3] = q[3]; 349 p += 4; 350 q += 4; 351 n -= 4; 352 } 353 354 switch (n) 355 { 356 case 0: 357 return p - dest; 358 case 1: 359 if (q[0] < 128) 360 { 361 mike 1.112 p[0] = q[0]; 362 return p + 1 - dest; 363 } 364 break; 365 case 2: 366 if (((q[0]\|q[1]) & 0x80) == 0) 367 { 368 p[0] = q[0]; 369 p[1] = q[1]; 370 return p + 2 - dest; 371 } 372 break; 373 case 3: 374 if (((q[0]\|q[1]\|q[2]) & 0x80) == 0) 375 { 376 p[0] = q[0]; 377 p[1] = q[1]; 378 p[2] = q[2]; 379 return p + 3 - dest; 380 } 381 break; 382 mike 1.112 } 383 384 // Process remaining characters. 385 386 while (n) 387 { 388 // Optimize for 7-bit ASCII case. 389 390 if (q < 128) 391 { 392 p++ = q++; 393 n--; 394 } 395 else 396 { 397 Uint8 c = UTF_8_COUNT_TRAIL_BYTES(q) + 1; 398 399 if (c > n \|\| !isValid_U8(q, c) \|\| 400 UTF8toUTF16(&q, q + c, &p, p + n) != 0) 401 { 402 utf8_error_index = q - (const Uint8)src; 403 mike 1.112 return size_t(-1); 404 } 405 406 n -= c; 407 } 408 } 409 410 return p - dest; 411 } 412
413 david.dillard 1.116 // Note: dest must be at least three times src (plus an extra byte for
414 mike 1.112 // terminator). 415 static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n) 416 { 417 // The following employs loop unrolling for efficiency. Please do not 418 // eliminate. 419 420 const Uint16* q = src; 421 Uint8* p = (Uint8*)dest; 422 423 while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
424 kumpf 1.82 {
425 mike 1.112 p[0] = q[0]; 426 p[1] = q[1]; 427 p[2] = q[2]; 428 p[3] = q[3]; 429 p += 4; 430 q += 4; 431 n -= 4;
432 kumpf 1.82 }
433 mike 1.112 434 switch (n) 435 { 436 case 0: 437 return p - (Uint8)dest; 438 case 1: 439 if (q[0] < 128) 440 { 441 p[0] = q[0]; 442 return p + 1 - (Uint8)dest; 443 } 444 break; 445 case 2: 446 if (q[0] < 128 && q[1] < 128) 447 { 448 p[0] = q[0]; 449 p[1] = q[1]; 450 return p + 2 - (Uint8)dest; 451 } 452 break; 453 case 3: 454 mike 1.112 if (q[0] < 128 && q[1] < 128 && q[2] < 128) 455 { 456 p[0] = q[0]; 457 p[1] = q[1]; 458 p[2] = q[2]; 459 return p + 3 - (Uint8)dest; 460 } 461 break; 462 } 463 464 // If this line was reached, there must be characters greater than 128. 465 466 UTF16toUTF8(&q, q + n, &p, p + 3 * n); 467 468 return p - (Uint8*)dest;
469 kumpf 1.54 } 470
471 mike 1.112 static inline size_t _convert( 472 Uint16* p, const char* q, size_t n, size_t& utf8_error_index)
473 kumpf 1.54 {
474 mike 1.112 #ifdef PEGASUS_STRING_NO_UTF8 475 _copy(p, q, n); 476 return n; 477 #else 478 return _copyFromUTF8(p, q, n, utf8_error_index); 479 #endif
480 kumpf 1.54 } 481
482 mike 1.112 //============================================================================== 483 // 484 // class CString 485 // 486 //============================================================================== 487 488 CString::CString(const CString& cstr) : _rep(0)
489 kumpf 1.54 {
490 mike 1.112 if (cstr._rep)
491 kumpf 1.82 {
492 mike 1.112 size_t n = strlen(cstr._rep) + 1; 493 _rep = (char*)operator new(n); 494 memcpy(_rep, cstr._rep, n);
495 kumpf 1.82 }
496 kumpf 1.54 } 497
498 kumpf 1.56 CString& CString::operator=(const CString& cstr) 499 {
500 kumpf 1.82 if (&cstr != this)
501 kumpf 1.81 {
502 kumpf 1.82 if (_rep) 503 {
504 mike 1.112 operator delete(_rep);
505 kumpf 1.82 _rep = 0; 506 }
507 mike 1.112
508 kumpf 1.82 if (cstr._rep) 509 {
510 mike 1.112 size_t n = strlen(cstr._rep) + 1; 511 _rep = (char*)operator new(n); 512 memcpy(_rep, cstr._rep, n);
513 kumpf 1.82 }
514 kumpf 1.81 }
515 mike 1.112
516 kumpf 1.56 return *this; 517 } 518
519 mike 1.112 //==============================================================================
520 kumpf 1.54 //
521 mike 1.112 // class StringRep
522 kumpf 1.39 //
523 mike 1.112 //==============================================================================
524 kumpf 1.39
525 mike 1.112 StringRep StringRep::_emptyRep;
526 mike 1.27
527 mike 1.112 inline StringRep* StringRep::alloc(size_t cap)
528 mike 1.27 {
529 dave.sudlik 1.120 // Check for potential overflow in cap 530 PEGASUS_CHECK_CAPACITY_OVERFLOW(cap);
531 mike 1.27
532 mike 1.112 StringRep* rep = (StringRep)::operator new( 533 sizeof(StringRep) + cap sizeof(Uint16)); 534 rep->cap = cap; 535 new(&rep->refs) AtomicInt(1); 536 537 return rep;
538 mike 1.27 } 539
540 mike 1.112 static inline void _reserve(StringRep*& rep, Uint32 cap)
541 chuck 1.102 {
542 mike 1.114 if (cap > rep->cap \|\| rep->refs.get() != 1)
543 chuck 1.102 {
544 mike 1.112 size_t n = _roundUpToPow2(cap); 545 StringRep* newRep = StringRep::alloc(n); 546 newRep->size = rep->size; 547 _copy(newRep->data, rep->data, rep->size + 1); 548 StringRep::unref(rep); 549 rep = newRep; 550 } 551 }
552 david.dillard 1.105
553 mike 1.112 StringRep* StringRep::create(const Uint16* data, size_t size) 554 { 555 StringRep* rep = StringRep::alloc(size); 556 rep->size = size; 557 _copy(rep->data, data, size); 558 rep->data[size] = '\0'; 559 return rep; 560 }
561 chuck 1.102
562 mike 1.112 StringRep* StringRep::copyOnWrite(StringRep* rep) 563 { 564 // Return a new copy of rep. Release rep.
565 chuck 1.102
566 mike 1.112 StringRep* newRep = StringRep::alloc(rep->size); 567 newRep->size = rep->size; 568 _copy(newRep->data, rep->data, rep->size); 569 newRep->data[newRep->size] = '\0'; 570 StringRep::unref(rep); 571 return newRep;
572 chuck 1.102 } 573
574 mike 1.112 StringRep* StringRep::create(const char* data, size_t size)
575 kumpf 1.43 {
576 mike 1.112 StringRep* rep = StringRep::alloc(size); 577 size_t utf8_error_index; 578 rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index); 579 580 if (rep->size == size_t(-1)) 581 { 582 StringRep::free(rep);
583 a.dunfey 1.125 _StringThrowBadUTF8((Uint32)utf8_error_index);
584 mike 1.112 }
585 kumpf 1.43
586 mike 1.112 rep->data[rep->size] = '\0';
587 kumpf 1.43
588 mike 1.112 return rep;
589 mike 1.27 } 590
591 mike 1.112 Uint32 StringRep::length(const Uint16* str)
592 mike 1.27 {
593 mike 1.112 // Note: We could unroll this but it is rarely called. 594 595 const Uint16* end = (Uint16)str; 596 597 while (end++) 598 ; 599
600 a.dunfey 1.125 return (Uint32)(end - str - 1);
601 kumpf 1.39 }
602 tony 1.66
603 mike 1.112 //============================================================================== 604 // 605 // class String 606 // 607 //============================================================================== 608 609 const String String::EMPTY;
610 mike 1.27
611 kumpf 1.39 String::String(const String& str, Uint32 n) 612 {
613 mike 1.112 _checkBounds(n, str._rep->size); 614 _rep = StringRep::create(str._rep->data, n);
615 kumpf 1.39 } 616 617 String::String(const Char16* str) 618 {
619 mike 1.112 _checkNullPointer(str); 620 _rep = StringRep::create((Uint16)str, StringRep::length((Uint16)str));
621 mike 1.27 } 622
623 kumpf 1.39 String::String(const Char16* str, Uint32 n) 624 {
625 mike 1.112 _checkNullPointer(str); 626 _rep = StringRep::create((Uint16*)str, n);
627 kumpf 1.39 } 628 629 String::String(const char* str)
630 mike 1.27 {
631 mike 1.112 _checkNullPointer(str);
632 david.dillard 1.105
633 mike 1.112 // Set this just in case create() throws an exception. 634 _rep = &StringRep::_emptyRep; 635 _rep = StringRep::create(str, strlen(str));
636 mike 1.27 } 637
638 kumpf 1.39 String::String(const char* str, Uint32 n)
639 mike 1.27 {
640 mike 1.112 _checkNullPointer(str);
641 david.dillard 1.105
642 mike 1.112 // Set this just in case create() throws an exception. 643 _rep = &StringRep::_emptyRep; 644 _rep = StringRep::create(str, n);
645 kumpf 1.39 }
646 mike 1.27
647 mike 1.112 String::String(const String& s1, const String& s2)
648 kumpf 1.39 {
649 mike 1.112 size_t n1 = s1._rep->size; 650 size_t n2 = s2._rep->size; 651 size_t n = n1 + n2; 652 _rep = StringRep::alloc(n); 653 _copy(_rep->data, s1._rep->data, n1); 654 _copy(_rep->data + n1, s2._rep->data, n2); 655 _rep->size = n; 656 _rep->data[n] = '\0';
657 mike 1.27 } 658
659 mike 1.112 String::String(const String& s1, const char* s2)
660 mike 1.27 {
661 mike 1.112 _checkNullPointer(s2); 662 size_t n1 = s1._rep->size; 663 size_t n2 = strlen(s2); 664 _rep = StringRep::alloc(n1 + n2); 665 _copy(_rep->data, s1._rep->data, n1); 666 size_t utf8_error_index; 667 size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index); 668 669 if (tmp == size_t(-1))
670 kumpf 1.82 {
671 mike 1.112 StringRep::free(_rep); 672 _rep = &StringRep::_emptyRep;
673 a.dunfey 1.125 _StringThrowBadUTF8((Uint32)utf8_error_index);
674 kumpf 1.82 }
675 mike 1.112 676 _rep->size = n1 + tmp; 677 _rep->data[_rep->size] = '\0';
678 mike 1.27 } 679
680 mike 1.112 String::String(const char* s1, const String& s2)
681 mike 1.27 {
682 mike 1.112 _checkNullPointer(s1); 683 size_t n1 = strlen(s1); 684 size_t n2 = s2._rep->size; 685 _rep = StringRep::alloc(n1 + n2); 686 size_t utf8_error_index; 687 size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index); 688 689 if (tmp == size_t(-1)) 690 { 691 StringRep::free(_rep); 692 _rep = &StringRep::_emptyRep;
693 a.dunfey 1.125 _StringThrowBadUTF8((Uint32)utf8_error_index);
694 mike 1.112 } 695 696 _rep->size = n2 + tmp; 697 _copy(_rep->data + n1, s2._rep->data, n2); 698 _rep->data[_rep->size] = '\0';
699 mike 1.27 } 700
701 mike 1.112 String& String::assign(const String& str)
702 mike 1.27 {
703 mike 1.112 if (_rep != str._rep)
704 david.dillard 1.105 {
705 mike 1.112 StringRep::unref(_rep); 706 StringRep::ref(_rep = str._rep);
707 david.dillard 1.105 } 708
709 mike 1.27 return this; 710 } 711 712 String& String::assign(const Char16 str, Uint32 n) 713 {
714 mike 1.112 _checkNullPointer(str); 715
716 mike 1.114 if (n > _rep->cap \|\| _rep->refs.get() != 1)
717 david.dillard 1.105 {
718 mike 1.112 StringRep::unref(_rep); 719 _rep = StringRep::alloc(n);
720 david.dillard 1.105 } 721
722 mike 1.112 _rep->size = n; 723 _copy(_rep->data, (Uint16*)str, n); 724 _rep->data[n] = '\0'; 725
726 mike 1.27 return *this; 727 } 728
729 mike 1.112 String& String::assign(const char* str, Uint32 n)
730 chuck 1.102 {
731 mike 1.112 _checkNullPointer(str); 732
733 mike 1.114 if (n > _rep->cap \|\| _rep->refs.get() != 1)
734 david.dillard 1.105 {
735 mike 1.112 StringRep::unref(_rep); 736 _rep = StringRep::alloc(n);
737 david.dillard 1.105 } 738
739 mike 1.112 size_t utf8_error_index; 740 _rep->size = _convert(_rep->data, str, n, utf8_error_index);
741 chuck 1.102
742 mike 1.112 if (_rep->size == size_t(-1))
743 david.dillard 1.105 {
744 mike 1.112 StringRep::free(_rep); 745 _rep = &StringRep::_emptyRep;
746 a.dunfey 1.125 _StringThrowBadUTF8((Uint32)utf8_error_index);
747 david.dillard 1.105 }
748 mike 1.112 749 _rep->data[_rep->size] = 0;
750 david.dillard 1.105
751 mike 1.27 return *this; 752 } 753
754 kumpf 1.39 void String::clear() 755 {
756 mike 1.112 if (_rep->size) 757 {
758 mike 1.114 if (_rep->refs.get() == 1)
759 mike 1.112 { 760 _rep->size = 0; 761 _rep->data[0] = '\0'; 762 } 763 else 764 { 765 StringRep::unref(_rep); 766 _rep = &StringRep::_emptyRep; 767 } 768 }
769 kumpf 1.39 } 770
771 mike 1.112 void String::reserveCapacity(Uint32 cap)
772 kumpf 1.39 {
773 mike 1.112 _reserve(_rep, cap);
774 kumpf 1.39 } 775
776 mike 1.112 CString String::getCString() const 777 {
778 david.dillard 1.116 // A UTF8 string can have three times as many characters as its UTF16 779 // counterpart, so we allocate extra memory for the worst case. In the
780 mike 1.112 // best case, we may need only one third of the memory allocated. But
781 david.dillard 1.116 // downsizing the string afterwards is expensive and unecessary since 782 // CString objects are usually short-lived (disappearing after only a few
783 mike 1.112 // instructions). CString objects are typically created on the stack as 784 // means to obtain a char* pointer. 785 786 #ifdef PEGASUS_STRING_NO_UTF8 787 char* str = (char*)operator new(_rep->size + 1); 788 _copy(str, _rep->data, _rep->size); 789 str[_rep->size] = '\0'; 790 return CString(str);
791 gs.keenan 1.110 #else
792 a.dunfey 1.125 Uint32 n = (Uint32)(3 * _rep->size);
793 mike 1.112 char* str = (char*)operator new(n + 1); 794 size_t size = _copyToUTF8(str, _rep->data, _rep->size); 795 str[size] = '\0'; 796 return CString(str);
797 gs.keenan 1.110 #endif
798 kumpf 1.39 } 799
800 mike 1.112 String& String::append(const Char16* str, Uint32 n)
801 kumpf 1.39 {
802 mike 1.112 _checkNullPointer(str); 803 804 size_t oldSize = _rep->size; 805 size_t newSize = oldSize + n;
806 a.dunfey 1.125 _reserve(_rep, (Uint32)newSize);
807 mike 1.112 _copy(_rep->data + oldSize, (Uint16)str, n); 808 _rep->size = newSize; 809 _rep->data[newSize] = '\0'; 810 811 return this;
812 kumpf 1.39 } 813
814 mike 1.112 String& String::append(const String& str)
815 mike 1.27 {
816 a.dunfey 1.125 return append((Char16*)(&(str._rep->data[0])), (Uint32)str._rep->size);
817 mike 1.27 } 818
819 mike 1.112 String& String::append(const char* str, Uint32 size)
820 mike 1.27 {
821 mike 1.112 _checkNullPointer(str); 822 823 size_t oldSize = _rep->size; 824 size_t cap = oldSize + size; 825
826 a.dunfey 1.125 _reserve(_rep, (Uint32)cap);
827 mike 1.112 size_t utf8_error_index; 828 size_t tmp = _convert( 829 (Uint16*)_rep->data + oldSize, str, size, utf8_error_index); 830 831 if (tmp == size_t(-1)) 832 { 833 StringRep::free(_rep); 834 _rep = &StringRep::_emptyRep;
835 a.dunfey 1.125 _StringThrowBadUTF8((Uint32)utf8_error_index);
836 mike 1.112 }
837 mike 1.27
838 mike 1.112 _rep->size += tmp; 839 _rep->data[_rep->size] = '\0';
840 mike 1.27
841 kumpf 1.39 return *this; 842 } 843
844 mike 1.112 void String::remove(Uint32 index, Uint32 n)
845 mike 1.27 {
846 mike 1.112 if (n == PEG_NOT_FOUND)
847 a.dunfey 1.125 n = (Uint32)(_rep->size - index);
848 mike 1.112 849 _checkBounds(index + n, _rep->size); 850
851 mike 1.114 if (_rep->refs.get() != 1)
852 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
853 mike 1.27
854 jim.wunderlich 1.115 PEGASUS_ASSERT(index + n <= _rep->size);
855 mike 1.27
856 mike 1.112 size_t rem = _rep->size - (index + n); 857 Uint16* data = _rep->data;
858 mike 1.27
859 mike 1.112 if (rem) 860 memmove(data + index, data + index + n, rem * sizeof(Uint16));
861 mike 1.27
862 mike 1.112 _rep->size -= n; 863 data[_rep->size] = '\0';
864 mike 1.27 } 865
866 mike 1.112 String String::subString(Uint32 index, Uint32 n) const
867 mike 1.27 {
868 mike 1.112 // Note: this implementation is very permissive but used for 869 // backwards compatibility. 870 871 if (index < _rep->size)
872 mike 1.27 {
873 mike 1.112 if (n == PEG_NOT_FOUND \|\| n > _rep->size - index)
874 a.dunfey 1.125 n = (Uint32)(_rep->size - index);
875 mike 1.27
876 w.otsuka 1.121 return String((Char16*)(_rep->data + index), n);
877 mike 1.27 }
878 david.dillard 1.105 879 return String();
880 mike 1.27 } 881 882 Uint32 String::find(Char16 c) const 883 {
884 mike 1.112 Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
885 mike 1.27
886 mike 1.112 if (p)
887 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
888 mike 1.27 889 return PEG_NOT_FOUND; 890 } 891
892 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
893 mike 1.30 {
894 mike 1.112 _checkBounds(index, _rep->size); 895 896 if (index >= _rep->size) 897 return PEG_NOT_FOUND; 898 899 Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
900 mike 1.30
901 mike 1.112 if (p)
902 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
903 mike 1.30 904 return PEG_NOT_FOUND; 905 } 906
907 mike 1.112 Uint32 StringFindAux( 908 const StringRep* _rep, const Char16* s, Uint32 n)
909 mike 1.27 {
910 mike 1.112 _checkNullPointer(s);
911 mike 1.27
912 mike 1.112 const Uint16* data = _rep->data; 913 size_t rem = _rep->size; 914 915 while (n <= rem)
916 mike 1.30 {
917 mike 1.112 Uint16* p = (Uint16*)_find(data, rem, s[0]); 918 919 if (!p) 920 break;
921 mike 1.30
922 mike 1.112 if (memcmp(p, s, n * sizeof(Uint16)) == 0)
923 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
924 david.dillard 1.105
925 mike 1.112 p++; 926 rem -= p - data; 927 data = p;
928 mike 1.27 }
929 mike 1.112
930 mike 1.27 return PEG_NOT_FOUND; 931 } 932
933 mike 1.112 Uint32 String::find(const char* s) const 934 { 935 _checkNullPointer(s); 936 937 // Note: could optimize away creation of temporary, but this is rarely 938 // called. 939 return find(String(s)); 940 } 941
942 mike 1.27 Uint32 String::reverseFind(Char16 c) const 943 {
944 mike 1.112 Uint16 x = c; 945 Uint16* p = _rep->data; 946 Uint16* q = _rep->data + _rep->size;
947 mike 1.27
948 mike 1.112 while (q != p)
949 mike 1.27 {
950 mike 1.112 if (*--q == x)
951 david.dillard 1.116 return static_cast<Uint32>(q - p);
952 mike 1.27 } 953 954 return PEG_NOT_FOUND; 955 } 956 957 void String::toLower() 958 {
959 david 1.69 #ifdef PEGASUS_HAS_ICU
960 mike 1.112
961 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
962 david 1.90 {
963 mike 1.114 if (_rep->refs.get() != 1)
964 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 965
966 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
967 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this 968 // prevents optimizations where the us-ascii is converted before
969 mike 1.112 // calling ICU.
970 yi.zhou 1.108 // The string may shrink or expand after the convert. 971
972 mike 1.112 //// First calculate size of resulting string. u_strToLower() returns 973 //// only the size when zero is passed as the destination size argument. 974
975 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR; 976
977 mike 1.112 int32_t newSize = u_strToLower( 978 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
979 david.dillard 1.116
980 mike 1.112 err = U_ZERO_ERROR; 981 982 //// Reserve enough space for the result. 983 984 if ((Uint32)newSize > _rep->cap) 985 _reserve(_rep, newSize); 986 987 //// Perform the conversion (overlapping buffers are allowed).
988 chuck 1.99
989 mike 1.112 u_strToLower((UChar)_rep->data, newSize, 990 (UChar)_rep->data, _rep->size, NULL, &err);
991 yi.zhou 1.108
992 mike 1.112 _rep->size = newSize; 993 return;
994 david 1.90 }
995 mike 1.112 996 #endif /* PEGASUS_HAS_ICU */ 997
998 mike 1.114 if (_rep->refs.get() != 1)
999 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 1000 1001 Uint16* p = _rep->data; 1002 size_t n = _rep->size; 1003 1004 for (; n--; p++)
1005 david 1.90 {
1006 mike 1.112 if (!(p & 0xFF00)) 1007 p = _toLower(*p);
1008 mike 1.27 }
1009 kumpf 1.39 } 1010
1011 chuck 1.99 void String::toUpper()
1012 david 1.90 { 1013 #ifdef PEGASUS_HAS_ICU
1014 mike 1.112
1015 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1016 chuck 1.99 {
1017 mike 1.114 if (_rep->refs.get() != 1)
1018 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 1019
1020 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
1021 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this 1022 // prevents optimizations where the us-ascii is converted before
1023 mike 1.112 // calling ICU.
1024 yi.zhou 1.108 // The string may shrink or expand after the convert. 1025
1026 mike 1.112 //// First calculate size of resulting string. u_strToUpper() returns 1027 //// only the size when zero is passed as the destination size argument. 1028
1029 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR; 1030
1031 mike 1.112 int32_t newSize = u_strToUpper( 1032 NULL, 0, (UChar)_rep->data, _rep->size, NULL, &err); 1033 1034 err = U_ZERO_ERROR; 1035 1036 //// Reserve enough space for the result. 1037 1038 if ((Uint32)newSize > _rep->cap) 1039 _reserve(_rep, newSize); 1040 1041 //// Perform the conversion (overlapping buffers are allowed). 1042 1043 u_strToUpper((UChar)_rep->data, newSize, 1044 (UChar*)_rep->data, _rep->size, NULL, &err);
1045 chuck 1.99
1046 mike 1.112 _rep->size = newSize;
1047 yi.zhou 1.108
1048 mike 1.112 return;
1049 david 1.91 }
1050 mike 1.112 1051 #endif /* PEGASUS_HAS_ICU */ 1052
1053 mike 1.114 if (_rep->refs.get() != 1)
1054 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 1055 1056 Uint16* p = _rep->data; 1057 size_t n = _rep->size; 1058 1059 for (; n--; p++) 1060 p = _toUpper(p);
1061 david 1.90 } 1062
1063 kumpf 1.43 int String::compare(const String& s1, const String& s2, Uint32 n)
1064 kumpf 1.39 {
1065 kumpf 1.118 const Uint16* p1 = s1._rep->data; 1066 const Uint16* p2 = s2._rep->data;
1067 mike 1.27
1068 kumpf 1.118 while (n--) 1069 { 1070 int r = p1++ - p2++; 1071 if (r) 1072 { 1073 return r; 1074 } 1075 else if (!p1[-1]) 1076 { 1077 // We must have encountered a null terminator in both s1 and s2 1078 return 0; 1079 } 1080 } 1081 return 0;
1082 mike 1.27 } 1083
1084 kumpf 1.43 int String::compare(const String& s1, const String& s2)
1085 mike 1.30 {
1086 mike 1.112 return _compare(s1._rep->data, s2._rep->data); 1087 }
1088 kumpf 1.43
1089 mike 1.112 int String::compare(const String& s1, const char* s2) 1090 { 1091 _checkNullPointer(s2);
1092 mike 1.30
1093 mike 1.112 #ifdef PEGASUS_STRING_NO_UTF8 1094 return _compareNoUTF8(s1._rep->data, s2); 1095 #else 1096 // ATTN: optimize this! 1097 return String::compare(s1, String(s2)); 1098 #endif
1099 mike 1.30 } 1100
1101 mike 1.112 int String::compareNoCase(const String& str1, const String& str2)
1102 kumpf 1.40 {
1103 david 1.69 #ifdef PEGASUS_HAS_ICU
1104 mike 1.112
1105 yi.zhou 1.108 if (InitializeICU::initICUSuccessful()) 1106 {
1107 mike 1.112 return u_strcasecmp(
1108 dave.sudlik 1.124 (const UChar)str1._rep->data, 1109 (const UChar)str2._rep->data, 1110 U_FOLD_CASE_DEFAULT 1111 );
1112 yi.zhou 1.108 }
1113 kumpf 1.40
1114 mike 1.112 #endif /* PEGASUS_HAS_ICU / 1115 1116 const Uint16 s1 = str1._rep->data; 1117 const Uint16* s2 = str2._rep->data; 1118 1119 while (s1 && s2)
1120 kumpf 1.40 {
1121 mike 1.112 int r = _toLower(s1++) - _toLower(s2++);
1122 kumpf 1.40
1123 david.dillard 1.105 if (r) 1124 return r;
1125 kumpf 1.40 } 1126
1127 mike 1.112 if (*s2)
1128 david.dillard 1.105 return -1;
1129 mike 1.112 else if (*s1)
1130 david.dillard 1.105 return 1;
1131 kumpf 1.40 1132 return 0; 1133 } 1134
1135 mike 1.112 Boolean StringEqualNoCase(const String& s1, const String& s2)
1136 mike 1.27 {
1137 mike 1.112 #ifdef PEGASUS_HAS_ICU 1138 1139 return String::compareNoCase(s1, s2) == 0; 1140 1141 #else /* PEGASUS_HAS_ICU */
1142 mike 1.27
1143 mike 1.112 // The following employs loop unrolling for efficiency. Please do not 1144 // eliminate.
1145 kumpf 1.39
1146 mike 1.112 Uint16* p = (Uint16)s1.getChar16Data(); 1147 Uint16 q = (Uint16*)s2.getChar16Data(); 1148 Uint32 n = s2.size(); 1149 1150 while (n >= 8) 1151 { 1152 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) \|\| 1153 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) \|\| 1154 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) \|\| 1155 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) \|\| 1156 ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) \|\| 1157 ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) \|\| 1158 ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) \|\| 1159 ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7])))) 1160 { 1161 return false; 1162 }
1163 kumpf 1.39
1164 mike 1.112 n -= 8; 1165 p += 8; 1166 q += 8; 1167 }
1168 mike 1.27
1169 mike 1.112 while (n >= 4)
1170 kumpf 1.39 {
1171 mike 1.112 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) \|\| 1172 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) \|\| 1173 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) \|\| 1174 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
1175 david.dillard 1.105 {
1176 mike 1.112 return false;
1177 david.dillard 1.105 }
1178 mike 1.112 1179 n -= 4; 1180 p += 4; 1181 q += 4; 1182 } 1183 1184 while (n--) 1185 { 1186 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
1187 david.dillard 1.105 return false;
1188 mike 1.112 1189 p++; 1190 q++;
1191 kumpf 1.39 }
1192 mike 1.28
1193 kumpf 1.39 return true;
1194 mike 1.112 1195 #endif /* PEGASUS_HAS_ICU */
1196 david 1.69 } 1197
1198 mike 1.112 Boolean String::equalNoCase(const String& s1, const char* s2)
1199 david 1.69 {
1200 mike 1.112 _checkNullPointer(s2);
1201 david 1.69
1202 mike 1.112 #if defined(PEGASUS_HAS_ICU)
1203 david 1.69
1204 mike 1.112 return String::equalNoCase(s1, String(s2));
1205 david 1.69
1206 mike 1.112 #elif defined(PEGASUS_STRING_NO_UTF8)
1207 david 1.69
1208 mike 1.112 const Uint16* p1 = (Uint16)s1._rep->data; 1209 const char p2 = s2; 1210 size_t n = s1._rep->size;
1211 david.dillard 1.105
1212 mike 1.112 while (n--) 1213 { 1214 if (!*p2) 1215 return false;
1216 david 1.71
1217 mike 1.112 if (_toUpper(p1++) != _toUpperTable[int(p2++)]) 1218 return false; 1219 }
1220 kumpf 1.42
1221 mike 1.112 if (*p2) 1222 return false;
1223 david.dillard 1.116
1224 mike 1.112 return true;
1225 karl 1.36
1226 mike 1.112 #else /* PEGASUS_HAS_ICU */
1227 david.dillard 1.105
1228 mike 1.112 // ATTN: optimize this! 1229 return String::equalNoCase(s1, String(s2));
1230 david.dillard 1.105
1231 mike 1.112 #endif /* PEGASUS_HAS_ICU */ 1232 }
1233 chuck 1.78
1234 mike 1.112 Boolean String::equal(const String& s1, const String& s2)
1235 karl 1.36 {
1236 marek 1.131 return (s1._rep == s2._rep) \|\|
1237 kumpf 1.135 (s1._rep->size == s2._rep->size) &&
1238 marek 1.131 memcmp(s1._rep->data,
1239 kumpf 1.135 s2._rep->data,
1240 marek 1.131 s1._rep->size * sizeof(Uint16)) == 0;
1241 karl 1.36 } 1242
1243 mike 1.112 Boolean String::equal(const String& s1, const char* s2) 1244 { 1245 #ifdef PEGASUS_STRING_NO_UTF8
1246 kumpf 1.35
1247 mike 1.112 _checkNullPointer(s2);
1248 kumpf 1.39
1249 mike 1.112 const Uint16* p = (Uint16)s1._rep->data; 1250 const char q = s2;
1251 kumpf 1.39
1252 mike 1.112 while (p && q) 1253 { 1254 if (p++ != Uint16(q++)) 1255 return false; 1256 }
1257 kumpf 1.39
1258 mike 1.112 return !(p \|\| q);
1259 kumpf 1.39
1260 mike 1.112 #else /* PEGASUS_STRING_NO_UTF8 */
1261 kumpf 1.39
1262 mike 1.112 return String::equal(s1, String(s2));
1263 kumpf 1.39
1264 mike 1.112 #endif /* PEGASUS_STRING_NO_UTF8 */
1265 kumpf 1.39 } 1266
1267 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
1268 kumpf 1.39 {
1269 mike 1.112 #if defined(PEGASUS_HAS_ICU)
1270 david 1.69
1271 yi.zhou 1.108 if (InitializeICU::initICUSuccessful()) 1272 {
1273 david.dillard 1.105 char buf = NULL; 1274 const int size = str.size() 6;
1275 mike 1.112 UnicodeString UniStr( 1276 (const UChar *)str.getChar16Data(), (int32_t)str.size());
1277 david.dillard 1.105 Uint32 bufsize = UniStr.extract(0,size,buf); 1278 buf = new char[bufsize+1]; 1279 UniStr.extract(0,bufsize,buf); 1280 os << buf; 1281 os.flush(); 1282 delete [] buf;
1283 david.dillard 1.116 return os;
1284 yi.zhou 1.108 }
1285 mike 1.112
1286 david.dillard 1.116 #endif // PEGASUS_HAS_ICU
1287 mike 1.112 1288 for (Uint32 i = 0, n = str.size(); i < n; i++)
1289 yi.zhou 1.108 {
1290 mike 1.112 Uint16 code = str[i];
1291 david.dillard 1.105
1292 mike 1.112 if (code > 0 && !(code & 0xFF00)) 1293 os << char(code); 1294 else 1295 { 1296 // Print in hex format: 1297 char buffer[8]; 1298 sprintf(buffer, "\\x%04X", code); 1299 os << buffer;
1300 david.dillard 1.105 }
1301 yi.zhou 1.108 }
1302 kumpf 1.39 1303 return os; 1304 } 1305
1306 mike 1.112 void StringAppendCharAux(StringRep*& _rep)
1307 kumpf 1.39 {
1308 mike 1.112 StringRep* tmp; 1309 1310 if (_rep->cap) 1311 { 1312 tmp = StringRep::alloc(2 * _rep->cap); 1313 tmp->size = _rep->size; 1314 _copy(tmp->data, _rep->data, _rep->size); 1315 } 1316 else 1317 { 1318 tmp = StringRep::alloc(8); 1319 tmp->size = 0; 1320 } 1321 1322 StringRep::unref(_rep); 1323 _rep = tmp;
1324 kumpf 1.39 } 1325
1326 thilo.boehm 1.128 void AssignASCII(String& s, const char* str, Uint32 n) 1327 { 1328 class StringLayout 1329 { 1330 public: 1331 StringRep* rep; 1332 }; 1333
1334 kumpf 1.130 StringLayout* that = reinterpret_cast<StringLayout*>(&s);
1335 thilo.boehm 1.128 1336 _checkNullPointer(str); 1337 1338 if (n > that->rep->cap \|\| that->rep->refs.get() != 1) 1339 { 1340 StringRep::unref(that->rep); 1341 that->rep = StringRep::alloc(n); 1342 } 1343 1344 _copy(that->rep->data, str, n); 1345 that->rep->size = n; 1346 that->rep->data[that->rep->size] = 0; 1347 } 1348
1349 mike 1.112 PEGASUS_NAMESPACE_END 1350 1351 /* 1352 ================================================================================ 1353 1354 String optimizations: 1355 1356 1. Added mechanism allowing certain functions to be inlined only when 1357 used by internal Pegasus modules. External modules (i.e., providers) 1358 link to a non-inline version, which allows for binary compatibility. 1359 1360 2. Implemented copy-on-write with atomic increment/decrement. This 1361 yielded a 10% improvement for the 'gc' benchmark and an 11% improvement 1362 for the 'ni1000' benchmark. 1363 1364 3. Employed loop unrolling in several places. For example, see: 1365 1366 static Uint16* _find(const Uint16* s, size_t n, Uint16 c); 1367 1368 4. Used the "empty-rep" optimization (described in whitepaper from the 1369 GCC Developers Summit). This reduced default construction to a simple 1370 mike 1.112 pointer assignment. 1371 1372 inline String::String() : _rep(&_emptyRep) { } 1373 1374 5. Implemented Uint16 versions of toupper() and tolower() using tables. 1375 For example: 1376 1377 static const char _upper[] = 1378 { 1379 0,1,2,...255 1380 }; 1381 1382 inline Uint16 _toUpper(Uint16 x) 1383 { 1384 return (x & 0xFF00) ? x : _upper[x]; 1385 } 1386
1387 david.dillard 1.116 This outperforms the system implementation by avoiding an anding
1388 mike 1.112 operation. 1389
1390 david.dillard 1.116 6. Implemented char* version of the following member functions to 1391 eliminate unnecessary creation of anonymous string objects
1392 mike 1.112 (temporaries). 1393 1394 String(const String& s1, const char* s2); 1395 String(const char* s1, const String& s2); 1396 String& String::operator=(const char* str); 1397 Uint32 String::find(const char* s) const; 1398 bool String::equal(const String& s1, const char* s2); 1399 static int String::compare(const String& s1, const char* s2); 1400 String& String::append(const char* str); 1401 String& String::append(const char* str, Uint32 size); 1402 static bool String::equalNoCase(const String& s1, const char* s2); 1403 String& operator=(const char* str) 1404 String& String::assign(const char* str) 1405 String& String::append(const char* str) 1406 Boolean operator==(const String& s1, const char* s2) 1407 Boolean operator==(const char* s1, const String& s2) 1408 Boolean operator!=(const String& s1, const char* s2) 1409 Boolean operator!=(const char* s1, const String& s2) 1410 Boolean operator<(const String& s1, const char* s2) 1411 Boolean operator<(const char* s1, const String& s2) 1412 Boolean operator>(const String& s1, const char* s2) 1413 mike 1.112 Boolean operator>(const char* s1, const String& s2) 1414 Boolean operator<=(const String& s1, const char* s2) 1415 Boolean operator<=(const char* s1, const String& s2) 1416 Boolean operator>=(const String& s1, const char* s2) 1417 Boolean operator>=(const char* s1, const String& s2) 1418 String operator+(const String& s1, const char* s2) 1419 String operator+(const char* s1, const String& s2) 1420
1421 david.dillard 1.116 7. Optimized _roundUpToPow2(), used in rounding the capacity to the next
1422 mike 1.112 power of two (algorithm from the book "Hacker's Delight"). 1423 1424 static Uint32 _roundUpToPow2(Uint32 x) 1425 { 1426 if (x < 8) 1427 return 8; 1428 1429 x--; 1430 x |= (x >> 1); 1431 x |= (x >> 2); 1432 x |= (x >> 4); 1433 x |= (x >> 8); 1434 x |= (x >> 16); 1435 x++; 1436 1437 return x; 1438 } 1439 1440 8. Implemented "concatenating constructors" to eliminate temporaries
1441 david.dillard 1.116 created by operator+(). This scheme employs the "return-value
1442 mike 1.112 optimization" described by Stan Lippman. 1443 1444 inline String operator+(const String& s1, const String& s2) 1445 { 1446 return String(s1, s2, 0); 1447 } 1448 1449 9. Experimented to find the optimal initial size for a short string. 1450 Eight seems to offer the best tradeoff between space and time. 1451 1452 10. Inlined all members of the Char16 class. 1453 1454 11. Used Uint16 internally in the String class. This showed no improvement 1455 since Char16 was already fully inlined and was essentially reduced to 1456 Uint16 in any case. 1457 1458 12. Implemented conditional logic (#if) allowing error checking logic to
1459 david.dillard 1.116 be excluded to better performance. Examples include bounds checking
1460 mike 1.112 and null-pointer checking. 1461 1462 13. Used memcpy() and memcmp() where possible. These are implemented using 1463 the rep family of instructions under Intel and are much faster. 1464
1465 david.dillard 1.116 14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
1466 mike 1.112 copy routine overhead. 1467 1468 15. Added ASCII7 form of the constructor and assign(). 1469 1470 String s("hello world", String::ASCII7); 1471 1472 s.assignASCII7("hello world"); 1473 1474 This avoids slower UTF8 processing when not needed. 1475 1476 ================================================================================ 1477 */

No CVS admin address has been configured