pegasus/src/Pegasus/Common/String.cpp - annotate

Return to String.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

1 karl 1.98 //%2005////////////////////////////////////////////////////////////////////////
2 mike 1.27 //
3 karl 1.97 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development 4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems. 5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
6 karl 1.85 // IBM Corp.; EMC Corporation, The Open Group.
7 karl 1.97 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.; 8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
9 karl 1.98 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.; 10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
11 mike 1.27 // 12 // Permission is hereby granted, free of charge, to any person obtaining a copy
13 kumpf 1.41 // of this software and associated documentation files (the "Software"), to 14 // deal in the Software without restriction, including without limitation the 15 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
16 mike 1.27 // sell copies of the Software, and to permit persons to whom the Software is 17 // furnished to do so, subject to the following conditions:
18 david.dillard 1.105 //
19 kumpf 1.41 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
20 mike 1.27 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED 21 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
22 kumpf 1.41 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 23 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 24 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
25 mike 1.27 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 27 // 28 //============================================================================== 29 //
30 mike 1.112 // Author: Mike Brasher (mbrasher@austin.rr.com)
31 mike 1.27 //
32 mike 1.112 // Modified By: 33 // Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com) 34 // Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297 35 // David Dillard, VERITAS Software Corp. (david.dillard@veritas.com) 36 // Mike Brasher (mike-brasher@austin.rr.com)
37 mike 1.27 // 38 //%///////////////////////////////////////////////////////////////////////////// 39
40 mike 1.112 #include <cassert>
41 mike 1.113 #include <cstring>
42 kumpf 1.48 #include "InternalException.h"
43 david 1.69 #include "CommonUTF.h"
44 mike 1.112 #include "MessageLoader.h" 45 #include "StringRep.h"
46 david 1.69 47 #ifdef PEGASUS_HAS_ICU
48 chuck 1.99 #include <unicode/ustring.h> 49 #include <unicode/uchar.h>
50 david 1.69 #endif 51
52 mike 1.112 PEGASUS_NAMESPACE_BEGIN
53 mike 1.28
54 mike 1.112 //============================================================================== 55 // 56 // Compile-time macros (undefined by default). 57 // 58 // PEGASUS_STRING_NO_THROW -- suppresses throwing of exceptions 59 // 60 // PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code. 61 // 62 //==============================================================================
63 mike 1.27
64 mike 1.112 //==============================================================================
65 kumpf 1.39 //
66 mike 1.112 // File-scope definitions:
67 kumpf 1.54 //
68 mike 1.112 //============================================================================== 69 70 // Note: this table is much faster than the system toupper(). Please do not 71 // change.
72 kumpf 1.54
73 mike 1.112 const Uint8 _toUpperTable[256] =
74 kumpf 1.54 {
75 mike 1.112 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 76 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 77 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 78 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 79 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 80 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 81 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 82 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 83 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47, 84 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, 85 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57, 86 0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, 87 0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47, 88 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, 89 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57, 90 0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F, 91 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 92 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 93 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 94 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 95 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 96 mike 1.112 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 97 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 98 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 99 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 100 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 101 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 102 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 103 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 104 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 105 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 106 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF, 107 }; 108 109 // Note: this table is much faster than the system tulower(). Please do not 110 // change. 
111 112 const Uint8 _toLowerTable[256] = 113 { 114 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 115 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 116 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 117 mike 1.112 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 118 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 119 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 120 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 121 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 122 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 123 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 124 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 125 0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F, 126 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 127 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 128 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 129 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, 130 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 131 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 132 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 133 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 134 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 135 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 136 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 137 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 138 mike 1.112 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 139 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 140 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 141 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 142 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 143 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 144 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 145 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF, 146 }; 147 148 // Converts 16-bit characters to upper case. This routine is faster than the 149 // system toupper(). Please do not change. 150 inline Uint16 _toUpper(Uint16 x) 151 { 152 return (x & 0xFF00) ? x : _toUpperTable[x];
153 kumpf 1.54 } 154
155 mike 1.112 // Converts 16-bit characters to lower case. This routine is faster than the 156 // system toupper(). Please do not change. 157 inline Uint16 _toLower(Uint16 x)
158 kumpf 1.54 {
159 mike 1.112 return (x & 0xFF00) ? x : _toLowerTable[x]; 160 } 161 162 // Rounds x up to the nearest power of two (or just returns 8 if x < 8). 163 static Uint32 _roundUpToPow2(Uint32 x) 164 { 165 #ifndef PEGASUS_STRING_NO_THROW 166 167 if (x > 0x0FFFFFFF) 168 throw PEGASUS_STD(bad_alloc)(); 169 170 #endif 171 172 if (x < 8) 173 return 8; 174 175 x--; 176 x \|= (x >> 1); 177 x \|= (x >> 2); 178 x \|= (x >> 4); 179 x \|= (x >> 8); 180 mike 1.112 x \|= (x >> 16); 181 x++; 182 183 return x; 184 } 185 186 template<class P, class Q> 187 static void _copy(P* p, const Q* q, size_t n) 188 { 189 // The following employs loop unrolling for efficiency. Please do not 190 // eliminate. 191 192 while (n >= 8) 193 { 194 p[0] = q[0]; 195 p[1] = q[1]; 196 p[2] = q[2]; 197 p[3] = q[3]; 198 p[4] = q[4]; 199 p[5] = q[5]; 200 p[6] = q[6]; 201 mike 1.112 p[7] = q[7]; 202 p += 8; 203 q += 8; 204 n -= 8; 205 } 206 207 while (n >= 4) 208 { 209 p[0] = q[0]; 210 p[1] = q[1]; 211 p[2] = q[2]; 212 p[3] = q[3]; 213 p += 4; 214 q += 4; 215 n -= 4; 216 } 217 218 while (n--) 219 p++ = q++; 220 } 221 222 mike 1.112 static Uint16* _find(const Uint16* s, size_t n, Uint16 c) 223 { 224 // The following employs loop unrolling for efficiency. Please do not 225 // eliminate. 226 227 while (n >= 4) 228 { 229 if (s[0] == c) 230 return (Uint16)s; 231 if (s[1] == c) 232 return (Uint16)&s[1]; 233 if (s[2] == c) 234 return (Uint16)&s[2]; 235 if (s[3] == c) 236 return (Uint16)&s[3];
237 kumpf 1.82
238 mike 1.112 n -= 4; 239 s += 4; 240 } 241 242 if (n) 243 { 244 if (s == c) 245 return (Uint16)s; 246 s++; 247 n--; 248 } 249 250 if (n) 251 { 252 if (s == c) 253 return (Uint16)s; 254 s++; 255 n--; 256 } 257 258 if (n && s == c) 259 mike 1.112 return (Uint16)s; 260 261 // Not found! 262 return 0; 263 } 264 265 static int _compare(const Uint16* s1, const Uint16* s2) 266 { 267 while (s1 && s2) 268 { 269 int r = s1++ - s2++; 270 271 if (r) 272 return r; 273 } 274 275 if (s2) 276 return -1; 277 else if (s1) 278 return 1; 279 280 mike 1.112 return 0; 281 } 282 283 static int _compareNoUTF8(const Uint16* s1, const char* s2) 284 { 285 Uint16 c1; 286 Uint16 c2; 287 288 do 289 { 290 c1 = s1++; 291 c2 = s2++; 292 293 if (c1 == 0) 294 return c1 - c2; 295 } 296 while (c1 == c2); 297 298 return c1 - c2; 299 } 300 301 mike 1.112 static int _compare(const Uint16* s1, const Uint16* s2, size_t n) 302 { 303 // This should only be called when s1 and s2 have the same length. 304 305 while (n-- && (s1++ - s2++) == 0) 306 ; 307 308 return s1[-1] - s2[-1]; 309 } 310 311 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n) 312 { 313 memcpy(s1, s2, n * sizeof(Uint16)); 314 } 315 316 void StringThrowOutOfBounds() 317 { 318 throw IndexOutOfBoundsException(); 319 } 320 321 inline void _checkNullPointer(const void* ptr) 322 mike 1.112 { 323 #ifdef PEGASUS_STRING_NO_THROW 324 325 if (!ptr) 326 throw NullPointer(); 327 328 #endif 329 } 330 331 static void _StringThrowBadUTF8(Uint32 index) 332 { 333 MessageLoaderParms parms( 334 "Common.String.BAD_UTF8", 335 "The byte sequence starting at index $0 " 336 "is not valid UTF-8 encoding.", 337 index); 338 throw Exception(parms); 339 } 340 341 static size_t _copyFromUTF8( 342 Uint16* dest, 343 mike 1.112 const char* src, 344 size_t n, 345 size_t& utf8_error_index) 346 { 347 Uint16* p = dest; 348 const Uint8* q = (const Uint8)src; 349 350 // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later). 351 // Use loop-unrolling. 
352 353 while (n >=8 && ((q[0]\|q[1]\|q[2]\|q[3]\|q[4]\|q[5]\|q[6]\|q[7]) & 0x80) == 0) 354 { 355 p[0] = q[0]; 356 p[1] = q[1]; 357 p[2] = q[2]; 358 p[3] = q[3]; 359 p[4] = q[4]; 360 p[5] = q[5]; 361 p[6] = q[6]; 362 p[7] = q[7]; 363 p += 8; 364 mike 1.112 q += 8; 365 n -= 8; 366 } 367 368 while (n >=4 && ((q[0]\|q[1]\|q[2]\|q[3]) & 0x80) == 0) 369 { 370 p[0] = q[0]; 371 p[1] = q[1]; 372 p[2] = q[2]; 373 p[3] = q[3]; 374 p += 4; 375 q += 4; 376 n -= 4; 377 } 378 379 switch (n) 380 { 381 case 0: 382 return p - dest; 383 case 1: 384 if (q[0] < 128) 385 mike 1.112 { 386 p[0] = q[0]; 387 return p + 1 - dest; 388 } 389 break; 390 case 2: 391 if (((q[0]\|q[1]) & 0x80) == 0) 392 { 393 p[0] = q[0]; 394 p[1] = q[1]; 395 return p + 2 - dest; 396 } 397 break; 398 case 3: 399 if (((q[0]\|q[1]\|q[2]) & 0x80) == 0) 400 { 401 p[0] = q[0]; 402 p[1] = q[1]; 403 p[2] = q[2]; 404 return p + 3 - dest; 405 } 406 mike 1.112 break; 407 } 408 409 // Process remaining characters. 410 411 while (n) 412 { 413 // Optimize for 7-bit ASCII case. 414 415 if (q < 128) 416 { 417 p++ = q++; 418 n--; 419 } 420 else 421 { 422 Uint8 c = UTF_8_COUNT_TRAIL_BYTES(q) + 1; 423 424 if (c > n \|\| !isValid_U8(q, c) \|\| 425 UTF8toUTF16(&q, q + c, &p, p + n) != 0) 426 { 427 mike 1.112 utf8_error_index = q - (const Uint8)src; 428 return size_t(-1); 429 } 430 431 n -= c; 432 } 433 } 434 435 return p - dest; 436 } 437 438 // Note: dest must be at least three times src (plus an extra byte for 439 // terminator). 440 static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n) 441 { 442 // The following employs loop unrolling for efficiency. Please do not 443 // eliminate. 444 445 const Uint16* q = src; 446 Uint8* p = (Uint8*)dest; 447 448 mike 1.112 while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
449 kumpf 1.82 {
450 mike 1.112 p[0] = q[0]; 451 p[1] = q[1]; 452 p[2] = q[2]; 453 p[3] = q[3]; 454 p += 4; 455 q += 4; 456 n -= 4;
457 kumpf 1.82 }
458 mike 1.112 459 switch (n) 460 { 461 case 0: 462 return p - (Uint8)dest; 463 case 1: 464 if (q[0] < 128) 465 { 466 p[0] = q[0]; 467 return p + 1 - (Uint8)dest; 468 } 469 break; 470 case 2: 471 if (q[0] < 128 && q[1] < 128) 472 { 473 p[0] = q[0]; 474 p[1] = q[1]; 475 return p + 2 - (Uint8)dest; 476 } 477 break; 478 case 3: 479 mike 1.112 if (q[0] < 128 && q[1] < 128 && q[2] < 128) 480 { 481 p[0] = q[0]; 482 p[1] = q[1]; 483 p[2] = q[2]; 484 return p + 3 - (Uint8)dest; 485 } 486 break; 487 } 488 489 // If this line was reached, there must be characters greater than 128. 490 491 UTF16toUTF8(&q, q + n, &p, p + 3 * n); 492 493 return p - (Uint8*)dest;
494 kumpf 1.54 } 495
496 mike 1.112 static inline size_t _convert( 497 Uint16* p, const char* q, size_t n, size_t& utf8_error_index)
498 kumpf 1.54 {
499 mike 1.112 #ifdef PEGASUS_STRING_NO_UTF8 500 _copy(p, q, n); 501 return n; 502 #else 503 return _copyFromUTF8(p, q, n, utf8_error_index); 504 #endif
505 kumpf 1.54 } 506
507 mike 1.112 //============================================================================== 508 // 509 // class CString 510 // 511 //============================================================================== 512 513 CString::CString(const CString& cstr) : _rep(0)
514 kumpf 1.54 {
515 mike 1.112 if (cstr._rep)
516 kumpf 1.82 {
517 mike 1.112 size_t n = strlen(cstr._rep) + 1; 518 _rep = (char*)operator new(n); 519 memcpy(_rep, cstr._rep, n);
520 kumpf 1.82 }
521 kumpf 1.54 } 522
523 kumpf 1.56 CString& CString::operator=(const CString& cstr) 524 {
525 kumpf 1.82 if (&cstr != this)
526 kumpf 1.81 {
527 kumpf 1.82 if (_rep) 528 {
529 mike 1.112 operator delete(_rep);
530 kumpf 1.82 _rep = 0; 531 }
532 mike 1.112
533 kumpf 1.82 if (cstr._rep) 534 {
535 mike 1.112 size_t n = strlen(cstr._rep) + 1; 536 _rep = (char*)operator new(n); 537 memcpy(_rep, cstr._rep, n);
538 kumpf 1.82 }
539 kumpf 1.81 }
540 mike 1.112
541 kumpf 1.56 return *this; 542 } 543
544 mike 1.112 //==============================================================================
545 kumpf 1.54 //
546 mike 1.112 // class StringRep
547 kumpf 1.39 //
548 mike 1.112 //==============================================================================
549 kumpf 1.39
550 mike 1.112 StringRep StringRep::_emptyRep;
551 mike 1.27
552 mike 1.112 inline StringRep* StringRep::alloc(size_t cap)
553 mike 1.27 {
554 mike 1.112 #ifndef PEGASUS_STRING_NO_THROW
555 mike 1.27
556 mike 1.112 // Any string bigger than this is seriously suspect. 557 if (cap > 0x0FFFFFFF) 558 throw PEGASUS_STD(bad_alloc)();
559 mike 1.27
560 mike 1.112 #endif
561 mike 1.27
562 mike 1.112 StringRep* rep = (StringRep)::operator new( 563 sizeof(StringRep) + cap sizeof(Uint16)); 564 rep->cap = cap; 565 new(&rep->refs) AtomicInt(1); 566 567 return rep;
568 mike 1.27 } 569
570 mike 1.112 static inline void _reserve(StringRep*& rep, Uint32 cap)
571 chuck 1.102 {
572 mike 1.114 if (cap > rep->cap \|\| rep->refs.get() != 1)
573 chuck 1.102 {
574 mike 1.112 size_t n = _roundUpToPow2(cap); 575 StringRep* newRep = StringRep::alloc(n); 576 newRep->size = rep->size; 577 _copy(newRep->data, rep->data, rep->size + 1); 578 StringRep::unref(rep); 579 rep = newRep; 580 } 581 }
582 david.dillard 1.105
583 mike 1.112 StringRep* StringRep::create(const Uint16* data, size_t size) 584 { 585 StringRep* rep = StringRep::alloc(size); 586 rep->size = size; 587 _copy(rep->data, data, size); 588 rep->data[size] = '\0'; 589 return rep; 590 }
591 chuck 1.102
592 mike 1.112 StringRep* StringRep::copyOnWrite(StringRep* rep) 593 { 594 // Return a new copy of rep. Release rep.
595 chuck 1.102
596 mike 1.112 StringRep* newRep = StringRep::alloc(rep->size); 597 newRep->size = rep->size; 598 _copy(newRep->data, rep->data, rep->size); 599 newRep->data[newRep->size] = '\0'; 600 StringRep::unref(rep); 601 return newRep;
602 chuck 1.102 } 603
604 mike 1.112 StringRep* StringRep::create(const char* data, size_t size)
605 kumpf 1.43 {
606 mike 1.112 StringRep* rep = StringRep::alloc(size); 607 size_t utf8_error_index; 608 rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index); 609 610 #ifndef PEGASUS_STRING_NO_THROW 611 if (rep->size == size_t(-1)) 612 { 613 StringRep::free(rep); 614 _StringThrowBadUTF8(utf8_error_index); 615 } 616 #endif
617 kumpf 1.43
618 mike 1.112 rep->data[rep->size] = '\0';
619 kumpf 1.43
620 mike 1.112 return rep;
621 mike 1.27 } 622
623 mike 1.112 Uint32 StringRep::length(const Uint16* str)
624 mike 1.27 {
625 mike 1.112 // Note: We could unroll this but it is rarely called. 626 627 const Uint16* end = (Uint16)str; 628 629 while (end++) 630 ; 631 632 return end - str - 1;
633 kumpf 1.39 }
634 tony 1.66
635 mike 1.112 //============================================================================== 636 // 637 // class String 638 // 639 //============================================================================== 640 641 const String String::EMPTY;
642 mike 1.27
643 kumpf 1.39 String::String(const String& str, Uint32 n) 644 {
645 mike 1.112 _checkBounds(n, str._rep->size); 646 _rep = StringRep::create(str._rep->data, n);
647 kumpf 1.39 } 648 649 String::String(const Char16* str) 650 {
651 mike 1.112 _checkNullPointer(str); 652 _rep = StringRep::create((Uint16)str, StringRep::length((Uint16)str));
653 mike 1.27 } 654
655 kumpf 1.39 String::String(const Char16* str, Uint32 n) 656 {
657 mike 1.112 _checkNullPointer(str); 658 _rep = StringRep::create((Uint16*)str, n);
659 kumpf 1.39 } 660 661 String::String(const char* str)
662 mike 1.27 {
663 mike 1.112 _checkNullPointer(str);
664 david.dillard 1.105
665 mike 1.112 // Set this just in case create() throws an exception. 666 _rep = &StringRep::_emptyRep; 667 _rep = StringRep::create(str, strlen(str));
668 mike 1.27 } 669
670 kumpf 1.39 String::String(const char* str, Uint32 n)
671 mike 1.27 {
672 mike 1.112 _checkNullPointer(str);
673 david.dillard 1.105
674 mike 1.112 // Set this just in case create() throws an exception. 675 _rep = &StringRep::_emptyRep; 676 _rep = StringRep::create(str, n);
677 kumpf 1.39 }
678 mike 1.27
679 mike 1.112 String::String(const String& s1, const String& s2)
680 kumpf 1.39 {
681 mike 1.112 size_t n1 = s1._rep->size; 682 size_t n2 = s2._rep->size; 683 size_t n = n1 + n2; 684 _rep = StringRep::alloc(n); 685 _copy(_rep->data, s1._rep->data, n1); 686 _copy(_rep->data + n1, s2._rep->data, n2); 687 _rep->size = n; 688 _rep->data[n] = '\0';
689 mike 1.27 } 690
691 mike 1.112 String::String(const String& s1, const char* s2)
692 mike 1.27 {
693 mike 1.112 _checkNullPointer(s2); 694 size_t n1 = s1._rep->size; 695 size_t n2 = strlen(s2); 696 _rep = StringRep::alloc(n1 + n2); 697 _copy(_rep->data, s1._rep->data, n1); 698 size_t utf8_error_index; 699 size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index); 700 701 #ifndef PEGASUS_STRING_NO_THROW 702 if (tmp == size_t(-1))
703 kumpf 1.82 {
704 mike 1.112 StringRep::free(_rep); 705 _rep = &StringRep::_emptyRep; 706 _StringThrowBadUTF8(utf8_error_index);
707 kumpf 1.82 }
708 mike 1.112 #endif 709 710 _rep->size = n1 + tmp; 711 _rep->data[_rep->size] = '\0';
712 mike 1.27 } 713
714 mike 1.112 String::String(const char* s1, const String& s2)
715 mike 1.27 {
716 mike 1.112 _checkNullPointer(s1); 717 size_t n1 = strlen(s1); 718 size_t n2 = s2._rep->size; 719 _rep = StringRep::alloc(n1 + n2); 720 size_t utf8_error_index; 721 size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index); 722 723 #ifndef PEGASUS_STRING_NO_THROW 724 if (tmp == size_t(-1)) 725 { 726 StringRep::free(_rep); 727 _rep = &StringRep::_emptyRep; 728 _StringThrowBadUTF8(utf8_error_index); 729 } 730 #endif 731 732 _rep->size = n2 + tmp; 733 _copy(_rep->data + n1, s2._rep->data, n2); 734 _rep->data[_rep->size] = '\0';
735 mike 1.27 } 736
737 mike 1.112 String& String::assign(const String& str)
738 mike 1.27 {
739 mike 1.112 if (_rep != str._rep)
740 david.dillard 1.105 {
741 mike 1.112 StringRep::unref(_rep); 742 StringRep::ref(_rep = str._rep);
743 david.dillard 1.105 } 744
745 mike 1.27 return this; 746 } 747 748 String& String::assign(const Char16 str, Uint32 n) 749 {
750 mike 1.112 _checkNullPointer(str); 751
752 mike 1.114 if (n > _rep->cap \|\| _rep->refs.get() != 1)
753 david.dillard 1.105 {
754 mike 1.112 StringRep::unref(_rep); 755 _rep = StringRep::alloc(n);
756 david.dillard 1.105 } 757
758 mike 1.112 _rep->size = n; 759 _copy(_rep->data, (Uint16*)str, n); 760 _rep->data[n] = '\0'; 761
762 mike 1.27 return *this; 763 } 764
765 mike 1.112 String& String::assign(const char* str, Uint32 n)
766 chuck 1.102 {
767 mike 1.112 _checkNullPointer(str); 768
769 mike 1.114 if (n > _rep->cap \|\| _rep->refs.get() != 1)
770 david.dillard 1.105 {
771 mike 1.112 StringRep::unref(_rep); 772 _rep = StringRep::alloc(n);
773 david.dillard 1.105 } 774
775 mike 1.112 size_t utf8_error_index; 776 _rep->size = _convert(_rep->data, str, n, utf8_error_index);
777 chuck 1.102
778 mike 1.112 #ifndef PEGASUS_STRING_NO_THROW 779 if (_rep->size == size_t(-1))
780 david.dillard 1.105 {
781 mike 1.112 StringRep::free(_rep); 782 _rep = &StringRep::_emptyRep; 783 _StringThrowBadUTF8(utf8_error_index);
784 david.dillard 1.105 }
785 mike 1.112 #endif 786 787 _rep->data[_rep->size] = 0;
788 david.dillard 1.105
789 mike 1.27 return *this; 790 } 791
792 kumpf 1.39 void String::clear() 793 {
794 mike 1.112 if (_rep->size) 795 {
796 mike 1.114 if (_rep->refs.get() == 1)
797 mike 1.112 { 798 _rep->size = 0; 799 _rep->data[0] = '\0'; 800 } 801 else 802 { 803 StringRep::unref(_rep); 804 _rep = &StringRep::_emptyRep; 805 } 806 }
807 kumpf 1.39 } 808
809 mike 1.112 void String::reserveCapacity(Uint32 cap)
810 kumpf 1.39 {
811 mike 1.112 _reserve(_rep, cap);
812 kumpf 1.39 } 813
814 mike 1.112 CString String::getCString() const 815 { 816 // A UTF8 string can have three times as many characters as its UTF16 817 // counterpart, so we allocate extra memory for the worst case. In the 818 // best case, we may need only one third of the memory allocated. But 819 // downsizing the string afterwards is expensive and unecessary since 820 // CString objects are usually short-lived (disappearing after only a few 821 // instructions). CString objects are typically created on the stack as 822 // means to obtain a char* pointer. 823 824 #ifdef PEGASUS_STRING_NO_UTF8 825 char* str = (char*)operator new(_rep->size + 1); 826 _copy(str, _rep->data, _rep->size); 827 str[_rep->size] = '\0'; 828 return CString(str);
829 gs.keenan 1.110 #else
830 mike 1.112 Uint32 n = 3 * _rep->size; 831 char* str = (char*)operator new(n + 1); 832 size_t size = _copyToUTF8(str, _rep->data, _rep->size); 833 str[size] = '\0'; 834 return CString(str);
835 gs.keenan 1.110 #endif
836 kumpf 1.39 } 837
838 mike 1.112 String& String::append(const Char16* str, Uint32 n)
839 kumpf 1.39 {
840 mike 1.112 _checkNullPointer(str); 841 842 size_t oldSize = _rep->size; 843 size_t newSize = oldSize + n; 844 _reserve(_rep, newSize); 845 _copy(_rep->data + oldSize, (Uint16)str, n); 846 _rep->size = newSize; 847 _rep->data[newSize] = '\0'; 848 849 return this;
850 kumpf 1.39 } 851
852 mike 1.112 String& String::append(const String& str)
853 mike 1.27 {
854 mike 1.112 return append((Char16*)str._rep->data, str._rep->size);
855 mike 1.27 } 856
857 mike 1.112 String& String::append(const char* str, Uint32 size)
858 mike 1.27 {
859 mike 1.112 _checkNullPointer(str); 860 861 size_t oldSize = _rep->size; 862 size_t cap = oldSize + size; 863 864 _reserve(_rep, cap); 865 size_t utf8_error_index; 866 size_t tmp = _convert( 867 (Uint16*)_rep->data + oldSize, str, size, utf8_error_index); 868 869 #ifndef PEGASUS_STRING_NO_THROW 870 if (tmp == size_t(-1)) 871 { 872 StringRep::free(_rep); 873 _rep = &StringRep::_emptyRep; 874 _StringThrowBadUTF8(utf8_error_index); 875 } 876 #endif
877 mike 1.27
878 mike 1.112 _rep->size += tmp; 879 _rep->data[_rep->size] = '\0';
880 mike 1.27
881 kumpf 1.39 return *this; 882 } 883
884 mike 1.112 void String::remove(Uint32 index, Uint32 n)
885 mike 1.27 {
886 mike 1.112 if (n == PEG_NOT_FOUND) 887 n = _rep->size - index; 888 889 _checkBounds(index + n, _rep->size); 890
891 mike 1.114 if (_rep->refs.get() != 1)
892 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
893 mike 1.27
894 mike 1.112 assert(index + n <= _rep->size);
895 mike 1.27
896 mike 1.112 size_t rem = _rep->size - (index + n); 897 Uint16* data = _rep->data;
898 mike 1.27
899 mike 1.112 if (rem) 900 memmove(data + index, data + index + n, rem * sizeof(Uint16));
901 mike 1.27
902 mike 1.112 _rep->size -= n; 903 data[_rep->size] = '\0';
904 mike 1.27 } 905
906 mike 1.112 String String::subString(Uint32 index, Uint32 n) const
907 mike 1.27 {
908 mike 1.112 // Note: this implementation is very permissive but used for 909 // backwards compatibility. 910 911 if (index < _rep->size)
912 mike 1.27 {
913 mike 1.112 if (n == PEG_NOT_FOUND \|\| n > _rep->size - index) 914 n = _rep->size - index;
915 mike 1.27
916 mike 1.112 return String((Char16*)_rep->data + index, n);
917 mike 1.27 }
918 david.dillard 1.105 919 return String();
920 mike 1.27 } 921 922 Uint32 String::find(Char16 c) const 923 {
924 mike 1.112 Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
925 mike 1.27
926 mike 1.112 if (p) 927 return p - _rep->data;
928 mike 1.27 929 return PEG_NOT_FOUND; 930 } 931
932 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
933 mike 1.30 {
934 mike 1.112 _checkBounds(index, _rep->size); 935 936 if (index >= _rep->size) 937 return PEG_NOT_FOUND; 938 939 Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
940 mike 1.30
941 mike 1.112 if (p) 942 return p - _rep->data;
943 mike 1.30 944 return PEG_NOT_FOUND; 945 } 946
947 mike 1.112 Uint32 StringFindAux( 948 const StringRep* _rep, const Char16* s, Uint32 n)
949 mike 1.27 {
950 mike 1.112 _checkNullPointer(s);
951 mike 1.27
952 mike 1.112 const Uint16* data = _rep->data; 953 size_t rem = _rep->size; 954 955 while (n <= rem)
956 mike 1.30 {
957 mike 1.112 Uint16* p = (Uint16*)_find(data, rem, s[0]); 958 959 if (!p) 960 break;
961 mike 1.30
962 mike 1.112 if (memcmp(p, s, n * sizeof(Uint16)) == 0) 963 return p - _rep->data;
964 david.dillard 1.105
965 mike 1.112 p++; 966 rem -= p - data; 967 data = p;
968 mike 1.27 }
969 mike 1.112
970 mike 1.27 return PEG_NOT_FOUND; 971 } 972
973 mike 1.112 Uint32 String::find(const char* s) const 974 { 975 _checkNullPointer(s); 976 977 // Note: could optimize away creation of temporary, but this is rarely 978 // called. 979 return find(String(s)); 980 } 981
982 mike 1.27 Uint32 String::reverseFind(Char16 c) const 983 {
984 mike 1.112 Uint16 x = c; 985 Uint16* p = _rep->data; 986 Uint16* q = _rep->data + _rep->size;
987 mike 1.27
988 mike 1.112 while (q != p)
989 mike 1.27 {
990 mike 1.112 if (*--q == x) 991 return q - p;
992 mike 1.27 } 993 994 return PEG_NOT_FOUND; 995 } 996 997 void String::toLower() 998 {
999 david 1.69 #ifdef PEGASUS_HAS_ICU
1000 mike 1.112
1001 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1002 david 1.90 {
1003 mike 1.114 if (_rep->refs.get() != 1)
1004 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 1005
1006 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
1007 mike 1.112 // Since context-sensitive casing looks at adjacent chars, this 1008 // prevents optimizations where the us-ascii is converted before 1009 // calling ICU.
1010 yi.zhou 1.108 // The string may shrink or expand after the convert. 1011
1012 mike 1.112 //// First calculate size of resulting string. u_strToLower() returns 1013 //// only the size when zero is passed as the destination size argument. 1014
1015 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR; 1016
1017 mike 1.112 int32_t newSize = u_strToLower( 1018 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err); 1019 1020 err = U_ZERO_ERROR; 1021 1022 //// Reserve enough space for the result. 1023 1024 if ((Uint32)newSize > _rep->cap) 1025 _reserve(_rep, newSize); 1026 1027 //// Perform the conversion (overlapping buffers are allowed).
1028 chuck 1.99
1029 mike 1.112 u_strToLower((UChar)_rep->data, newSize, 1030 (UChar)_rep->data, _rep->size, NULL, &err);
1031 yi.zhou 1.108
1032 mike 1.112 _rep->size = newSize; 1033 return;
1034 david 1.90 }
1035 mike 1.112 1036 #endif /* PEGASUS_HAS_ICU */ 1037
1038 mike 1.114 if (_rep->refs.get() != 1)
1039 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 1040 1041 Uint16* p = _rep->data; 1042 size_t n = _rep->size; 1043 1044 for (; n--; p++)
1045 david 1.90 {
1046 mike 1.112 if (!(p & 0xFF00)) 1047 p = _toLower(*p);
1048 mike 1.27 }
1049 kumpf 1.39 } 1050
1051 chuck 1.99 void String::toUpper()
1052 david 1.90 { 1053 #ifdef PEGASUS_HAS_ICU
1054 mike 1.112
1055 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1056 chuck 1.99 {
1057 mike 1.114 if (_rep->refs.get() != 1)
1058 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 1059
1060 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
1061 mike 1.112 // Since context-sensitive casing looks at adjacent chars, this 1062 // prevents optimizations where the us-ascii is converted before 1063 // calling ICU.
1064 yi.zhou 1.108 // The string may shrink or expand after the convert. 1065
1066 mike 1.112 //// First calculate size of resulting string. u_strToUpper() returns 1067 //// only the size when zero is passed as the destination size argument. 1068
1069 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR; 1070
1071 mike 1.112 int32_t newSize = u_strToUpper( 1072 NULL, 0, (UChar)_rep->data, _rep->size, NULL, &err); 1073 1074 err = U_ZERO_ERROR; 1075 1076 //// Reserve enough space for the result. 1077 1078 if ((Uint32)newSize > _rep->cap) 1079 _reserve(_rep, newSize); 1080 1081 //// Perform the conversion (overlapping buffers are allowed). 1082 1083 u_strToUpper((UChar)_rep->data, newSize, 1084 (UChar*)_rep->data, _rep->size, NULL, &err);
1085 chuck 1.99
1086 mike 1.112 _rep->size = newSize;
1087 yi.zhou 1.108
1088 mike 1.112 return;
1089 david 1.91 }
1090 mike 1.112 1091 #endif /* PEGASUS_HAS_ICU */ 1092
1093 mike 1.114 if (_rep->refs.get() != 1)
1094 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 1095 1096 Uint16* p = _rep->data; 1097 size_t n = _rep->size; 1098 1099 for (; n--; p++) 1100 p = _toUpper(p);
1101 david 1.90 } 1102
1103 kumpf 1.43 int String::compare(const String& s1, const String& s2, Uint32 n)
1104 kumpf 1.39 {
1105 mike 1.112 assert(n <= s1._rep->size); 1106 assert(n <= s2._rep->size);
1107 mike 1.27
1108 mike 1.112 // Ignoring error in which n is greater than s1.size() or s2.size() 1109 return _compare(s1._rep->data, s2._rep->data, n);
1110 mike 1.27 } 1111
1112 kumpf 1.43 int String::compare(const String& s1, const String& s2)
1113 mike 1.30 {
1114 mike 1.112 return _compare(s1._rep->data, s2._rep->data); 1115 }
1116 kumpf 1.43
1117 mike 1.112 int String::compare(const String& s1, const char* s2) 1118 { 1119 _checkNullPointer(s2);
1120 mike 1.30
1121 mike 1.112 #ifdef PEGASUS_STRING_NO_UTF8 1122 return _compareNoUTF8(s1._rep->data, s2); 1123 #else 1124 // ATTN: optimize this! 1125 return String::compare(s1, String(s2)); 1126 #endif
1127 mike 1.30 } 1128
1129 mike 1.112 int String::compareNoCase(const String& str1, const String& str2)
1130 kumpf 1.40 {
1131 david 1.69 #ifdef PEGASUS_HAS_ICU
1132 mike 1.112
1133 yi.zhou 1.108 if (InitializeICU::initICUSuccessful()) 1134 {
1135 mike 1.112 return u_strcasecmp( 1136 str1._rep->data, str2._rep->data, U_FOLD_CASE_DEFAULT);
1137 yi.zhou 1.108 }
1138 kumpf 1.40
1139 mike 1.112 #endif /* PEGASUS_HAS_ICU / 1140 1141 const Uint16 s1 = str1._rep->data; 1142 const Uint16* s2 = str2._rep->data; 1143 1144 while (s1 && s2)
1145 kumpf 1.40 {
1146 mike 1.112 int r = _toLower(s1++) - _toLower(s2++);
1147 kumpf 1.40
1148 david.dillard 1.105 if (r) 1149 return r;
1150 kumpf 1.40 } 1151
1152 mike 1.112 if (*s2)
1153 david.dillard 1.105 return -1;
1154 mike 1.112 else if (*s1)
1155 david.dillard 1.105 return 1;
1156 kumpf 1.40 1157 return 0; 1158 } 1159
1160 mike 1.112 Boolean StringEqualNoCase(const String& s1, const String& s2)
1161 mike 1.27 {
1162 mike 1.112 #ifdef PEGASUS_HAS_ICU 1163 1164 return String::compareNoCase(s1, s2) == 0; 1165 1166 #else /* PEGASUS_HAS_ICU */
1167 mike 1.27
1168 mike 1.112 // The following employs loop unrolling for efficiency. Please do not 1169 // eliminate.
1170 kumpf 1.39
1171 mike 1.112 Uint16* p = (Uint16)s1.getChar16Data(); 1172 Uint16 q = (Uint16*)s2.getChar16Data(); 1173 Uint32 n = s2.size(); 1174 1175 while (n >= 8) 1176 { 1177 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) \|\| 1178 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) \|\| 1179 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) \|\| 1180 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) \|\| 1181 ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) \|\| 1182 ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) \|\| 1183 ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) \|\| 1184 ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7])))) 1185 { 1186 return false; 1187 }
1188 kumpf 1.39
1189 mike 1.112 n -= 8; 1190 p += 8; 1191 q += 8; 1192 }
1193 mike 1.27
1194 mike 1.112 while (n >= 4)
1195 kumpf 1.39 {
1196 mike 1.112 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) \|\| 1197 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) \|\| 1198 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) \|\| 1199 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
1200 david.dillard 1.105 {
1201 mike 1.112 return false;
1202 david.dillard 1.105 }
1203 mike 1.112 1204 n -= 4; 1205 p += 4; 1206 q += 4; 1207 } 1208 1209 while (n--) 1210 { 1211 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
1212 david.dillard 1.105 return false;
1213 mike 1.112 1214 p++; 1215 q++;
1216 kumpf 1.39 }
1217 mike 1.28
1218 kumpf 1.39 return true;
1219 mike 1.112 1220 #endif /* PEGASUS_HAS_ICU */
1221 david 1.69 } 1222
1223 mike 1.112 Boolean String::equalNoCase(const String& s1, const char* s2)
1224 david 1.69 {
1225 mike 1.112 _checkNullPointer(s2);
1226 david 1.69
1227 mike 1.112 #if defined(PEGASUS_HAS_ICU)
1228 david 1.69
1229 mike 1.112 return String::equalNoCase(s1, String(s2));
1230 david 1.69
1231 mike 1.112 #elif defined(PEGASUS_STRING_NO_UTF8)
1232 david 1.69
1233 mike 1.112 const Uint16* p1 = (Uint16)s1._rep->data; 1234 const char p2 = s2; 1235 size_t n = s1._rep->size;
1236 david.dillard 1.105
1237 mike 1.112 while (n--) 1238 { 1239 if (!*p2) 1240 return false;
1241 david 1.71
1242 mike 1.112 if (_toUpper(p1++) != _toUpperTable[int(p2++)]) 1243 return false; 1244 }
1245 kumpf 1.42
1246 mike 1.112 if (*p2) 1247 return false; 1248 1249 return true;
1250 karl 1.36
1251 mike 1.112 #else /* PEGASUS_HAS_ICU */
1252 david.dillard 1.105
1253 mike 1.112 // ATTN: optimize this! 1254 return String::equalNoCase(s1, String(s2));
1255 david.dillard 1.105
1256 mike 1.112 #endif /* PEGASUS_HAS_ICU */ 1257 }
1258 chuck 1.78
1259 mike 1.112 Boolean String::equal(const String& s1, const String& s2)
1260 karl 1.36 {
1261 mike 1.112 return s1._rep->size == s2._rep->size && memcmp(s1._rep->data, 1262 s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;
1263 karl 1.36 } 1264
1265 mike 1.112 Boolean String::equal(const String& s1, const char* s2) 1266 { 1267 #ifdef PEGASUS_STRING_NO_UTF8
1268 kumpf 1.35
1269 mike 1.112 _checkNullPointer(s2);
1270 kumpf 1.39
1271 mike 1.112 const Uint16* p = (Uint16)s1._rep->data; 1272 const char q = s2;
1273 kumpf 1.39
1274 mike 1.112 while (p && q) 1275 { 1276 if (p++ != Uint16(q++)) 1277 return false; 1278 }
1279 kumpf 1.39
1280 mike 1.112 return !(p \|\| q);
1281 kumpf 1.39
1282 mike 1.112 #else /* PEGASUS_STRING_NO_UTF8 */
1283 kumpf 1.39
1284 mike 1.112 return String::equal(s1, String(s2));
1285 kumpf 1.39
1286 mike 1.112 #endif /* PEGASUS_STRING_NO_UTF8 */
1287 kumpf 1.39 } 1288
1289 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
1290 kumpf 1.39 {
1291 mike 1.112 #if defined(PEGASUS_OS_OS400)
1292 david 1.72
1293 david 1.93 CString cstr = str.getCString();
1294 david 1.69 const char* utf8str = cstr;
1295 mike 1.112 os << utf8str; 1296 return os; 1297 #else
1298 david 1.69
1299 mike 1.112 #if defined(PEGASUS_HAS_ICU)
1300 david 1.69
1301 yi.zhou 1.108 if (InitializeICU::initICUSuccessful()) 1302 {
1303 david.dillard 1.105 char buf = NULL; 1304 const int size = str.size() 6;
1305 mike 1.112 UnicodeString UniStr( 1306 (const UChar *)str.getChar16Data(), (int32_t)str.size());
1307 david.dillard 1.105 Uint32 bufsize = UniStr.extract(0,size,buf); 1308 buf = new char[bufsize+1]; 1309 UniStr.extract(0,bufsize,buf); 1310 os << buf; 1311 os.flush(); 1312 delete [] buf;
1313 mike 1.112 return os;
1314 yi.zhou 1.108 }
1315 mike 1.112 1316 #endif // PEGASUS_HAS_ICU 1317 1318 for (Uint32 i = 0, n = str.size(); i < n; i++)
1319 yi.zhou 1.108 {
1320 mike 1.112 Uint16 code = str[i];
1321 david.dillard 1.105
1322 mike 1.112 if (code > 0 && !(code & 0xFF00)) 1323 os << char(code); 1324 else 1325 { 1326 // Print in hex format: 1327 char buffer[8]; 1328 sprintf(buffer, "\\x%04X", code); 1329 os << buffer;
1330 david.dillard 1.105 }
1331 yi.zhou 1.108 }
1332 kumpf 1.39 1333 return os;
1334 mike 1.112 #endif // PEGASUS_OS_OS400
1335 kumpf 1.39 } 1336
1337 mike 1.112 void StringAppendCharAux(StringRep*& _rep)
1338 kumpf 1.39 {
1339 mike 1.112 StringRep* tmp; 1340 1341 if (_rep->cap) 1342 { 1343 tmp = StringRep::alloc(2 * _rep->cap); 1344 tmp->size = _rep->size; 1345 _copy(tmp->data, _rep->data, _rep->size); 1346 } 1347 else 1348 { 1349 tmp = StringRep::alloc(8); 1350 tmp->size = 0; 1351 } 1352 1353 StringRep::unref(_rep); 1354 _rep = tmp;
1355 kumpf 1.39 } 1356
PEGASUS_NAMESPACE_END

/*
================================================================================

String optimizations:

    1.  Added mechanism allowing certain functions to be inlined only when
        used by internal Pegasus modules. External modules (i.e., providers)
        link to a non-inline version, which allows for binary compatibility.

    2.  Implemented copy-on-write with atomic increment/decrement. This
        yielded a 10% improvement for the 'gc' benchmark and an 11%
        improvement for the 'ni1000' benchmark.

    3.  Employed loop unrolling in several places. For example, see:

            static Uint16* _find(const Uint16* s, size_t n, Uint16 c);

    4.  Used the "empty-rep" optimization (described in whitepaper from the
        GCC Developers Summit). This reduced default construction to a simple
        pointer assignment.

            inline String::String() : _rep(&_emptyRep) { }

    5.  Implemented Uint16 versions of toupper() and tolower() using tables.
        For example:

            static const char _upper[] =
            {
                0,1,2,...255
            };

            inline Uint16 _toUpper(Uint16 x)
            {
                return (x & 0xFF00) ? x : _upper[x];
            }

        This outperforms the system implementation by avoiding an anding
        operation.

    6.  Implemented char* version of the following member functions to
        eliminate unnecessary creation of anonymous string objects
        (temporaries).

            String(const String& s1, const char* s2);
            String(const char* s1, const String& s2);
            String& String::operator=(const char* str);
            Uint32 String::find(const char* s) const;
            bool String::equal(const String& s1, const char* s2);
            static int String::compare(const String& s1, const char* s2);
            String& String::append(const char* str);
            String& String::append(const char* str, Uint32 size);
            static bool String::equalNoCase(const String& s1, const char* s2);
            String& operator=(const char* str)
            String& String::assign(const char* str)
            String& String::append(const char* str)
            Boolean operator==(const String& s1, const char* s2)
            Boolean operator==(const char* s1, const String& s2)
            Boolean operator!=(const String& s1, const char* s2)
            Boolean operator!=(const char* s1, const String& s2)
            Boolean operator<(const String& s1, const char* s2)
            Boolean operator<(const char* s1, const String& s2)
            Boolean operator>(const String& s1, const char* s2)
            Boolean operator>(const char* s1, const String& s2)
            Boolean operator<=(const String& s1, const char* s2)
            Boolean operator<=(const char* s1, const String& s2)
            Boolean operator>=(const String& s1, const char* s2)
            Boolean operator>=(const char* s1, const String& s2)
            String operator+(const String& s1, const char* s2)
            String operator+(const char* s1, const String& s2)

    7.  Optimized _roundUpToPow2(), used in rounding the capacity to the next
        power of two (algorithm from the book "Hacker's Delight").

            static Uint32 _roundUpToPow2(Uint32 x)
            {
                if (x < 8)
                    return 8;

                x--;
                x |= (x >> 1);
                x |= (x >> 2);
                x |= (x >> 4);
                x |= (x >> 8);
                x |= (x >> 16);
                x++;

                return x;
            }

    8.  Implemented "concatenating constructors" to eliminate temporaries
        created by operator+().
        This scheme employs the "return-value optimization" described by
        Stan Lippman.

            inline String operator+(const String& s1, const String& s2)
            {
                return String(s1, s2, 0);
            }

    9.  Experimented to find the optimal initial size for a short string.
        Eight seems to offer the best tradeoff between space and time.

    10. Inlined all members of the Char16 class.

    11. Used Uint16 internally in the String class. This showed no improvement
        since Char16 was already fully inlined and was essentially reduced to
        Uint16 in any case.

    12. Implemented conditional logic (#if) allowing error checking logic to
        be excluded for better performance. Examples include bounds checking
        and null-pointer checking.

    13. Used memcpy() and memcmp() where possible. These are implemented using
        the rep family of instructions under Intel and are much faster.

    14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
        copy routine overhead.

    15. Added ASCII7 form of the constructor and assign().

            String s("hello world", String::ASCII7);

            s.assignASCII7("hello world");

        This avoids slower UTF8 processing when not needed.

================================================================================

TO-DO:

    (+) [DONE] Use PEGASUS_USE_EXPERIMENTAL_INTERFACES

    (+) [DONE] Submit BUG-2754 (Windows buffer limit).

    (+) [DONE] Eliminate char versions of find() and append().

    (+) [DONE] Remove PEGASUS_MAX_PRINTABLE_CHARACTER from Config.h

    (+) [DONE] Change _next_pow_2() to _roundUpToPow2().

    (+) [DONE] Change '99' to '2' in StringRep constructor (comment as well).

    (+) [DONE] Comment StringRep allocation layout.

    (+) [DONE] Conceal private inline functions.

    (+) [DONE] Shorten inclusion of StringInline.h in String.h.

    (+) [DONE] Change USE_INTERNAL_INLINE TO DISABLE_INTERNAL_INLINE or get
        rid of altogether.

    (+) [DONE] useCamelNotationOnAllFunctionNames.

    (+) [DONE] Check for overflow condition in StringRep::alloc().

    (+) [DONE] Remove tabs (used vim ":set expandtab" and ":retab").

    (+) [DONE] Fix throw-related memory leak.

    (+) [DONE] Look at PEP223 for coding security guidelines.

    (+) [DONE] Use old AtomicInt for now (new AtomicInt part of bug #4250).
1520 kumpf 1.39
1521 mike 1.112 (+) [DONE] Removed appendASCII() and the ASCII form of the constructor.
1522 kumpf 1.39
1523 mike 1.112 (+) DOC++ String.h - will open new bug?
1524 kumpf 1.39
1525 mike 1.112 (+) Added PEGASUS_DISABLE_INTERNAL_INLINES macro (to permit suppression 1526 on certain platforms).
1527 kumpf 1.39
1528 mike 1.112 ================================================================================ 1529 */

No CVS admin address has been configured