pegasus/src/Pegasus/Common/String.cpp - annotate

Return to String.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

1 karl 1.119 //%2006////////////////////////////////////////////////////////////////////////
2 mike 1.27 //
3 karl 1.97 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development 4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems. 5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
6 karl 1.85 // IBM Corp.; EMC Corporation, The Open Group.
7 karl 1.97 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.; 8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
9 karl 1.98 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.; 10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
11 karl 1.119 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.; 12 // EMC Corporation; Symantec Corporation; The Open Group.
13 mike 1.27 // 14 // Permission is hereby granted, free of charge, to any person obtaining a copy
15 kumpf 1.41 // of this software and associated documentation files (the "Software"), to 16 // deal in the Software without restriction, including without limitation the 17 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
18 mike 1.27 // sell copies of the Software, and to permit persons to whom the Software is 19 // furnished to do so, subject to the following conditions:
20 karl 1.119 //
21 kumpf 1.41 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
22 mike 1.27 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED 23 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
24 kumpf 1.41 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 25 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 26 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
27 mike 1.27 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 29 // 30 //============================================================================== 31 //
32 mike 1.112 // Author: Mike Brasher (mbrasher@austin.rr.com)
33 mike 1.27 //
34 david.dillard 1.116 // Modified By:
35 mike 1.112 // Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com) 36 // Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297
37 david.dillard 1.116 // David Dillard, Symantec Corp. (david_dillard@symantec.com)
38 mike 1.112 // Mike Brasher (mike-brasher@austin.rr.com)
39 mike 1.27 // 40 //%///////////////////////////////////////////////////////////////////////////// 41
42 jim.wunderlich 1.115 #include <Pegasus/Common/PegasusAssert.h>
43 mike 1.113 #include <cstring>
44 kumpf 1.48 #include "InternalException.h"
45 david 1.69 #include "CommonUTF.h"
46 mike 1.112 #include "MessageLoader.h" 47 #include "StringRep.h"
48 david 1.69 49 #ifdef PEGASUS_HAS_ICU
50 chuck 1.99 #include <unicode/ustring.h> 51 #include <unicode/uchar.h>
52 david 1.69 #endif 53
54 mike 1.112 PEGASUS_NAMESPACE_BEGIN
55 mike 1.28
56 mike 1.112 //============================================================================== 57 // 58 // Compile-time macros (undefined by default). 59 // 60 // PEGASUS_STRING_NO_THROW -- suppresses throwing of exceptions
61 david.dillard 1.116 //
62 mike 1.112 // PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code. 63 // 64 //==============================================================================
65 mike 1.27
66 mike 1.112 //==============================================================================
67 kumpf 1.39 //
68 mike 1.112 // File-scope definitions:
69 kumpf 1.54 //
70 mike 1.112 //============================================================================== 71 72 // Note: this table is much faster than the system toupper(). Please do not 73 // change.
74 kumpf 1.54
75 david.dillard 1.116 const Uint8 _toUpperTable[256] =
76 kumpf 1.54 {
77 mike 1.112 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 78 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 79 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 80 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 81 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 82 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 83 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 84 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 85 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47, 86 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, 87 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57, 88 0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, 89 0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47, 90 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, 91 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57, 92 0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F, 93 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 94 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 95 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 96 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 97 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 98 mike 1.112 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 99 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 100 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 101 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 102 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 103 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 104 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 105 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 106 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 107 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 108 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF, 109 }; 110 111 // Note: this table is much faster than the system tulower(). Please do not 112 // change. 113
114 david.dillard 1.116 const Uint8 _toLowerTable[256] =
115 mike 1.112 { 116 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 117 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 118 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 119 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 120 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 121 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 122 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 123 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 124 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 125 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 126 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 127 0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F, 128 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 129 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 130 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 131 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, 132 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 133 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 134 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 135 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 136 mike 1.112 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 137 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 138 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 139 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 140 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 141 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 142 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 143 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 144 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 145 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 146 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 147 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF, 148 }; 149 150 // Converts 16-bit characters to upper case. This routine is faster than the 151 // system toupper(). Please do not change. 152 inline Uint16 _toUpper(Uint16 x) 153 { 154 return (x & 0xFF00) ? x : _toUpperTable[x];
155 kumpf 1.54 } 156
157 mike 1.112 // Converts 16-bit characters to lower case. This routine is faster than the 158 // system toupper(). Please do not change. 159 inline Uint16 _toLower(Uint16 x)
160 kumpf 1.54 {
161 mike 1.112 return (x & 0xFF00) ? x : _toLowerTable[x]; 162 } 163 164 // Rounds x up to the nearest power of two (or just returns 8 if x < 8). 165 static Uint32 _roundUpToPow2(Uint32 x) 166 { 167 #ifndef PEGASUS_STRING_NO_THROW 168
169 dave.sudlik 1.119.2.1 // Check for potential overflow in x 170 PEGASUS_CHECK_CAPACITY_OVERFLOW(x);
171 mike 1.112 172 #endif 173 174 if (x < 8) 175 return 8; 176 177 x--; 178 x \|= (x >> 1); 179 x \|= (x >> 2); 180 x \|= (x >> 4); 181 x \|= (x >> 8); 182 x \|= (x >> 16); 183 x++; 184 185 return x; 186 } 187 188 template<class P, class Q> 189 static void _copy(P* p, const Q* q, size_t n) 190 { 191 // The following employs loop unrolling for efficiency. Please do not 192 mike 1.112 // eliminate. 193 194 while (n >= 8) 195 { 196 p[0] = q[0]; 197 p[1] = q[1]; 198 p[2] = q[2]; 199 p[3] = q[3]; 200 p[4] = q[4]; 201 p[5] = q[5]; 202 p[6] = q[6]; 203 p[7] = q[7]; 204 p += 8; 205 q += 8; 206 n -= 8; 207 } 208 209 while (n >= 4) 210 { 211 p[0] = q[0]; 212 p[1] = q[1]; 213 mike 1.112 p[2] = q[2]; 214 p[3] = q[3]; 215 p += 4; 216 q += 4; 217 n -= 4; 218 } 219 220 while (n--) 221 p++ = q++; 222 } 223 224 static Uint16* _find(const Uint16* s, size_t n, Uint16 c) 225 { 226 // The following employs loop unrolling for efficiency. Please do not 227 // eliminate. 228 229 while (n >= 4) 230 { 231 if (s[0] == c) 232 return (Uint16)s; 233 if (s[1] == c) 234 mike 1.112 return (Uint16)&s[1]; 235 if (s[2] == c) 236 return (Uint16)&s[2]; 237 if (s[3] == c) 238 return (Uint16)&s[3];
239 kumpf 1.82
240 mike 1.112 n -= 4; 241 s += 4; 242 } 243 244 if (n) 245 { 246 if (s == c) 247 return (Uint16)s; 248 s++; 249 n--; 250 } 251 252 if (n) 253 { 254 if (s == c) 255 return (Uint16)s; 256 s++; 257 n--; 258 } 259 260 if (n && s == c) 261 mike 1.112 return (Uint16)s; 262 263 // Not found! 264 return 0; 265 } 266 267 static int _compare(const Uint16* s1, const Uint16* s2) 268 { 269 while (s1 && s2) 270 { 271 int r = s1++ - s2++; 272 273 if (r) 274 return r; 275 } 276 277 if (s2) 278 return -1; 279 else if (s1) 280 return 1; 281 282 mike 1.112 return 0; 283 } 284 285 static int _compareNoUTF8(const Uint16* s1, const char* s2) 286 { 287 Uint16 c1; 288 Uint16 c2; 289 290 do 291 { 292 c1 = s1++; 293 c2 = s2++; 294 295 if (c1 == 0) 296 return c1 - c2; 297 } 298 while (c1 == c2); 299 300 return c1 - c2; 301 } 302 303 mike 1.112 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n) 304 { 305 memcpy(s1, s2, n * sizeof(Uint16)); 306 } 307 308 void StringThrowOutOfBounds() 309 { 310 throw IndexOutOfBoundsException(); 311 } 312 313 inline void _checkNullPointer(const void* ptr) 314 {
315 kumpf 1.117 #ifndef PEGASUS_STRING_NO_THROW
316 mike 1.112 317 if (!ptr) 318 throw NullPointer(); 319 320 #endif 321 } 322 323 static void _StringThrowBadUTF8(Uint32 index) 324 { 325 MessageLoaderParms parms( 326 "Common.String.BAD_UTF8", 327 "The byte sequence starting at index $0 " 328 "is not valid UTF-8 encoding.", 329 index); 330 throw Exception(parms); 331 } 332 333 static size_t _copyFromUTF8(
334 david.dillard 1.116 Uint16* dest, 335 const char* src,
336 mike 1.112 size_t n, 337 size_t& utf8_error_index) 338 { 339 Uint16* p = dest; 340 const Uint8* q = (const Uint8)src; 341 342 // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later). 343 // Use loop-unrolling. 344 345 while (n >=8 && ((q[0]\|q[1]\|q[2]\|q[3]\|q[4]\|q[5]\|q[6]\|q[7]) & 0x80) == 0) 346 { 347 p[0] = q[0]; 348 p[1] = q[1]; 349 p[2] = q[2]; 350 p[3] = q[3]; 351 p[4] = q[4]; 352 p[5] = q[5]; 353 p[6] = q[6]; 354 p[7] = q[7]; 355 p += 8; 356 q += 8; 357 mike 1.112 n -= 8; 358 } 359 360 while (n >=4 && ((q[0]\|q[1]\|q[2]\|q[3]) & 0x80) == 0) 361 { 362 p[0] = q[0]; 363 p[1] = q[1]; 364 p[2] = q[2]; 365 p[3] = q[3]; 366 p += 4; 367 q += 4; 368 n -= 4; 369 } 370 371 switch (n) 372 { 373 case 0: 374 return p - dest; 375 case 1: 376 if (q[0] < 128) 377 { 378 mike 1.112 p[0] = q[0]; 379 return p + 1 - dest; 380 } 381 break; 382 case 2: 383 if (((q[0]\|q[1]) & 0x80) == 0) 384 { 385 p[0] = q[0]; 386 p[1] = q[1]; 387 return p + 2 - dest; 388 } 389 break; 390 case 3: 391 if (((q[0]\|q[1]\|q[2]) & 0x80) == 0) 392 { 393 p[0] = q[0]; 394 p[1] = q[1]; 395 p[2] = q[2]; 396 return p + 3 - dest; 397 } 398 break; 399 mike 1.112 } 400 401 // Process remaining characters. 402 403 while (n) 404 { 405 // Optimize for 7-bit ASCII case. 406 407 if (q < 128) 408 { 409 p++ = q++; 410 n--; 411 } 412 else 413 { 414 Uint8 c = UTF_8_COUNT_TRAIL_BYTES(q) + 1; 415 416 if (c > n \|\| !isValid_U8(q, c) \|\| 417 UTF8toUTF16(&q, q + c, &p, p + n) != 0) 418 { 419 utf8_error_index = q - (const Uint8)src; 420 mike 1.112 return size_t(-1); 421 } 422 423 n -= c; 424 } 425 } 426 427 return p - dest; 428 } 429
430 david.dillard 1.116 // Note: dest must be at least three times src (plus an extra byte for
431 mike 1.112 // terminator). 432 static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n) 433 { 434 // The following employs loop unrolling for efficiency. Please do not 435 // eliminate. 436 437 const Uint16* q = src; 438 Uint8* p = (Uint8*)dest; 439 440 while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
441 kumpf 1.82 {
442 mike 1.112 p[0] = q[0]; 443 p[1] = q[1]; 444 p[2] = q[2]; 445 p[3] = q[3]; 446 p += 4; 447 q += 4; 448 n -= 4;
449 kumpf 1.82 }
450 mike 1.112 451 switch (n) 452 { 453 case 0: 454 return p - (Uint8)dest; 455 case 1: 456 if (q[0] < 128) 457 { 458 p[0] = q[0]; 459 return p + 1 - (Uint8)dest; 460 } 461 break; 462 case 2: 463 if (q[0] < 128 && q[1] < 128) 464 { 465 p[0] = q[0]; 466 p[1] = q[1]; 467 return p + 2 - (Uint8)dest; 468 } 469 break; 470 case 3: 471 mike 1.112 if (q[0] < 128 && q[1] < 128 && q[2] < 128) 472 { 473 p[0] = q[0]; 474 p[1] = q[1]; 475 p[2] = q[2]; 476 return p + 3 - (Uint8)dest; 477 } 478 break; 479 } 480 481 // If this line was reached, there must be characters greater than 128. 482 483 UTF16toUTF8(&q, q + n, &p, p + 3 * n); 484 485 return p - (Uint8*)dest;
486 kumpf 1.54 } 487
488 mike 1.112 static inline size_t _convert( 489 Uint16* p, const char* q, size_t n, size_t& utf8_error_index)
490 kumpf 1.54 {
491 mike 1.112 #ifdef PEGASUS_STRING_NO_UTF8 492 _copy(p, q, n); 493 return n; 494 #else 495 return _copyFromUTF8(p, q, n, utf8_error_index); 496 #endif
497 kumpf 1.54 } 498
499 mike 1.112 //============================================================================== 500 // 501 // class CString 502 // 503 //============================================================================== 504 505 CString::CString(const CString& cstr) : _rep(0)
506 kumpf 1.54 {
507 mike 1.112 if (cstr._rep)
508 kumpf 1.82 {
509 mike 1.112 size_t n = strlen(cstr._rep) + 1; 510 _rep = (char*)operator new(n); 511 memcpy(_rep, cstr._rep, n);
512 kumpf 1.82 }
513 kumpf 1.54 } 514
515 kumpf 1.56 CString& CString::operator=(const CString& cstr) 516 {
517 kumpf 1.82 if (&cstr != this)
518 kumpf 1.81 {
519 kumpf 1.82 if (_rep) 520 {
521 mike 1.112 operator delete(_rep);
522 kumpf 1.82 _rep = 0; 523 }
524 mike 1.112
525 kumpf 1.82 if (cstr._rep) 526 {
527 mike 1.112 size_t n = strlen(cstr._rep) + 1; 528 _rep = (char*)operator new(n); 529 memcpy(_rep, cstr._rep, n);
530 kumpf 1.82 }
531 kumpf 1.81 }
532 mike 1.112
533 kumpf 1.56 return *this; 534 } 535
536 mike 1.112 //==============================================================================
537 kumpf 1.54 //
538 mike 1.112 // class StringRep
539 kumpf 1.39 //
540 mike 1.112 //==============================================================================
541 kumpf 1.39
542 mike 1.112 StringRep StringRep::_emptyRep;
543 mike 1.27
544 mike 1.112 inline StringRep* StringRep::alloc(size_t cap)
545 mike 1.27 {
546 mike 1.112 #ifndef PEGASUS_STRING_NO_THROW
547 dave.sudlik 1.119.2.1 548 // Check for potential overflow in cap 549 PEGASUS_CHECK_CAPACITY_OVERFLOW(cap);
550 mike 1.27
551 mike 1.112 #endif
552 mike 1.27
553 mike 1.112 StringRep* rep = (StringRep)::operator new( 554 sizeof(StringRep) + cap sizeof(Uint16)); 555 rep->cap = cap; 556 new(&rep->refs) AtomicInt(1); 557 558 return rep;
559 mike 1.27 } 560
561 mike 1.112 static inline void _reserve(StringRep*& rep, Uint32 cap)
562 chuck 1.102 {
563 mike 1.114 if (cap > rep->cap \|\| rep->refs.get() != 1)
564 chuck 1.102 {
565 mike 1.112 size_t n = _roundUpToPow2(cap); 566 StringRep* newRep = StringRep::alloc(n); 567 newRep->size = rep->size; 568 _copy(newRep->data, rep->data, rep->size + 1); 569 StringRep::unref(rep); 570 rep = newRep; 571 } 572 }
573 david.dillard 1.105
574 mike 1.112 StringRep* StringRep::create(const Uint16* data, size_t size) 575 { 576 StringRep* rep = StringRep::alloc(size); 577 rep->size = size; 578 _copy(rep->data, data, size); 579 rep->data[size] = '\0'; 580 return rep; 581 }
582 chuck 1.102
583 mike 1.112 StringRep* StringRep::copyOnWrite(StringRep* rep) 584 { 585 // Return a new copy of rep. Release rep.
586 chuck 1.102
587 mike 1.112 StringRep* newRep = StringRep::alloc(rep->size); 588 newRep->size = rep->size; 589 _copy(newRep->data, rep->data, rep->size); 590 newRep->data[newRep->size] = '\0'; 591 StringRep::unref(rep); 592 return newRep;
593 chuck 1.102 } 594
595 mike 1.112 StringRep* StringRep::create(const char* data, size_t size)
596 kumpf 1.43 {
597 mike 1.112 StringRep* rep = StringRep::alloc(size); 598 size_t utf8_error_index; 599 rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index); 600 601 #ifndef PEGASUS_STRING_NO_THROW 602 if (rep->size == size_t(-1)) 603 { 604 StringRep::free(rep); 605 _StringThrowBadUTF8(utf8_error_index); 606 } 607 #endif
608 kumpf 1.43
609 mike 1.112 rep->data[rep->size] = '\0';
610 kumpf 1.43
611 mike 1.112 return rep;
612 mike 1.27 } 613
614 mike 1.112 Uint32 StringRep::length(const Uint16* str)
615 mike 1.27 {
616 mike 1.112 // Note: We could unroll this but it is rarely called. 617 618 const Uint16* end = (Uint16)str; 619 620 while (end++) 621 ; 622 623 return end - str - 1;
624 kumpf 1.39 }
625 tony 1.66
626 mike 1.112 //============================================================================== 627 // 628 // class String 629 // 630 //============================================================================== 631 632 const String String::EMPTY;
633 mike 1.27
634 kumpf 1.39 String::String(const String& str, Uint32 n) 635 {
636 mike 1.112 _checkBounds(n, str._rep->size); 637 _rep = StringRep::create(str._rep->data, n);
638 kumpf 1.39 } 639 640 String::String(const Char16* str) 641 {
642 mike 1.112 _checkNullPointer(str); 643 _rep = StringRep::create((Uint16)str, StringRep::length((Uint16)str));
644 mike 1.27 } 645
646 kumpf 1.39 String::String(const Char16* str, Uint32 n) 647 {
648 mike 1.112 _checkNullPointer(str); 649 _rep = StringRep::create((Uint16*)str, n);
650 kumpf 1.39 } 651 652 String::String(const char* str)
653 mike 1.27 {
654 mike 1.112 _checkNullPointer(str);
655 david.dillard 1.105
656 mike 1.112 // Set this just in case create() throws an exception. 657 _rep = &StringRep::_emptyRep; 658 _rep = StringRep::create(str, strlen(str));
659 mike 1.27 } 660
661 kumpf 1.39 String::String(const char* str, Uint32 n)
662 mike 1.27 {
663 mike 1.112 _checkNullPointer(str);
664 david.dillard 1.105
665 mike 1.112 // Set this just in case create() throws an exception. 666 _rep = &StringRep::_emptyRep; 667 _rep = StringRep::create(str, n);
668 kumpf 1.39 }
669 mike 1.27
670 mike 1.112 String::String(const String& s1, const String& s2)
671 kumpf 1.39 {
672 mike 1.112 size_t n1 = s1._rep->size; 673 size_t n2 = s2._rep->size; 674 size_t n = n1 + n2; 675 _rep = StringRep::alloc(n); 676 _copy(_rep->data, s1._rep->data, n1); 677 _copy(_rep->data + n1, s2._rep->data, n2); 678 _rep->size = n; 679 _rep->data[n] = '\0';
680 mike 1.27 } 681
682 mike 1.112 String::String(const String& s1, const char* s2)
683 mike 1.27 {
684 mike 1.112 _checkNullPointer(s2); 685 size_t n1 = s1._rep->size; 686 size_t n2 = strlen(s2); 687 _rep = StringRep::alloc(n1 + n2); 688 _copy(_rep->data, s1._rep->data, n1); 689 size_t utf8_error_index; 690 size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index); 691 692 #ifndef PEGASUS_STRING_NO_THROW 693 if (tmp == size_t(-1))
694 kumpf 1.82 {
695 mike 1.112 StringRep::free(_rep); 696 _rep = &StringRep::_emptyRep; 697 _StringThrowBadUTF8(utf8_error_index);
698 kumpf 1.82 }
699 mike 1.112 #endif 700 701 _rep->size = n1 + tmp; 702 _rep->data[_rep->size] = '\0';
703 mike 1.27 } 704
705 mike 1.112 String::String(const char* s1, const String& s2)
706 mike 1.27 {
707 mike 1.112 _checkNullPointer(s1); 708 size_t n1 = strlen(s1); 709 size_t n2 = s2._rep->size; 710 _rep = StringRep::alloc(n1 + n2); 711 size_t utf8_error_index; 712 size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index); 713 714 #ifndef PEGASUS_STRING_NO_THROW 715 if (tmp == size_t(-1)) 716 { 717 StringRep::free(_rep); 718 _rep = &StringRep::_emptyRep; 719 _StringThrowBadUTF8(utf8_error_index); 720 } 721 #endif 722 723 _rep->size = n2 + tmp; 724 _copy(_rep->data + n1, s2._rep->data, n2); 725 _rep->data[_rep->size] = '\0';
726 mike 1.27 } 727
728 mike 1.112 String& String::assign(const String& str)
729 mike 1.27 {
730 mike 1.112 if (_rep != str._rep)
731 david.dillard 1.105 {
732 mike 1.112 StringRep::unref(_rep); 733 StringRep::ref(_rep = str._rep);
734 david.dillard 1.105 } 735
736 mike 1.27 return this; 737 } 738 739 String& String::assign(const Char16 str, Uint32 n) 740 {
741 mike 1.112 _checkNullPointer(str); 742
743 mike 1.114 if (n > _rep->cap \|\| _rep->refs.get() != 1)
744 david.dillard 1.105 {
745 mike 1.112 StringRep::unref(_rep); 746 _rep = StringRep::alloc(n);
747 david.dillard 1.105 } 748
749 mike 1.112 _rep->size = n; 750 _copy(_rep->data, (Uint16*)str, n); 751 _rep->data[n] = '\0'; 752
753 mike 1.27 return *this; 754 } 755
756 mike 1.112 String& String::assign(const char* str, Uint32 n)
757 chuck 1.102 {
758 mike 1.112 _checkNullPointer(str); 759
760 mike 1.114 if (n > _rep->cap \|\| _rep->refs.get() != 1)
761 david.dillard 1.105 {
762 mike 1.112 StringRep::unref(_rep); 763 _rep = StringRep::alloc(n);
764 david.dillard 1.105 } 765
766 mike 1.112 size_t utf8_error_index; 767 _rep->size = _convert(_rep->data, str, n, utf8_error_index);
768 chuck 1.102
769 mike 1.112 #ifndef PEGASUS_STRING_NO_THROW 770 if (_rep->size == size_t(-1))
771 david.dillard 1.105 {
772 mike 1.112 StringRep::free(_rep); 773 _rep = &StringRep::_emptyRep; 774 _StringThrowBadUTF8(utf8_error_index);
775 david.dillard 1.105 }
776 mike 1.112 #endif 777 778 _rep->data[_rep->size] = 0;
779 david.dillard 1.105
780 mike 1.27 return *this; 781 } 782
783 kumpf 1.39 void String::clear() 784 {
785 mike 1.112 if (_rep->size) 786 {
787 mike 1.114 if (_rep->refs.get() == 1)
788 mike 1.112 { 789 _rep->size = 0; 790 _rep->data[0] = '\0'; 791 } 792 else 793 { 794 StringRep::unref(_rep); 795 _rep = &StringRep::_emptyRep; 796 } 797 }
798 kumpf 1.39 } 799
800 mike 1.112 void String::reserveCapacity(Uint32 cap)
801 kumpf 1.39 {
802 mike 1.112 _reserve(_rep, cap);
803 kumpf 1.39 } 804
805 mike 1.112 CString String::getCString() const 806 {
807 david.dillard 1.116 // A UTF8 string can have three times as many characters as its UTF16 808 // counterpart, so we allocate extra memory for the worst case. In the
809 mike 1.112 // best case, we may need only one third of the memory allocated. But
810 david.dillard 1.116 // downsizing the string afterwards is expensive and unecessary since 811 // CString objects are usually short-lived (disappearing after only a few
812 mike 1.112 // instructions). CString objects are typically created on the stack as 813 // means to obtain a char* pointer. 814 815 #ifdef PEGASUS_STRING_NO_UTF8 816 char* str = (char*)operator new(_rep->size + 1); 817 _copy(str, _rep->data, _rep->size); 818 str[_rep->size] = '\0'; 819 return CString(str);
820 gs.keenan 1.110 #else
821 mike 1.112 Uint32 n = 3 * _rep->size; 822 char* str = (char*)operator new(n + 1); 823 size_t size = _copyToUTF8(str, _rep->data, _rep->size); 824 str[size] = '\0'; 825 return CString(str);
826 gs.keenan 1.110 #endif
827 kumpf 1.39 } 828
829 mike 1.112 String& String::append(const Char16* str, Uint32 n)
830 kumpf 1.39 {
831 mike 1.112 _checkNullPointer(str); 832 833 size_t oldSize = _rep->size; 834 size_t newSize = oldSize + n; 835 _reserve(_rep, newSize); 836 _copy(_rep->data + oldSize, (Uint16)str, n); 837 _rep->size = newSize; 838 _rep->data[newSize] = '\0'; 839 840 return this;
841 kumpf 1.39 } 842
843 mike 1.112 String& String::append(const String& str)
844 mike 1.27 {
845 mike 1.112 return append((Char16*)str._rep->data, str._rep->size);
846 mike 1.27 } 847
848 mike 1.112 String& String::append(const char* str, Uint32 size)
849 mike 1.27 {
850 mike 1.112 _checkNullPointer(str); 851 852 size_t oldSize = _rep->size; 853 size_t cap = oldSize + size; 854 855 _reserve(_rep, cap); 856 size_t utf8_error_index; 857 size_t tmp = _convert( 858 (Uint16*)_rep->data + oldSize, str, size, utf8_error_index); 859 860 #ifndef PEGASUS_STRING_NO_THROW 861 if (tmp == size_t(-1)) 862 { 863 StringRep::free(_rep); 864 _rep = &StringRep::_emptyRep; 865 _StringThrowBadUTF8(utf8_error_index); 866 } 867 #endif
868 mike 1.27
869 mike 1.112 _rep->size += tmp; 870 _rep->data[_rep->size] = '\0';
871 mike 1.27
872 kumpf 1.39 return *this; 873 } 874
875 mike 1.112 void String::remove(Uint32 index, Uint32 n)
876 mike 1.27 {
877 mike 1.112 if (n == PEG_NOT_FOUND) 878 n = _rep->size - index; 879 880 _checkBounds(index + n, _rep->size); 881
882 mike 1.114 if (_rep->refs.get() != 1)
883 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
884 mike 1.27
885 jim.wunderlich 1.115 PEGASUS_ASSERT(index + n <= _rep->size);
886 mike 1.27
887 mike 1.112 size_t rem = _rep->size - (index + n); 888 Uint16* data = _rep->data;
889 mike 1.27
890 mike 1.112 if (rem) 891 memmove(data + index, data + index + n, rem * sizeof(Uint16));
892 mike 1.27
893 mike 1.112 _rep->size -= n; 894 data[_rep->size] = '\0';
895 mike 1.27 } 896
897 mike 1.112 String String::subString(Uint32 index, Uint32 n) const
898 mike 1.27 {
899 mike 1.112 // Note: this implementation is very permissive but used for 900 // backwards compatibility. 901 902 if (index < _rep->size)
903 mike 1.27 {
904 mike 1.112 if (n == PEG_NOT_FOUND \|\| n > _rep->size - index) 905 n = _rep->size - index;
906 mike 1.27
907 mike 1.112 return String((Char16*)_rep->data + index, n);
908 mike 1.27 }
909 david.dillard 1.105 910 return String();
911 mike 1.27 } 912 913 Uint32 String::find(Char16 c) const 914 {
915 mike 1.112 Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
916 mike 1.27
917 mike 1.112 if (p)
918 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
919 mike 1.27 920 return PEG_NOT_FOUND; 921 } 922
923 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
924 mike 1.30 {
925 mike 1.112 _checkBounds(index, _rep->size); 926 927 if (index >= _rep->size) 928 return PEG_NOT_FOUND; 929 930 Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
931 mike 1.30
932 mike 1.112 if (p)
933 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
934 mike 1.30 935 return PEG_NOT_FOUND; 936 } 937
938 mike 1.112 Uint32 StringFindAux( 939 const StringRep* _rep, const Char16* s, Uint32 n)
940 mike 1.27 {
941 mike 1.112 _checkNullPointer(s);
942 mike 1.27
943 mike 1.112 const Uint16* data = _rep->data; 944 size_t rem = _rep->size; 945 946 while (n <= rem)
947 mike 1.30 {
948 mike 1.112 Uint16* p = (Uint16*)_find(data, rem, s[0]); 949 950 if (!p) 951 break;
952 mike 1.30
953 mike 1.112 if (memcmp(p, s, n * sizeof(Uint16)) == 0)
954 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
955 david.dillard 1.105
956 mike 1.112 p++; 957 rem -= p - data; 958 data = p;
959 mike 1.27 }
960 mike 1.112
961 mike 1.27 return PEG_NOT_FOUND; 962 } 963
964 mike 1.112 Uint32 String::find(const char* s) const 965 { 966 _checkNullPointer(s); 967 968 // Note: could optimize away creation of temporary, but this is rarely 969 // called. 970 return find(String(s)); 971 } 972
973 mike 1.27 Uint32 String::reverseFind(Char16 c) const 974 {
975 mike 1.112 Uint16 x = c; 976 Uint16* p = _rep->data; 977 Uint16* q = _rep->data + _rep->size;
978 mike 1.27
979 mike 1.112 while (q != p)
980 mike 1.27 {
981 mike 1.112 if (*--q == x)
982 david.dillard 1.116 return static_cast<Uint32>(q - p);
983 mike 1.27 } 984 985 return PEG_NOT_FOUND; 986 } 987 988 void String::toLower() 989 {
990 david 1.69 #ifdef PEGASUS_HAS_ICU
991 mike 1.112
992 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
993 david 1.90 {
994 mike 1.114 if (_rep->refs.get() != 1)
995 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 996
997 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
998 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this 999 // prevents optimizations where the us-ascii is converted before
1000 mike 1.112 // calling ICU.
1001 yi.zhou 1.108 // The string may shrink or expand after the convert. 1002
1003 mike 1.112 //// First calculate size of resulting string. u_strToLower() returns 1004 //// only the size when zero is passed as the destination size argument. 1005
1006 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR; 1007
1008 mike 1.112 int32_t newSize = u_strToLower( 1009 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
1010 david.dillard 1.116
1011 mike 1.112 err = U_ZERO_ERROR; 1012 1013 //// Reserve enough space for the result. 1014 1015 if ((Uint32)newSize > _rep->cap) 1016 _reserve(_rep, newSize); 1017 1018 //// Perform the conversion (overlapping buffers are allowed).
1019 chuck 1.99
1020 mike 1.112 u_strToLower((UChar)_rep->data, newSize, 1021 (UChar)_rep->data, _rep->size, NULL, &err);
1022 yi.zhou 1.108
1023 mike 1.112 _rep->size = newSize; 1024 return;
1025 david 1.90 }
1026 mike 1.112 1027 #endif /* PEGASUS_HAS_ICU */ 1028
1029 mike 1.114 if (_rep->refs.get() != 1)
1030 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 1031 1032 Uint16* p = _rep->data; 1033 size_t n = _rep->size; 1034 1035 for (; n--; p++)
1036 david 1.90 {
1037 mike 1.112 if (!(p & 0xFF00)) 1038 p = _toLower(*p);
1039 mike 1.27 }
1040 kumpf 1.39 } 1041
1042 chuck 1.99 void String::toUpper()
1043 david 1.90 { 1044 #ifdef PEGASUS_HAS_ICU
1045 mike 1.112
1046 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1047 chuck 1.99 {
1048 mike 1.114 if (_rep->refs.get() != 1)
1049 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 1050
1051 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
1052 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this 1053 // prevents optimizations where the us-ascii is converted before
1054 mike 1.112 // calling ICU.
1055 yi.zhou 1.108 // The string may shrink or expand after the convert. 1056
1057 mike 1.112 //// First calculate size of resulting string. u_strToUpper() returns 1058 //// only the size when zero is passed as the destination size argument. 1059
1060 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR; 1061
1062 mike 1.112 int32_t newSize = u_strToUpper( 1063 NULL, 0, (UChar)_rep->data, _rep->size, NULL, &err); 1064 1065 err = U_ZERO_ERROR; 1066 1067 //// Reserve enough space for the result. 1068 1069 if ((Uint32)newSize > _rep->cap) 1070 _reserve(_rep, newSize); 1071 1072 //// Perform the conversion (overlapping buffers are allowed). 1073 1074 u_strToUpper((UChar)_rep->data, newSize, 1075 (UChar*)_rep->data, _rep->size, NULL, &err);
1076 chuck 1.99
1077 mike 1.112 _rep->size = newSize;
1078 yi.zhou 1.108
1079 mike 1.112 return;
1080 david 1.91 }
1081 mike 1.112 1082 #endif /* PEGASUS_HAS_ICU */ 1083
1084 mike 1.114 if (_rep->refs.get() != 1)
1085 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 1086 1087 Uint16* p = _rep->data; 1088 size_t n = _rep->size; 1089 1090 for (; n--; p++) 1091 p = _toUpper(p);
1092 david 1.90 } 1093
1094 kumpf 1.43 int String::compare(const String& s1, const String& s2, Uint32 n)
1095 kumpf 1.39 {
1096 kumpf 1.118 const Uint16* p1 = s1._rep->data; 1097 const Uint16* p2 = s2._rep->data;
1098 mike 1.27
1099 kumpf 1.118 while (n--) 1100 { 1101 int r = p1++ - p2++; 1102 if (r) 1103 { 1104 return r; 1105 } 1106 else if (!p1[-1]) 1107 { 1108 // We must have encountered a null terminator in both s1 and s2 1109 return 0; 1110 } 1111 } 1112 return 0;
1113 mike 1.27 } 1114
1115 kumpf 1.43 int String::compare(const String& s1, const String& s2)
1116 mike 1.30 {
1117 mike 1.112 return _compare(s1._rep->data, s2._rep->data); 1118 }
1119 kumpf 1.43
1120 mike 1.112 int String::compare(const String& s1, const char* s2) 1121 { 1122 _checkNullPointer(s2);
1123 mike 1.30
1124 mike 1.112 #ifdef PEGASUS_STRING_NO_UTF8 1125 return _compareNoUTF8(s1._rep->data, s2); 1126 #else 1127 // ATTN: optimize this! 1128 return String::compare(s1, String(s2)); 1129 #endif
1130 mike 1.30 } 1131
1132 mike 1.112 int String::compareNoCase(const String& str1, const String& str2)
1133 kumpf 1.40 {
1134 david 1.69 #ifdef PEGASUS_HAS_ICU
1135 mike 1.112
1136 yi.zhou 1.108 if (InitializeICU::initICUSuccessful()) 1137 {
1138 mike 1.112 return u_strcasecmp( 1139 str1._rep->data, str2._rep->data, U_FOLD_CASE_DEFAULT);
1140 yi.zhou 1.108 }
1141 kumpf 1.40
1142 mike 1.112 #endif /* PEGASUS_HAS_ICU / 1143 1144 const Uint16 s1 = str1._rep->data; 1145 const Uint16* s2 = str2._rep->data; 1146 1147 while (s1 && s2)
1148 kumpf 1.40 {
1149 mike 1.112 int r = _toLower(s1++) - _toLower(s2++);
1150 kumpf 1.40
1151 david.dillard 1.105 if (r) 1152 return r;
1153 kumpf 1.40 } 1154
1155 mike 1.112 if (*s2)
1156 david.dillard 1.105 return -1;
1157 mike 1.112 else if (*s1)
1158 david.dillard 1.105 return 1;
1159 kumpf 1.40 1160 return 0; 1161 } 1162
1163 mike 1.112 Boolean StringEqualNoCase(const String& s1, const String& s2)
1164 mike 1.27 {
1165 mike 1.112 #ifdef PEGASUS_HAS_ICU 1166 1167 return String::compareNoCase(s1, s2) == 0; 1168 1169 #else /* PEGASUS_HAS_ICU */
1170 mike 1.27
1171 mike 1.112 // The following employs loop unrolling for efficiency. Please do not 1172 // eliminate.
1173 kumpf 1.39
1174 mike 1.112 Uint16* p = (Uint16)s1.getChar16Data(); 1175 Uint16 q = (Uint16*)s2.getChar16Data(); 1176 Uint32 n = s2.size(); 1177 1178 while (n >= 8) 1179 { 1180 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) \|\| 1181 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) \|\| 1182 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) \|\| 1183 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) \|\| 1184 ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) \|\| 1185 ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) \|\| 1186 ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) \|\| 1187 ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7])))) 1188 { 1189 return false; 1190 }
1191 kumpf 1.39
1192 mike 1.112 n -= 8; 1193 p += 8; 1194 q += 8; 1195 }
1196 mike 1.27
1197 mike 1.112 while (n >= 4)
1198 kumpf 1.39 {
1199 mike 1.112 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) \|\| 1200 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) \|\| 1201 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) \|\| 1202 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
1203 david.dillard 1.105 {
1204 mike 1.112 return false;
1205 david.dillard 1.105 }
1206 mike 1.112 1207 n -= 4; 1208 p += 4; 1209 q += 4; 1210 } 1211 1212 while (n--) 1213 { 1214 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
1215 david.dillard 1.105 return false;
1216 mike 1.112 1217 p++; 1218 q++;
1219 kumpf 1.39 }
1220 mike 1.28
1221 kumpf 1.39 return true;
1222 mike 1.112 1223 #endif /* PEGASUS_HAS_ICU */
1224 david 1.69 } 1225
1226 mike 1.112 Boolean String::equalNoCase(const String& s1, const char* s2)
1227 david 1.69 {
1228 mike 1.112 _checkNullPointer(s2);
1229 david 1.69
1230 mike 1.112 #if defined(PEGASUS_HAS_ICU)
1231 david 1.69
1232 mike 1.112 return String::equalNoCase(s1, String(s2));
1233 david 1.69
1234 mike 1.112 #elif defined(PEGASUS_STRING_NO_UTF8)
1235 david 1.69
1236 mike 1.112 const Uint16* p1 = (Uint16)s1._rep->data; 1237 const char p2 = s2; 1238 size_t n = s1._rep->size;
1239 david.dillard 1.105
1240 mike 1.112 while (n--) 1241 { 1242 if (!*p2) 1243 return false;
1244 david 1.71
1245 mike 1.112 if (_toUpper(p1++) != _toUpperTable[int(p2++)]) 1246 return false; 1247 }
1248 kumpf 1.42
1249 mike 1.112 if (*p2) 1250 return false;
1251 david.dillard 1.116
1252 mike 1.112 return true;
1253 karl 1.36
1254 mike 1.112 #else /* PEGASUS_HAS_ICU */
1255 david.dillard 1.105
1256 mike 1.112 // ATTN: optimize this! 1257 return String::equalNoCase(s1, String(s2));
1258 david.dillard 1.105
1259 mike 1.112 #endif /* PEGASUS_HAS_ICU */ 1260 }
1261 chuck 1.78
1262 mike 1.112 Boolean String::equal(const String& s1, const String& s2)
1263 karl 1.36 {
1264 david.dillard 1.116 return s1._rep->size == s2._rep->size && memcmp(s1._rep->data,
1265 mike 1.112 s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;
1266 karl 1.36 } 1267
1268 mike 1.112 Boolean String::equal(const String& s1, const char* s2) 1269 { 1270 #ifdef PEGASUS_STRING_NO_UTF8
1271 kumpf 1.35
1272 mike 1.112 _checkNullPointer(s2);
1273 kumpf 1.39
1274 mike 1.112 const Uint16* p = (Uint16)s1._rep->data; 1275 const char q = s2;
1276 kumpf 1.39
1277 mike 1.112 while (p && q) 1278 { 1279 if (p++ != Uint16(q++)) 1280 return false; 1281 }
1282 kumpf 1.39
1283 mike 1.112 return !(p \|\| q);
1284 kumpf 1.39
1285 mike 1.112 #else /* PEGASUS_STRING_NO_UTF8 */
1286 kumpf 1.39
1287 mike 1.112 return String::equal(s1, String(s2));
1288 kumpf 1.39
1289 mike 1.112 #endif /* PEGASUS_STRING_NO_UTF8 */
1290 kumpf 1.39 } 1291
1292 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
1293 kumpf 1.39 {
1294 mike 1.112 #if defined(PEGASUS_OS_OS400)
1295 david 1.72
1296 david 1.93 CString cstr = str.getCString();
1297 david 1.69 const char* utf8str = cstr;
1298 mike 1.112 os << utf8str; 1299 return os;
1300 david.dillard 1.116 #else
1301 david 1.69
1302 mike 1.112 #if defined(PEGASUS_HAS_ICU)
1303 david 1.69
1304 yi.zhou 1.108 if (InitializeICU::initICUSuccessful()) 1305 {
1306 david.dillard 1.105 char buf = NULL; 1307 const int size = str.size() 6;
1308 mike 1.112 UnicodeString UniStr( 1309 (const UChar *)str.getChar16Data(), (int32_t)str.size());
1310 david.dillard 1.105 Uint32 bufsize = UniStr.extract(0,size,buf); 1311 buf = new char[bufsize+1]; 1312 UniStr.extract(0,bufsize,buf); 1313 os << buf; 1314 os.flush(); 1315 delete [] buf;
1316 david.dillard 1.116 return os;
1317 yi.zhou 1.108 }
1318 mike 1.112
1319 david.dillard 1.116 #endif // PEGASUS_HAS_ICU
1320 mike 1.112 1321 for (Uint32 i = 0, n = str.size(); i < n; i++)
1322 yi.zhou 1.108 {
1323 mike 1.112 Uint16 code = str[i];
1324 david.dillard 1.105
1325 mike 1.112 if (code > 0 && !(code & 0xFF00)) 1326 os << char(code); 1327 else 1328 { 1329 // Print in hex format: 1330 char buffer[8]; 1331 sprintf(buffer, "\\x%04X", code); 1332 os << buffer;
1333 david.dillard 1.105 }
1334 yi.zhou 1.108 }
1335 kumpf 1.39 1336 return os;
1337 mike 1.112 #endif // PEGASUS_OS_OS400
1338 kumpf 1.39 } 1339
1340 mike 1.112 void StringAppendCharAux(StringRep*& _rep)
1341 kumpf 1.39 {
1342 mike 1.112 StringRep* tmp; 1343 1344 if (_rep->cap) 1345 { 1346 tmp = StringRep::alloc(2 * _rep->cap); 1347 tmp->size = _rep->size; 1348 _copy(tmp->data, _rep->data, _rep->size); 1349 } 1350 else 1351 { 1352 tmp = StringRep::alloc(8); 1353 tmp->size = 0; 1354 } 1355 1356 StringRep::unref(_rep); 1357 _rep = tmp;
1358 kumpf 1.39 } 1359
PEGASUS_NAMESPACE_END

/*
================================================================================

String optimizations:

    1.  Added mechanism allowing certain functions to be inlined only when
        used by internal Pegasus modules. External modules (i.e., providers)
        link to a non-inline version, which allows for binary compatibility.

    2.  Implemented copy-on-write with atomic increment/decrement. This
        yielded a 10% improvement for the 'gc' benchmark and an 11% improvement
        for the 'ni1000' benchmark.

    3.  Employed loop unrolling in several places. For example, see:

            static Uint16* _find(const Uint16* s, size_t n, Uint16 c);

    4.  Used the "empty-rep" optimization (described in whitepaper from the
        GCC Developers Summit). This reduced default construction to a simple
        pointer assignment.

            inline String::String() : _rep(&_emptyRep) { }

    5.  Implemented Uint16 versions of toupper() and tolower() using tables.
        For example:

            static const char _upper[] =
            {
                0,1,2,...255
            };

            inline Uint16 _toUpper(Uint16 x)
            {
                return (x & 0xFF00) ? x : _upper[x];
            }

        This outperforms the system implementation by avoiding an anding
        operation.

    6.  Implemented char* version of the following member functions to
        eliminate unnecessary creation of anonymous string objects
        (temporaries).

            String(const String& s1, const char* s2);
            String(const char* s1, const String& s2);
            String& String::operator=(const char* str);
            Uint32 String::find(const char* s) const;
            bool String::equal(const String& s1, const char* s2);
            static int String::compare(const String& s1, const char* s2);
            String& String::append(const char* str);
            String& String::append(const char* str, Uint32 size);
            static bool String::equalNoCase(const String& s1, const char* s2);
            String& operator=(const char* str)
            String& String::assign(const char* str)
            String& String::append(const char* str)
            Boolean operator==(const String& s1, const char* s2)
            Boolean operator==(const char* s1, const String& s2)
            Boolean operator!=(const String& s1, const char* s2)
            Boolean operator!=(const char* s1, const String& s2)
            Boolean operator<(const String& s1, const char* s2)
            Boolean operator<(const char* s1, const String& s2)
            Boolean operator>(const String& s1, const char* s2)
            Boolean operator>(const char* s1, const String& s2)
            Boolean operator<=(const String& s1, const char* s2)
            Boolean operator<=(const char* s1, const String& s2)
            Boolean operator>=(const String& s1, const char* s2)
            Boolean operator>=(const char* s1, const String& s2)
            String operator+(const String& s1, const char* s2)
            String operator+(const char* s1, const String& s2)

    7.  Optimized _roundUpToPow2(), used in rounding the capacity to the next
        power of two (algorithm from the book "Hacker's Delight").

            static Uint32 _roundUpToPow2(Uint32 x)
            {
                if (x < 8)
                    return 8;

                x--;
                x |= (x >> 1);
                x |= (x >> 2);
                x |= (x >> 4);
                x |= (x >> 8);
                x |= (x >> 16);
                x++;

                return x;
            }

    8.  Implemented "concatenating constructors" to eliminate temporaries
        created by operator+(). This scheme employs the "return-value
        optimization" described by Stan Lippman.

            inline String operator+(const String& s1, const String& s2)
            {
                return String(s1, s2, 0);
            }

    9.  Experimented to find the optimal initial size for a short string.
        Eight seems to offer the best tradeoff between space and time.

    10. Inlined all members of the Char16 class.

    11. Used Uint16 internally in the String class. This showed no improvement
        since Char16 was already fully inlined and was essentially reduced to
        Uint16 in any case.

    12. Implemented conditional logic (#if) allowing error checking logic to
        be excluded to improve performance. Examples include bounds checking
        and null-pointer checking.

    13. Used memcpy() and memcmp() where possible. These are implemented using
        the rep family of instructions under Intel and are much faster.

    14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
        copy routine overhead.

    15. Added ASCII7 form of the constructor and assign().

            String s("hello world", String::ASCII7);

            s.assignASCII7("hello world");

        This avoids slower UTF8 processing when not needed.

================================================================================

TO-DO:

    (+) [DONE] Use PEGASUS_USE_EXPERIMENTAL_INTERFACES

    (+) [DONE] Submit BUG-2754 (Windows buffer limit).

    (+) [DONE] Eliminate char versions of find() and append().

    (+) [DONE] Remove PEGASUS_MAX_PRINTABLE_CHARACTER from Config.h

    (+) [DONE] Change _next_pow_2() to _roundUpToPow2().

    (+) [DONE] Change '99' to '2' in StringRep constructor (comment as well).

    (+) [DONE] Comment StringRep allocation layout.

    (+) [DONE] Conceal private inline functions.

    (+) [DONE] Shorten inclusion of StringInline.h in String.h.

    (+) [DONE] Change USE_INTERNAL_INLINE TO DISABLE_INTERNAL_INLINE or get
        rid of altogether.

    (+) [DONE] useCamelNotationOnAllFunctionNames.

    (+) [DONE] Check for overflow condition in StringRep::alloc().

    (+) [DONE] Remove tabs (used vim ":set expandtab" and ":retab").

    (+) [DONE] Fix throw-related memory leak.

    (+) [DONE] Look at PEP223 for coding security guidelines.

    (+) [DONE] Use old AtomicInt for now (new AtomicInt part of bug #4250).

    (+) [DONE] Removed appendASCII() and the ASCII form of the constructor.

    (+) DOC++ String.h - will open new bug?

    (+) Added PEGASUS_DISABLE_INTERNAL_INLINES macro (to permit suppression
        on certain platforms).

================================================================================
*/

No CVS admin address has been configured