pegasus/src/Pegasus/Common/String.cpp - annotate

Return to String.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

1 karl 1.119 //%2006////////////////////////////////////////////////////////////////////////
2 mike 1.27 //
3 karl 1.97 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development 4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems. 5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
6 karl 1.85 // IBM Corp.; EMC Corporation, The Open Group.
7 karl 1.97 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.; 8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
9 karl 1.98 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.; 10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
11 karl 1.119 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.; 12 // EMC Corporation; Symantec Corporation; The Open Group.
13 mike 1.27 // 14 // Permission is hereby granted, free of charge, to any person obtaining a copy
15 kumpf 1.41 // of this software and associated documentation files (the "Software"), to 16 // deal in the Software without restriction, including without limitation the 17 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
18 mike 1.27 // sell copies of the Software, and to permit persons to whom the Software is 19 // furnished to do so, subject to the following conditions:
20 karl 1.119 //
21 kumpf 1.41 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
22 mike 1.27 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED 23 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
24 kumpf 1.41 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 25 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 26 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
27 mike 1.27 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 29 // 30 //============================================================================== 31 //
32 mike 1.112 // Author: Mike Brasher (mbrasher@austin.rr.com)
33 mike 1.27 //
34 david.dillard 1.116 // Modified By:
35 mike 1.112 // Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com) 36 // Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297
37 david.dillard 1.116 // David Dillard, Symantec Corp. (david_dillard@symantec.com)
38 mike 1.112 // Mike Brasher (mike-brasher@austin.rr.com)
39 mike 1.27 // 40 //%///////////////////////////////////////////////////////////////////////////// 41
42 jim.wunderlich 1.115 #include <Pegasus/Common/PegasusAssert.h>
43 mike 1.113 #include <cstring>
44 kumpf 1.48 #include "InternalException.h"
45 david 1.69 #include "CommonUTF.h"
46 mike 1.112 #include "MessageLoader.h" 47 #include "StringRep.h"
48 david 1.69 49 #ifdef PEGASUS_HAS_ICU
50 chuck 1.99 #include <unicode/ustring.h> 51 #include <unicode/uchar.h>
52 david 1.69 #endif 53
54 mike 1.112 PEGASUS_NAMESPACE_BEGIN
55 mike 1.28
56 mike 1.112 //============================================================================== 57 // 58 // Compile-time macros (undefined by default). 59 // 60 // PEGASUS_STRING_NO_THROW -- suppresses throwing of exceptions
61 david.dillard 1.116 //
62 mike 1.112 // PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code. 63 // 64 //==============================================================================
65 mike 1.27
66 mike 1.112 //==============================================================================
67 kumpf 1.39 //
68 mike 1.112 // File-scope definitions:
69 kumpf 1.54 //
70 mike 1.112 //============================================================================== 71 72 // Note: this table is much faster than the system toupper(). Please do not 73 // change.
74 kumpf 1.54
75 david.dillard 1.116 const Uint8 _toUpperTable[256] =
76 kumpf 1.54 {
77 mike 1.112 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 78 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 79 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 80 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 81 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 82 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 83 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 84 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 85 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47, 86 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, 87 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57, 88 0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, 89 0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47, 90 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, 91 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57, 92 0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F, 93 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 94 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 95 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 96 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 97 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 98 mike 1.112 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 99 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 100 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 101 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 102 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 103 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 104 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 105 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 106 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 107 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 108 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF, 109 }; 110 111 // Note: this table is much faster than the system tulower(). Please do not 112 // change. 113
114 david.dillard 1.116 const Uint8 _toLowerTable[256] =
115 mike 1.112 { 116 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 117 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 118 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 119 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 120 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 121 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 122 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 123 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 124 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 125 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 126 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 127 0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F, 128 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 129 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 130 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 131 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, 132 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 133 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 134 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 135 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 136 mike 1.112 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 137 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 138 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 139 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 140 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 141 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 142 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 143 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 144 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 145 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 146 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 147 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF, 148 }; 149 150 // Converts 16-bit characters to upper case. This routine is faster than the 151 // system toupper(). Please do not change. 152 inline Uint16 _toUpper(Uint16 x) 153 { 154 return (x & 0xFF00) ? x : _toUpperTable[x];
155 kumpf 1.54 } 156
157 mike 1.112 // Converts 16-bit characters to lower case. This routine is faster than the 158 // system toupper(). Please do not change. 159 inline Uint16 _toLower(Uint16 x)
160 kumpf 1.54 {
161 mike 1.112 return (x & 0xFF00) ? x : _toLowerTable[x]; 162 } 163 164 // Rounds x up to the nearest power of two (or just returns 8 if x < 8). 165 static Uint32 _roundUpToPow2(Uint32 x) 166 { 167 #ifndef PEGASUS_STRING_NO_THROW 168
169 dave.sudlik 1.119.2.1 // Check for potential overflow in x 170 PEGASUS_CHECK_CAPACITY_OVERFLOW(x);
171 mike 1.112 172 #endif 173 174 if (x < 8) 175 return 8; 176 177 x--; 178 x \|= (x >> 1); 179 x \|= (x >> 2); 180 x \|= (x >> 4); 181 x \|= (x >> 8); 182 x \|= (x >> 16); 183 x++; 184 185 return x; 186 } 187 188 template<class P, class Q> 189 static void _copy(P* p, const Q* q, size_t n) 190 { 191 // The following employs loop unrolling for efficiency. Please do not 192 mike 1.112 // eliminate. 193 194 while (n >= 8) 195 { 196 p[0] = q[0]; 197 p[1] = q[1]; 198 p[2] = q[2]; 199 p[3] = q[3]; 200 p[4] = q[4]; 201 p[5] = q[5]; 202 p[6] = q[6]; 203 p[7] = q[7]; 204 p += 8; 205 q += 8; 206 n -= 8; 207 } 208 209 while (n >= 4) 210 { 211 p[0] = q[0]; 212 p[1] = q[1]; 213 mike 1.112 p[2] = q[2]; 214 p[3] = q[3]; 215 p += 4; 216 q += 4; 217 n -= 4; 218 } 219 220 while (n--) 221 p++ = q++; 222 } 223 224 static Uint16* _find(const Uint16* s, size_t n, Uint16 c) 225 { 226 // The following employs loop unrolling for efficiency. Please do not 227 // eliminate. 228 229 while (n >= 4) 230 { 231 if (s[0] == c) 232 return (Uint16)s; 233 if (s[1] == c) 234 mike 1.112 return (Uint16)&s[1]; 235 if (s[2] == c) 236 return (Uint16)&s[2]; 237 if (s[3] == c) 238 return (Uint16)&s[3];
239 kumpf 1.82
240 mike 1.112 n -= 4; 241 s += 4; 242 } 243 244 if (n) 245 { 246 if (s == c) 247 return (Uint16)s; 248 s++; 249 n--; 250 } 251 252 if (n) 253 { 254 if (s == c) 255 return (Uint16)s; 256 s++; 257 n--; 258 } 259 260 if (n && s == c) 261 mike 1.112 return (Uint16)s; 262 263 // Not found! 264 return 0; 265 } 266 267 static int _compare(const Uint16* s1, const Uint16* s2) 268 { 269 while (s1 && s2) 270 { 271 int r = s1++ - s2++; 272 273 if (r) 274 return r; 275 } 276 277 if (s2) 278 return -1; 279 else if (s1) 280 return 1; 281 282 mike 1.112 return 0; 283 } 284 285 static int _compareNoUTF8(const Uint16* s1, const char* s2) 286 { 287 Uint16 c1; 288 Uint16 c2; 289 290 do 291 { 292 c1 = s1++; 293 c2 = s2++; 294 295 if (c1 == 0) 296 return c1 - c2; 297 } 298 while (c1 == c2); 299 300 return c1 - c2; 301 } 302 303 mike 1.112 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n) 304 { 305 memcpy(s1, s2, n * sizeof(Uint16)); 306 } 307 308 void StringThrowOutOfBounds() 309 { 310 throw IndexOutOfBoundsException(); 311 } 312 313 inline void _checkNullPointer(const void* ptr) 314 {
315 kumpf 1.117 #ifndef PEGASUS_STRING_NO_THROW
316 mike 1.112 317 if (!ptr) 318 throw NullPointer(); 319 320 #endif 321 } 322 323 static void _StringThrowBadUTF8(Uint32 index) 324 { 325 MessageLoaderParms parms( 326 "Common.String.BAD_UTF8", 327 "The byte sequence starting at index $0 " 328 "is not valid UTF-8 encoding.", 329 index); 330 throw Exception(parms); 331 } 332 333 static size_t _copyFromUTF8(
334 david.dillard 1.116 Uint16* dest, 335 const char* src,
336 mike 1.112 size_t n, 337 size_t& utf8_error_index) 338 { 339 Uint16* p = dest; 340 const Uint8* q = (const Uint8)src; 341 342 // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later). 343 // Use loop-unrolling. 344 345 while (n >=8 && ((q[0]\|q[1]\|q[2]\|q[3]\|q[4]\|q[5]\|q[6]\|q[7]) & 0x80) == 0) 346 { 347 p[0] = q[0]; 348 p[1] = q[1]; 349 p[2] = q[2]; 350 p[3] = q[3]; 351 p[4] = q[4]; 352 p[5] = q[5]; 353 p[6] = q[6]; 354 p[7] = q[7]; 355 p += 8; 356 q += 8; 357 mike 1.112 n -= 8; 358 } 359 360 while (n >=4 && ((q[0]\|q[1]\|q[2]\|q[3]) & 0x80) == 0) 361 { 362 p[0] = q[0]; 363 p[1] = q[1]; 364 p[2] = q[2]; 365 p[3] = q[3]; 366 p += 4; 367 q += 4; 368 n -= 4; 369 } 370 371 switch (n) 372 { 373 case 0: 374 return p - dest; 375 case 1: 376 if (q[0] < 128) 377 { 378 mike 1.112 p[0] = q[0]; 379 return p + 1 - dest; 380 } 381 break; 382 case 2: 383 if (((q[0]\|q[1]) & 0x80) == 0) 384 { 385 p[0] = q[0]; 386 p[1] = q[1]; 387 return p + 2 - dest; 388 } 389 break; 390 case 3: 391 if (((q[0]\|q[1]\|q[2]) & 0x80) == 0) 392 { 393 p[0] = q[0]; 394 p[1] = q[1]; 395 p[2] = q[2]; 396 return p + 3 - dest; 397 } 398 break; 399 mike 1.112 } 400 401 // Process remaining characters. 402 403 while (n) 404 { 405 // Optimize for 7-bit ASCII case. 406 407 if (q < 128) 408 { 409 p++ = q++; 410 n--; 411 } 412 else 413 { 414 Uint8 c = UTF_8_COUNT_TRAIL_BYTES(q) + 1; 415 416 if (c > n \|\| !isValid_U8(q, c) \|\| 417 UTF8toUTF16(&q, q + c, &p, p + n) != 0) 418 { 419 utf8_error_index = q - (const Uint8)src; 420 mike 1.112 return size_t(-1); 421 } 422 423 n -= c; 424 } 425 } 426 427 return p - dest; 428 } 429
430 david.dillard 1.116 // Note: dest must be at least three times src (plus an extra byte for
431 mike 1.112 // terminator). 432 static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n) 433 { 434 // The following employs loop unrolling for efficiency. Please do not 435 // eliminate. 436 437 const Uint16* q = src; 438 Uint8* p = (Uint8*)dest; 439 440 while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
441 kumpf 1.82 {
442 mike 1.112 p[0] = q[0]; 443 p[1] = q[1]; 444 p[2] = q[2]; 445 p[3] = q[3]; 446 p += 4; 447 q += 4; 448 n -= 4;
449 kumpf 1.82 }
450 mike 1.112 451 switch (n) 452 { 453 case 0: 454 return p - (Uint8)dest; 455 case 1: 456 if (q[0] < 128) 457 { 458 p[0] = q[0]; 459 return p + 1 - (Uint8)dest; 460 } 461 break; 462 case 2: 463 if (q[0] < 128 && q[1] < 128) 464 { 465 p[0] = q[0]; 466 p[1] = q[1]; 467 return p + 2 - (Uint8)dest; 468 } 469 break; 470 case 3: 471 mike 1.112 if (q[0] < 128 && q[1] < 128 && q[2] < 128) 472 { 473 p[0] = q[0]; 474 p[1] = q[1]; 475 p[2] = q[2]; 476 return p + 3 - (Uint8)dest; 477 } 478 break; 479 } 480 481 // If this line was reached, there must be characters greater than 128. 482 483 UTF16toUTF8(&q, q + n, &p, p + 3 * n); 484 485 return p - (Uint8*)dest;
486 kumpf 1.54 } 487
488 mike 1.112 static inline size_t _convert( 489 Uint16* p, const char* q, size_t n, size_t& utf8_error_index)
490 kumpf 1.54 {
491 mike 1.112 #ifdef PEGASUS_STRING_NO_UTF8 492 _copy(p, q, n); 493 return n; 494 #else 495 return _copyFromUTF8(p, q, n, utf8_error_index); 496 #endif
497 kumpf 1.54 } 498
499 mike 1.112 //============================================================================== 500 // 501 // class CString 502 // 503 //============================================================================== 504 505 CString::CString(const CString& cstr) : _rep(0)
506 kumpf 1.54 {
507 mike 1.112 if (cstr._rep)
508 kumpf 1.82 {
509 mike 1.112 size_t n = strlen(cstr._rep) + 1; 510 _rep = (char*)operator new(n); 511 memcpy(_rep, cstr._rep, n);
512 kumpf 1.82 }
513 kumpf 1.54 } 514
515 kumpf 1.56 CString& CString::operator=(const CString& cstr) 516 {
517 kumpf 1.82 if (&cstr != this)
518 kumpf 1.81 {
519 kumpf 1.82 if (_rep) 520 {
521 mike 1.112 operator delete(_rep);
522 kumpf 1.82 _rep = 0; 523 }
524 mike 1.112
525 kumpf 1.82 if (cstr._rep) 526 {
527 mike 1.112 size_t n = strlen(cstr._rep) + 1; 528 _rep = (char*)operator new(n); 529 memcpy(_rep, cstr._rep, n);
530 kumpf 1.82 }
531 kumpf 1.81 }
532 mike 1.112
533 kumpf 1.56 return *this; 534 } 535
536 mike 1.112 //==============================================================================
537 kumpf 1.54 //
538 mike 1.112 // class StringRep
539 kumpf 1.39 //
540 mike 1.112 //==============================================================================
541 kumpf 1.39
542 mike 1.112 StringRep StringRep::_emptyRep;
543 mike 1.27
544 mike 1.112 inline StringRep* StringRep::alloc(size_t cap)
545 mike 1.27 {
546 mike 1.112 #ifndef PEGASUS_STRING_NO_THROW
547 dave.sudlik 1.119.2.1 548 // Check for potential overflow in cap 549 PEGASUS_CHECK_CAPACITY_OVERFLOW(cap);
550 mike 1.27
551 mike 1.112 #endif
552 mike 1.27
553 mike 1.112 StringRep* rep = (StringRep)::operator new( 554 sizeof(StringRep) + cap sizeof(Uint16)); 555 rep->cap = cap; 556 new(&rep->refs) AtomicInt(1); 557 558 return rep;
559 mike 1.27 } 560
561 mike 1.112 static inline void _reserve(StringRep*& rep, Uint32 cap)
562 chuck 1.102 {
563 mike 1.114 if (cap > rep->cap \|\| rep->refs.get() != 1)
564 chuck 1.102 {
565 mike 1.112 size_t n = _roundUpToPow2(cap); 566 StringRep* newRep = StringRep::alloc(n); 567 newRep->size = rep->size; 568 _copy(newRep->data, rep->data, rep->size + 1); 569 StringRep::unref(rep); 570 rep = newRep; 571 } 572 }
573 david.dillard 1.105
574 mike 1.112 StringRep* StringRep::create(const Uint16* data, size_t size) 575 { 576 StringRep* rep = StringRep::alloc(size); 577 rep->size = size; 578 _copy(rep->data, data, size); 579 rep->data[size] = '\0'; 580 return rep; 581 }
582 chuck 1.102
583 mike 1.112 StringRep* StringRep::copyOnWrite(StringRep* rep) 584 { 585 // Return a new copy of rep. Release rep.
586 chuck 1.102
587 mike 1.112 StringRep* newRep = StringRep::alloc(rep->size); 588 newRep->size = rep->size; 589 _copy(newRep->data, rep->data, rep->size); 590 newRep->data[newRep->size] = '\0'; 591 StringRep::unref(rep); 592 return newRep;
593 chuck 1.102 } 594
595 mike 1.112 StringRep* StringRep::create(const char* data, size_t size)
596 kumpf 1.43 {
597 mike 1.112 StringRep* rep = StringRep::alloc(size); 598 size_t utf8_error_index; 599 rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index); 600 601 #ifndef PEGASUS_STRING_NO_THROW 602 if (rep->size == size_t(-1)) 603 { 604 StringRep::free(rep); 605 _StringThrowBadUTF8(utf8_error_index); 606 } 607 #endif
608 kumpf 1.43
609 mike 1.112 rep->data[rep->size] = '\0';
610 kumpf 1.43
611 mike 1.112 return rep;
612 mike 1.27 } 613
614 mike 1.112 Uint32 StringRep::length(const Uint16* str)
615 mike 1.27 {
616 mike 1.112 // Note: We could unroll this but it is rarely called. 617 618 const Uint16* end = (Uint16)str; 619 620 while (end++) 621 ; 622 623 return end - str - 1;
624 kumpf 1.39 }
625 tony 1.66
626 mike 1.112 //============================================================================== 627 // 628 // class String 629 // 630 //============================================================================== 631 632 const String String::EMPTY;
633 mike 1.27
634 kumpf 1.39 String::String(const String& str, Uint32 n) 635 {
636 mike 1.112 _checkBounds(n, str._rep->size); 637 _rep = StringRep::create(str._rep->data, n);
638 kumpf 1.39 } 639 640 String::String(const Char16* str) 641 {
642 mike 1.112 _checkNullPointer(str); 643 _rep = StringRep::create((Uint16)str, StringRep::length((Uint16)str));
644 mike 1.27 } 645
646 kumpf 1.39 String::String(const Char16* str, Uint32 n) 647 {
648 mike 1.112 _checkNullPointer(str); 649 _rep = StringRep::create((Uint16*)str, n);
650 kumpf 1.39 } 651 652 String::String(const char* str)
653 mike 1.27 {
654 mike 1.112 _checkNullPointer(str);
655 david.dillard 1.105
656 mike 1.112 // Set this just in case create() throws an exception. 657 _rep = &StringRep::_emptyRep; 658 _rep = StringRep::create(str, strlen(str));
659 mike 1.27 } 660
661 kumpf 1.39 String::String(const char* str, Uint32 n)
662 mike 1.27 {
663 mike 1.112 _checkNullPointer(str);
664 david.dillard 1.105
665 mike 1.112 // Set this just in case create() throws an exception. 666 _rep = &StringRep::_emptyRep; 667 _rep = StringRep::create(str, n);
668 kumpf 1.39 }
669 mike 1.27
670 mike 1.112 String::String(const String& s1, const String& s2)
671 kumpf 1.39 {
672 mike 1.112 size_t n1 = s1._rep->size; 673 size_t n2 = s2._rep->size; 674 size_t n = n1 + n2; 675 _rep = StringRep::alloc(n); 676 _copy(_rep->data, s1._rep->data, n1); 677 _copy(_rep->data + n1, s2._rep->data, n2); 678 _rep->size = n; 679 _rep->data[n] = '\0';
680 mike 1.27 } 681
682 mike 1.112 String::String(const String& s1, const char* s2)
683 mike 1.27 {
684 mike 1.112 _checkNullPointer(s2); 685 size_t n1 = s1._rep->size; 686 size_t n2 = strlen(s2); 687 _rep = StringRep::alloc(n1 + n2); 688 _copy(_rep->data, s1._rep->data, n1); 689 size_t utf8_error_index; 690 size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index); 691 692 #ifndef PEGASUS_STRING_NO_THROW 693 if (tmp == size_t(-1))
694 kumpf 1.82 {
695 mike 1.112 StringRep::free(_rep); 696 _rep = &StringRep::_emptyRep; 697 _StringThrowBadUTF8(utf8_error_index);
698 kumpf 1.82 }
699 mike 1.112 #endif 700 701 _rep->size = n1 + tmp; 702 _rep->data[_rep->size] = '\0';
703 mike 1.27 } 704
705 mike 1.112 String::String(const char* s1, const String& s2)
706 mike 1.27 {
707 mike 1.112 _checkNullPointer(s1); 708 size_t n1 = strlen(s1); 709 size_t n2 = s2._rep->size; 710 _rep = StringRep::alloc(n1 + n2); 711 size_t utf8_error_index; 712 size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index); 713 714 #ifndef PEGASUS_STRING_NO_THROW 715 if (tmp == size_t(-1)) 716 { 717 StringRep::free(_rep); 718 _rep = &StringRep::_emptyRep; 719 _StringThrowBadUTF8(utf8_error_index); 720 } 721 #endif 722 723 _rep->size = n2 + tmp; 724 _copy(_rep->data + n1, s2._rep->data, n2); 725 _rep->data[_rep->size] = '\0';
726 mike 1.27 } 727
728 mike 1.112 String& String::assign(const String& str)
729 mike 1.27 {
730 mike 1.112 if (_rep != str._rep)
731 david.dillard 1.105 {
732 mike 1.112 StringRep::unref(_rep); 733 StringRep::ref(_rep = str._rep);
734 david.dillard 1.105 } 735
736 mike 1.27 return this; 737 } 738 739 String& String::assign(const Char16 str, Uint32 n) 740 {
741 mike 1.112 _checkNullPointer(str); 742
743 mike 1.114 if (n > _rep->cap \|\| _rep->refs.get() != 1)
744 david.dillard 1.105 {
745 mike 1.112 StringRep::unref(_rep); 746 _rep = StringRep::alloc(n);
747 david.dillard 1.105 } 748
749 mike 1.112 _rep->size = n; 750 _copy(_rep->data, (Uint16*)str, n); 751 _rep->data[n] = '\0'; 752
753 mike 1.27 return *this; 754 } 755
756 mike 1.112 String& String::assign(const char* str, Uint32 n)
757 chuck 1.102 {
758 mike 1.112 _checkNullPointer(str); 759
760 mike 1.114 if (n > _rep->cap \|\| _rep->refs.get() != 1)
761 david.dillard 1.105 {
762 mike 1.112 StringRep::unref(_rep); 763 _rep = StringRep::alloc(n);
764 david.dillard 1.105 } 765
766 mike 1.112 size_t utf8_error_index; 767 _rep->size = _convert(_rep->data, str, n, utf8_error_index);
768 chuck 1.102
769 mike 1.112 #ifndef PEGASUS_STRING_NO_THROW 770 if (_rep->size == size_t(-1))
771 david.dillard 1.105 {
772 mike 1.112 StringRep::free(_rep); 773 _rep = &StringRep::_emptyRep; 774 _StringThrowBadUTF8(utf8_error_index);
775 david.dillard 1.105 }
776 mike 1.112 #endif 777 778 _rep->data[_rep->size] = 0;
779 david.dillard 1.105
780 mike 1.27 return *this; 781 } 782
783 kumpf 1.39 void String::clear() 784 {
785 mike 1.112 if (_rep->size) 786 {
787 mike 1.114 if (_rep->refs.get() == 1)
788 mike 1.112 { 789 _rep->size = 0; 790 _rep->data[0] = '\0'; 791 } 792 else 793 { 794 StringRep::unref(_rep); 795 _rep = &StringRep::_emptyRep; 796 } 797 }
798 kumpf 1.39 } 799
800 mike 1.112 void String::reserveCapacity(Uint32 cap)
801 kumpf 1.39 {
802 mike 1.112 _reserve(_rep, cap);
803 kumpf 1.39 } 804
805 mike 1.112 CString String::getCString() const 806 {
807 david.dillard 1.116 // A UTF8 string can have three times as many characters as its UTF16 808 // counterpart, so we allocate extra memory for the worst case. In the
809 mike 1.112 // best case, we may need only one third of the memory allocated. But
810 david.dillard 1.116 // downsizing the string afterwards is expensive and unecessary since 811 // CString objects are usually short-lived (disappearing after only a few
812 mike 1.112 // instructions). CString objects are typically created on the stack as 813 // means to obtain a char* pointer. 814 815 #ifdef PEGASUS_STRING_NO_UTF8 816 char* str = (char*)operator new(_rep->size + 1); 817 _copy(str, _rep->data, _rep->size); 818 str[_rep->size] = '\0'; 819 return CString(str);
820 gs.keenan 1.110 #else
821 mike 1.112 Uint32 n = 3 * _rep->size; 822 char* str = (char*)operator new(n + 1); 823 size_t size = _copyToUTF8(str, _rep->data, _rep->size); 824 str[size] = '\0'; 825 return CString(str);
826 gs.keenan 1.110 #endif
827 kumpf 1.39 } 828
829 mike 1.112 String& String::append(const Char16* str, Uint32 n)
830 kumpf 1.39 {
831 mike 1.112 _checkNullPointer(str); 832 833 size_t oldSize = _rep->size; 834 size_t newSize = oldSize + n; 835 _reserve(_rep, newSize); 836 _copy(_rep->data + oldSize, (Uint16)str, n); 837 _rep->size = newSize; 838 _rep->data[newSize] = '\0'; 839 840 return this;
841 kumpf 1.39 } 842
843 mike 1.112 String& String::append(const String& str)
844 mike 1.27 {
845 w.otsuka 1.119.2.2 return append((Char16*)(&(str._rep->data[0])), str._rep->size);
846 mike 1.27 } 847
848 mike 1.112 String& String::append(const char* str, Uint32 size)
849 mike 1.27 {
850 mike 1.112 _checkNullPointer(str); 851 852 size_t oldSize = _rep->size; 853 size_t cap = oldSize + size; 854 855 _reserve(_rep, cap); 856 size_t utf8_error_index; 857 size_t tmp = _convert( 858 (Uint16*)_rep->data + oldSize, str, size, utf8_error_index); 859 860 #ifndef PEGASUS_STRING_NO_THROW 861 if (tmp == size_t(-1)) 862 { 863 StringRep::free(_rep); 864 _rep = &StringRep::_emptyRep; 865 _StringThrowBadUTF8(utf8_error_index); 866 } 867 #endif
868 mike 1.27
869 mike 1.112 _rep->size += tmp; 870 _rep->data[_rep->size] = '\0';
871 mike 1.27
872 kumpf 1.39 return *this; 873 } 874
875 mike 1.112 void String::remove(Uint32 index, Uint32 n)
876 mike 1.27 {
877 mike 1.112 if (n == PEG_NOT_FOUND) 878 n = _rep->size - index; 879 880 _checkBounds(index + n, _rep->size); 881
882 mike 1.114 if (_rep->refs.get() != 1)
883 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
884 mike 1.27
885 jim.wunderlich 1.115 PEGASUS_ASSERT(index + n <= _rep->size);
886 mike 1.27
887 mike 1.112 size_t rem = _rep->size - (index + n); 888 Uint16* data = _rep->data;
889 mike 1.27
890 mike 1.112 if (rem) 891 memmove(data + index, data + index + n, rem * sizeof(Uint16));
892 mike 1.27
893 mike 1.112 _rep->size -= n; 894 data[_rep->size] = '\0';
895 mike 1.27 } 896
897 mike 1.112 String String::subString(Uint32 index, Uint32 n) const
898 mike 1.27 {
899 mike 1.112 // Note: this implementation is very permissive but used for 900 // backwards compatibility. 901 902 if (index < _rep->size)
903 mike 1.27 {
904 mike 1.112 if (n == PEG_NOT_FOUND \|\| n > _rep->size - index) 905 n = _rep->size - index;
906 mike 1.27
907 w.otsuka 1.119.2.2 return String((Char16*)(_rep->data + index), n);
908 mike 1.27 }
909 david.dillard 1.105 910 return String();
911 mike 1.27 } 912 913 Uint32 String::find(Char16 c) const 914 {
915 mike 1.112 Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
916 mike 1.27
917 mike 1.112 if (p)
918 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
919 mike 1.27 920 return PEG_NOT_FOUND; 921 } 922
923 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
924 mike 1.30 {
925 mike 1.112 _checkBounds(index, _rep->size); 926 927 if (index >= _rep->size) 928 return PEG_NOT_FOUND; 929 930 Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
931 mike 1.30
932 mike 1.112 if (p)
933 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
934 mike 1.30 935 return PEG_NOT_FOUND; 936 } 937
938 mike 1.112 Uint32 StringFindAux( 939 const StringRep* _rep, const Char16* s, Uint32 n)
940 mike 1.27 {
941 mike 1.112 _checkNullPointer(s);
942 mike 1.27
943 mike 1.112 const Uint16* data = _rep->data; 944 size_t rem = _rep->size; 945 946 while (n <= rem)
947 mike 1.30 {
948 mike 1.112 Uint16* p = (Uint16*)_find(data, rem, s[0]); 949 950 if (!p) 951 break;
952 mike 1.30
953 mike 1.112 if (memcmp(p, s, n * sizeof(Uint16)) == 0)
954 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
955 david.dillard 1.105
956 mike 1.112 p++; 957 rem -= p - data; 958 data = p;
959 mike 1.27 }
960 mike 1.112
961 mike 1.27 return PEG_NOT_FOUND; 962 } 963
964 mike 1.112 Uint32 String::find(const char* s) const 965 { 966 _checkNullPointer(s); 967 968 // Note: could optimize away creation of temporary, but this is rarely 969 // called. 970 return find(String(s)); 971 } 972
973 mike 1.27 Uint32 String::reverseFind(Char16 c) const 974 {
975 mike 1.112 Uint16 x = c; 976 Uint16* p = _rep->data; 977 Uint16* q = _rep->data + _rep->size;
978 mike 1.27
979 mike 1.112 while (q != p)
980 mike 1.27 {
981 mike 1.112 if (*--q == x)
982 david.dillard 1.116 return static_cast<Uint32>(q - p);
983 mike 1.27 } 984 985 return PEG_NOT_FOUND; 986 } 987 988 void String::toLower() 989 {
990 david 1.69 #ifdef PEGASUS_HAS_ICU
991 mike 1.112
992 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
993 david 1.90 {
994 mike 1.114 if (_rep->refs.get() != 1)
995 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 996
997 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
998 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this 999 // prevents optimizations where the us-ascii is converted before
1000 mike 1.112 // calling ICU.
1001 yi.zhou 1.108 // The string may shrink or expand after the convert. 1002
1003 mike 1.112 //// First calculate size of resulting string. u_strToLower() returns 1004 //// only the size when zero is passed as the destination size argument. 1005
1006 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR; 1007
1008 mike 1.112 int32_t newSize = u_strToLower( 1009 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
1010 david.dillard 1.116
1011 mike 1.112 err = U_ZERO_ERROR; 1012 1013 //// Reserve enough space for the result. 1014 1015 if ((Uint32)newSize > _rep->cap) 1016 _reserve(_rep, newSize); 1017 1018 //// Perform the conversion (overlapping buffers are allowed).
1019 chuck 1.99
1020 mike 1.112 u_strToLower((UChar)_rep->data, newSize, 1021 (UChar)_rep->data, _rep->size, NULL, &err);
1022 yi.zhou 1.108
1023 mike 1.112 _rep->size = newSize; 1024 return;
1025 david 1.90 }
1026 mike 1.112 1027 #endif /* PEGASUS_HAS_ICU */ 1028
1029 mike 1.114 if (_rep->refs.get() != 1)
1030 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 1031 1032 Uint16* p = _rep->data; 1033 size_t n = _rep->size; 1034 1035 for (; n--; p++)
1036 david 1.90 {
1037 mike 1.112 if (!(p & 0xFF00)) 1038 p = _toLower(*p);
1039 mike 1.27 }
1040 kumpf 1.39 } 1041
1042 chuck 1.99 void String::toUpper()
1043 david 1.90 { 1044 #ifdef PEGASUS_HAS_ICU
1045 mike 1.112
1046 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1047 chuck 1.99 {
1048 mike 1.114 if (_rep->refs.get() != 1)
1049 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 1050
1051 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
1052 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this 1053 // prevents optimizations where the us-ascii is converted before
1054 mike 1.112 // calling ICU.
1055 yi.zhou 1.108 // The string may shrink or expand after the convert. 1056
1057 mike 1.112 //// First calculate size of resulting string. u_strToUpper() returns 1058 //// only the size when zero is passed as the destination size argument. 1059
1060 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR; 1061
1062 mike 1.112 int32_t newSize = u_strToUpper( 1063 NULL, 0, (UChar)_rep->data, _rep->size, NULL, &err); 1064 1065 err = U_ZERO_ERROR; 1066 1067 //// Reserve enough space for the result. 1068 1069 if ((Uint32)newSize > _rep->cap) 1070 _reserve(_rep, newSize); 1071 1072 //// Perform the conversion (overlapping buffers are allowed). 1073 1074 u_strToUpper((UChar)_rep->data, newSize, 1075 (UChar*)_rep->data, _rep->size, NULL, &err);
1076 chuck 1.99
1077 mike 1.112 _rep->size = newSize;
1078 yi.zhou 1.108
1079 mike 1.112 return;
1080 david 1.91 }
1081 mike 1.112 1082 #endif /* PEGASUS_HAS_ICU */ 1083
1084 mike 1.114 if (_rep->refs.get() != 1)
1085 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 1086 1087 Uint16* p = _rep->data; 1088 size_t n = _rep->size; 1089 1090 for (; n--; p++) 1091 p = _toUpper(p);
1092 david 1.90 } 1093
1094 kumpf 1.43 int String::compare(const String& s1, const String& s2, Uint32 n)
1095 kumpf 1.39 {
1096 kumpf 1.118 const Uint16* p1 = s1._rep->data; 1097 const Uint16* p2 = s2._rep->data;
1098 mike 1.27
1099 kumpf 1.118 while (n--) 1100 { 1101 int r = p1++ - p2++; 1102 if (r) 1103 { 1104 return r; 1105 } 1106 else if (!p1[-1]) 1107 { 1108 // We must have encountered a null terminator in both s1 and s2 1109 return 0; 1110 } 1111 } 1112 return 0;
1113 mike 1.27 } 1114
1115 kumpf 1.43 int String::compare(const String& s1, const String& s2)
1116 mike 1.30 {
1117 mike 1.112 return _compare(s1._rep->data, s2._rep->data); 1118 }
1119 kumpf 1.43
1120 mike 1.112 int String::compare(const String& s1, const char* s2) 1121 { 1122 _checkNullPointer(s2);
1123 mike 1.30
1124 mike 1.112 #ifdef PEGASUS_STRING_NO_UTF8 1125 return _compareNoUTF8(s1._rep->data, s2); 1126 #else 1127 // ATTN: optimize this! 1128 return String::compare(s1, String(s2)); 1129 #endif
1130 mike 1.30 } 1131
1132 mike 1.112 int String::compareNoCase(const String& str1, const String& str2)
1133 kumpf 1.40 {
1134 david 1.69 #ifdef PEGASUS_HAS_ICU
1135 mike 1.112
1136 yi.zhou 1.108 if (InitializeICU::initICUSuccessful()) 1137 {
1138 mike 1.112 return u_strcasecmp(
1139 dave.sudlik 1.119.2.3 (const UChar)str1._rep->data, 1140 (const UChar)str2._rep->data, 1141 U_FOLD_CASE_DEFAULT 1142 );
1143 yi.zhou 1.108 }
1144 kumpf 1.40
1145 mike 1.112 #endif /* PEGASUS_HAS_ICU / 1146 1147 const Uint16 s1 = str1._rep->data; 1148 const Uint16* s2 = str2._rep->data; 1149 1150 while (s1 && s2)
1151 kumpf 1.40 {
1152 mike 1.112 int r = _toLower(s1++) - _toLower(s2++);
1153 kumpf 1.40
1154 david.dillard 1.105 if (r) 1155 return r;
1156 kumpf 1.40 } 1157
1158 mike 1.112 if (*s2)
1159 david.dillard 1.105 return -1;
1160 mike 1.112 else if (*s1)
1161 david.dillard 1.105 return 1;
1162 kumpf 1.40 1163 return 0; 1164 } 1165
1166 mike 1.112 Boolean StringEqualNoCase(const String& s1, const String& s2)
1167 mike 1.27 {
1168 mike 1.112 #ifdef PEGASUS_HAS_ICU 1169 1170 return String::compareNoCase(s1, s2) == 0; 1171 1172 #else /* PEGASUS_HAS_ICU */
1173 mike 1.27
1174 mike 1.112 // The following employs loop unrolling for efficiency. Please do not 1175 // eliminate.
1176 kumpf 1.39
1177 mike 1.112 Uint16* p = (Uint16)s1.getChar16Data(); 1178 Uint16 q = (Uint16*)s2.getChar16Data(); 1179 Uint32 n = s2.size(); 1180 1181 while (n >= 8) 1182 { 1183 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) \|\| 1184 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) \|\| 1185 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) \|\| 1186 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) \|\| 1187 ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) \|\| 1188 ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) \|\| 1189 ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) \|\| 1190 ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7])))) 1191 { 1192 return false; 1193 }
1194 kumpf 1.39
1195 mike 1.112 n -= 8; 1196 p += 8; 1197 q += 8; 1198 }
1199 mike 1.27
1200 mike 1.112 while (n >= 4)
1201 kumpf 1.39 {
1202 mike 1.112 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) \|\| 1203 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) \|\| 1204 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) \|\| 1205 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
1206 david.dillard 1.105 {
1207 mike 1.112 return false;
1208 david.dillard 1.105 }
1209 mike 1.112 1210 n -= 4; 1211 p += 4; 1212 q += 4; 1213 } 1214 1215 while (n--) 1216 { 1217 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
1218 david.dillard 1.105 return false;
1219 mike 1.112 1220 p++; 1221 q++;
1222 kumpf 1.39 }
1223 mike 1.28
1224 kumpf 1.39 return true;
1225 mike 1.112 1226 #endif /* PEGASUS_HAS_ICU */
1227 david 1.69 } 1228
1229 mike 1.112 Boolean String::equalNoCase(const String& s1, const char* s2)
1230 david 1.69 {
1231 mike 1.112 _checkNullPointer(s2);
1232 david 1.69
1233 mike 1.112 #if defined(PEGASUS_HAS_ICU)
1234 david 1.69
1235 mike 1.112 return String::equalNoCase(s1, String(s2));
1236 david 1.69
1237 mike 1.112 #elif defined(PEGASUS_STRING_NO_UTF8)
1238 david 1.69
1239 mike 1.112 const Uint16* p1 = (Uint16)s1._rep->data; 1240 const char p2 = s2; 1241 size_t n = s1._rep->size;
1242 david.dillard 1.105
1243 mike 1.112 while (n--) 1244 { 1245 if (!*p2) 1246 return false;
1247 david 1.71
1248 mike 1.112 if (_toUpper(p1++) != _toUpperTable[int(p2++)]) 1249 return false; 1250 }
1251 kumpf 1.42
1252 mike 1.112 if (*p2) 1253 return false;
1254 david.dillard 1.116
1255 mike 1.112 return true;
1256 karl 1.36
1257 mike 1.112 #else /* PEGASUS_HAS_ICU */
1258 david.dillard 1.105
1259 mike 1.112 // ATTN: optimize this! 1260 return String::equalNoCase(s1, String(s2));
1261 david.dillard 1.105
1262 mike 1.112 #endif /* PEGASUS_HAS_ICU */ 1263 }
1264 chuck 1.78
1265 mike 1.112 Boolean String::equal(const String& s1, const String& s2)
1266 karl 1.36 {
1267 david.dillard 1.116 return s1._rep->size == s2._rep->size && memcmp(s1._rep->data,
1268 mike 1.112 s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;
1269 karl 1.36 } 1270
1271 mike 1.112 Boolean String::equal(const String& s1, const char* s2) 1272 { 1273 #ifdef PEGASUS_STRING_NO_UTF8
1274 kumpf 1.35
1275 mike 1.112 _checkNullPointer(s2);
1276 kumpf 1.39
1277 mike 1.112 const Uint16* p = (Uint16)s1._rep->data; 1278 const char q = s2;
1279 kumpf 1.39
1280 mike 1.112 while (p && q) 1281 { 1282 if (p++ != Uint16(q++)) 1283 return false; 1284 }
1285 kumpf 1.39
1286 mike 1.112 return !(p \|\| q);
1287 kumpf 1.39
1288 mike 1.112 #else /* PEGASUS_STRING_NO_UTF8 */
1289 kumpf 1.39
1290 mike 1.112 return String::equal(s1, String(s2));
1291 kumpf 1.39
1292 mike 1.112 #endif /* PEGASUS_STRING_NO_UTF8 */
1293 kumpf 1.39 } 1294
1295 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
1296 kumpf 1.39 {
1297 mike 1.112 #if defined(PEGASUS_OS_OS400)
1298 david 1.72
1299 david 1.93 CString cstr = str.getCString();
1300 david 1.69 const char* utf8str = cstr;
1301 mike 1.112 os << utf8str; 1302 return os;
1303 david.dillard 1.116 #else
1304 david 1.69
1305 mike 1.112 #if defined(PEGASUS_HAS_ICU)
1306 david 1.69
1307 yi.zhou 1.108 if (InitializeICU::initICUSuccessful()) 1308 {
1309 david.dillard 1.105 char buf = NULL; 1310 const int size = str.size() 6;
1311 mike 1.112 UnicodeString UniStr( 1312 (const UChar *)str.getChar16Data(), (int32_t)str.size());
1313 david.dillard 1.105 Uint32 bufsize = UniStr.extract(0,size,buf); 1314 buf = new char[bufsize+1]; 1315 UniStr.extract(0,bufsize,buf); 1316 os << buf; 1317 os.flush(); 1318 delete [] buf;
1319 david.dillard 1.116 return os;
1320 yi.zhou 1.108 }
1321 mike 1.112
1322 david.dillard 1.116 #endif // PEGASUS_HAS_ICU
1323 mike 1.112 1324 for (Uint32 i = 0, n = str.size(); i < n; i++)
1325 yi.zhou 1.108 {
1326 mike 1.112 Uint16 code = str[i];
1327 david.dillard 1.105
1328 mike 1.112 if (code > 0 && !(code & 0xFF00)) 1329 os << char(code); 1330 else 1331 { 1332 // Print in hex format: 1333 char buffer[8]; 1334 sprintf(buffer, "\\x%04X", code); 1335 os << buffer;
1336 david.dillard 1.105 }
1337 yi.zhou 1.108 }
1338 kumpf 1.39 1339 return os;
1340 mike 1.112 #endif // PEGASUS_OS_OS400
1341 kumpf 1.39 } 1342
1343 mike 1.112 void StringAppendCharAux(StringRep*& _rep)
1344 kumpf 1.39 {
1345 mike 1.112 StringRep* tmp; 1346 1347 if (_rep->cap) 1348 { 1349 tmp = StringRep::alloc(2 * _rep->cap); 1350 tmp->size = _rep->size; 1351 _copy(tmp->data, _rep->data, _rep->size); 1352 } 1353 else 1354 { 1355 tmp = StringRep::alloc(8); 1356 tmp->size = 0; 1357 } 1358 1359 StringRep::unref(_rep); 1360 _rep = tmp;
1361 kumpf 1.39 } 1362
1363 mike 1.112 PEGASUS_NAMESPACE_END 1364 1365 /* 1366 ================================================================================ 1367 1368 String optimizations: 1369 1370 1. Added mechanism allowing certain functions to be inlined only when 1371 used by internal Pegasus modules. External modules (i.e., providers) 1372 link to a non-inline version, which allows for binary compatibility. 1373 1374 2. Implemented copy-on-write with atomic increment/decrement. This 1375 yieled a 10% improvement for the 'gc' benchmark and a 11% improvment 1376 for the 'ni1000' benchmark. 1377 1378 3. Employed loop unrolling in several places. For example, see: 1379 1380 static Uint16* _find(const Uint16* s, size_t n, Uint16 c); 1381 1382 4. Used the "empty-rep" optimization (described in whitepaper from the 1383 GCC Developers Summit). This reduced default construction to a simple 1384 mike 1.112 pointer assignment. 1385 1386 inline String::String() : _rep(&_emptyRep) { } 1387 1388 5. Implemented Uint16 versions of toupper() and tolower() using tables. 1389 For example: 1390 1391 static const char _upper[] = 1392 { 1393 0,1,2,...255 1394 }; 1395 1396 inline Uint16 _toUpper(Uint16 x) 1397 { 1398 return (x & 0xFF00) ? x : _upper[x]; 1399 } 1400
1401 david.dillard 1.116 This outperforms the system implementation by avoiding an anding
1402 mike 1.112 operation. 1403
1404 david.dillard 1.116 6. Implemented char* versions of the following member functions to 1405 eliminate unnecessary creation of anonymous string objects
1406 mike 1.112 (temporaries). 1407 1408 String(const String& s1, const char* s2); 1409 String(const char* s1, const String& s2); 1410 String& String::operator=(const char* str); 1411 Uint32 String::find(const char* s) const; 1412 bool String::equal(const String& s1, const char* s2); 1413 static int String::compare(const String& s1, const char* s2); 1414 String& String::append(const char* str); 1415 String& String::append(const char* str, Uint32 size); 1416 static bool String::equalNoCase(const String& s1, const char* s2); 1417 String& operator=(const char* str) 1418 String& String::assign(const char* str) 1419 String& String::append(const char* str) 1420 Boolean operator==(const String& s1, const char* s2) 1421 Boolean operator==(const char* s1, const String& s2) 1422 Boolean operator!=(const String& s1, const char* s2) 1423 Boolean operator!=(const char* s1, const String& s2) 1424 Boolean operator<(const String& s1, const char* s2) 1425 Boolean operator<(const char* s1, const String& s2) 1426 Boolean operator>(const String& s1, const char* s2) 1427 mike 1.112 Boolean operator>(const char* s1, const String& s2) 1428 Boolean operator<=(const String& s1, const char* s2) 1429 Boolean operator<=(const char* s1, const String& s2) 1430 Boolean operator>=(const String& s1, const char* s2) 1431 Boolean operator>=(const char* s1, const String& s2) 1432 String operator+(const String& s1, const char* s2) 1433 String operator+(const char* s1, const String& s2) 1434
1435 david.dillard 1.116 7. Optimized _roundUpToPow2(), used in rounding the capacity to the next
1436 mike 1.112 power of two (algorithm from the book "Hacker's Delight"). 1437 1438 static Uint32 _roundUpToPow2(Uint32 x) 1439 { 1440 if (x < 8) 1441 return 8; 1442 1443 x--; 1444 x \|= (x >> 1); 1445 x \|= (x >> 2); 1446 x \|= (x >> 4); 1447 x \|= (x >> 8); 1448 x \|= (x >> 16); 1449 x++; 1450 1451 return x; 1452 } 1453 1454 8. Implemented "concatenating constructors" to eliminate temporaries
1455 david.dillard 1.116 created by operator+(). This scheme employs the "return-value
1456 mike 1.112 optimization" described by Stan Lippman. 1457 1458 inline String operator+(const String& s1, const String& s2) 1459 { 1460 return String(s1, s2, 0); 1461 } 1462 1463 9. Experimented to find the optimal initial size for a short string. 1464 Eight seems to offer the best tradeoff between space and time. 1465 1466 10. Inlined all members of the Char16 class. 1467 1468 11. Used Uint16 internally in the String class. This showed no improvement 1469 since Char16 was already fully inlined and was essentially reduced to 1470 Uint16 in any case. 1471 1472 12. Implemented conditional logic (#if) allowing error checking logic to
1473 david.dillard 1.116 be excluded to improve performance. Examples include bounds checking
1474 mike 1.112 and null-pointer checking. 1475 1476 13. Used memcpy() and memcmp() where possible. These are implemented using 1477 the rep family of instructions under Intel and are much faster. 1478
1479 david.dillard 1.116 14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
1480 mike 1.112 copy routine overhead. 1481 1482 15. Added ASCII7 form of the constructor and assign(). 1483 1484 String s("hello world", String::ASCII7); 1485 1486 s.assignASCII7("hello world"); 1487 1488 This avoids slower UTF8 processing when not needed. 1489 1490 ================================================================================ 1491 1492 TO-DO: 1493 1494 (+) [DONE] Use PEGASUS_USE_EXPERIMENTAL_INTERFACES 1495 1496 (+) [DONE] Submit BUG-2754 (Windows buffer limit). 1497 1498 (+) [DONE] Eliminate char versions of find() and append(). 1499 1500 (+) [DONE] Remove PEGASUS_MAX_PRINTABLE_CHARACTER from Config.h 1501 mike 1.112 1502 (+) [DONE] Change _next_pow_2() to _roundUpToPow2(). 1503 1504 (+) [DONE] Change '99' to '2' in StringRep constructor (comment as well). 1505 1506 (+) [DONE] Comment StringRep allocation layout. 1507 1508 (+) [DONE] Conceal private inline functions. 1509 1510 (+) [DONE] Shorten inclusion of StringInline.h in String.h. 1511 1512 (+) [DONE] Change USE_INTERNAL_INLINE TO DISABLE_INTERNAL_INLINE or get 1513 rid of altogether. 1514 1515 (+) [DONE] useCamelNotationOnAllFunctionNames. 1516 1517 (+) [DONE] Check for overlow condition in StringRep::alloc(). 1518 1519 (+) [DONE] Remove tabs (used vim ":set expandtab" and ":retab"). 1520 1521 (+) [DONE] Fix throw-related memory leak. 1522 mike 1.112 1523 (+) [DONE] Look at PEP223 for coding security guidelines. 1524 1525 (+) [DONE] Use old AtomicInt for now (new AtomicInt part of bug #4250).
1526 kumpf 1.39
1527 mike 1.112 (+) [DONE] Removed appendASCII() and the ASCII form of the constructor.
1528 kumpf 1.39
1529 mike 1.112 (+) DOC++ String.h - will open new bug?
1530 kumpf 1.39
1531 mike 1.112 (+) Added PEGASUS_DISABLE_INTERNAL_INLINES macro (to permit suppression 1532 on certain platforms).
1533 kumpf 1.39
1534 mike 1.112 ================================================================================ 1535 */

No CVS admin address has been configured