pegasus/src/Pegasus/Common/String.cpp - annotate

Return to String.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

1 karl 1.119 //%2006////////////////////////////////////////////////////////////////////////
2 mike 1.27 //
3 karl 1.97 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development 4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems. 5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
6 karl 1.85 // IBM Corp.; EMC Corporation, The Open Group.
7 karl 1.97 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.; 8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
9 karl 1.98 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.; 10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
11 karl 1.119 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.; 12 // EMC Corporation; Symantec Corporation; The Open Group.
13 mike 1.27 // 14 // Permission is hereby granted, free of charge, to any person obtaining a copy
15 kumpf 1.41 // of this software and associated documentation files (the "Software"), to 16 // deal in the Software without restriction, including without limitation the 17 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
18 mike 1.27 // sell copies of the Software, and to permit persons to whom the Software is 19 // furnished to do so, subject to the following conditions:
20 karl 1.119 //
21 kumpf 1.41 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
22 mike 1.27 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED 23 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
24 kumpf 1.41 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 25 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 26 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
27 mike 1.27 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 29 // 30 //============================================================================== 31 //
32 mike 1.112 // Author: Mike Brasher (mbrasher@austin.rr.com)
33 mike 1.27 //
34 david.dillard 1.116 // Modified By:
35 mike 1.112 // Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com) 36 // Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297
37 david.dillard 1.116 // David Dillard, Symantec Corp. (david_dillard@symantec.com)
38 mike 1.112 // Mike Brasher (mike-brasher@austin.rr.com)
39 mike 1.27 // 40 //%///////////////////////////////////////////////////////////////////////////// 41
42 jim.wunderlich 1.115 #include <Pegasus/Common/PegasusAssert.h>
43 mike 1.113 #include <cstring>
44 kumpf 1.48 #include "InternalException.h"
45 david 1.69 #include "CommonUTF.h"
46 mike 1.112 #include "MessageLoader.h" 47 #include "StringRep.h"
48 david 1.69 49 #ifdef PEGASUS_HAS_ICU
50 chuck 1.99 #include <unicode/ustring.h> 51 #include <unicode/uchar.h>
52 david 1.69 #endif 53
54 mike 1.112 PEGASUS_NAMESPACE_BEGIN
55 mike 1.28
56 mike 1.112 //============================================================================== 57 // 58 // Compile-time macros (undefined by default). 59 // 60 // PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code. 61 // 62 //==============================================================================
63 mike 1.27
64 mike 1.112 //==============================================================================
65 kumpf 1.39 //
66 mike 1.112 // File-scope definitions:
67 kumpf 1.54 //
68 mike 1.112 //============================================================================== 69 70 // Note: this table is much faster than the system toupper(). Please do not 71 // change.
72 kumpf 1.54
73 david.dillard 1.116 const Uint8 _toUpperTable[256] =
74 kumpf 1.54 {
75 mike 1.112 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 76 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 77 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 78 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 79 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 80 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 81 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 82 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 83 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47, 84 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, 85 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57, 86 0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, 87 0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47, 88 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, 89 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57, 90 0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F, 91 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 92 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 93 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 94 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 95 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 96 mike 1.112 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 97 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 98 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 99 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 100 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 101 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 102 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 103 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 104 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 105 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 106 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF, 107 }; 108 109 // Note: this table is much faster than the system tulower(). Please do not 110 // change. 111
112 david.dillard 1.116 const Uint8 _toLowerTable[256] =
113 mike 1.112 { 114 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 115 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 116 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 117 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 118 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 119 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 120 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 121 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 122 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 123 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 124 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 125 0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F, 126 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 127 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 128 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 129 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, 130 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 131 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 132 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 133 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 134 mike 1.112 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 135 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 136 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 137 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 138 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 139 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 140 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 141 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 142 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 143 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 144 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 145 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF, 146 }; 147 148 // Converts 16-bit characters to upper case. This routine is faster than the 149 // system toupper(). Please do not change. 150 inline Uint16 _toUpper(Uint16 x) 151 { 152 return (x & 0xFF00) ? x : _toUpperTable[x];
153 kumpf 1.54 } 154
155 mike 1.112 // Converts 16-bit characters to lower case. This routine is faster than the 156 // system toupper(). Please do not change. 157 inline Uint16 _toLower(Uint16 x)
158 kumpf 1.54 {
159 mike 1.112 return (x & 0xFF00) ? x : _toLowerTable[x]; 160 } 161 162 // Rounds x up to the nearest power of two (or just returns 8 if x < 8). 163 static Uint32 _roundUpToPow2(Uint32 x) 164 {
165 dave.sudlik 1.120 // Check for potential overflow in x 166 PEGASUS_CHECK_CAPACITY_OVERFLOW(x);
167 mike 1.112 168 if (x < 8) 169 return 8; 170 171 x--; 172 x \|= (x >> 1); 173 x \|= (x >> 2); 174 x \|= (x >> 4); 175 x \|= (x >> 8); 176 x \|= (x >> 16); 177 x++; 178 179 return x; 180 } 181 182 template<class P, class Q> 183 static void _copy(P* p, const Q* q, size_t n) 184 { 185 // The following employs loop unrolling for efficiency. Please do not 186 // eliminate. 187 188 mike 1.112 while (n >= 8) 189 { 190 p[0] = q[0]; 191 p[1] = q[1]; 192 p[2] = q[2]; 193 p[3] = q[3]; 194 p[4] = q[4]; 195 p[5] = q[5]; 196 p[6] = q[6]; 197 p[7] = q[7]; 198 p += 8; 199 q += 8; 200 n -= 8; 201 } 202 203 while (n >= 4) 204 { 205 p[0] = q[0]; 206 p[1] = q[1]; 207 p[2] = q[2]; 208 p[3] = q[3]; 209 mike 1.112 p += 4; 210 q += 4; 211 n -= 4; 212 } 213 214 while (n--) 215 p++ = q++; 216 } 217 218 static Uint16* _find(const Uint16* s, size_t n, Uint16 c) 219 { 220 // The following employs loop unrolling for efficiency. Please do not 221 // eliminate. 222 223 while (n >= 4) 224 { 225 if (s[0] == c) 226 return (Uint16)s; 227 if (s[1] == c) 228 return (Uint16)&s[1]; 229 if (s[2] == c) 230 mike 1.112 return (Uint16)&s[2]; 231 if (s[3] == c) 232 return (Uint16)&s[3];
233 kumpf 1.82
234 mike 1.112 n -= 4; 235 s += 4; 236 } 237 238 if (n) 239 { 240 if (s == c) 241 return (Uint16)s; 242 s++; 243 n--; 244 } 245 246 if (n) 247 { 248 if (s == c) 249 return (Uint16)s; 250 s++; 251 n--; 252 } 253 254 if (n && s == c) 255 mike 1.112 return (Uint16)s; 256 257 // Not found! 258 return 0; 259 } 260 261 static int _compare(const Uint16* s1, const Uint16* s2) 262 { 263 while (s1 && s2) 264 { 265 int r = s1++ - s2++; 266 267 if (r) 268 return r; 269 } 270 271 if (s2) 272 return -1; 273 else if (s1) 274 return 1; 275 276 mike 1.112 return 0; 277 } 278 279 static int _compareNoUTF8(const Uint16* s1, const char* s2) 280 { 281 Uint16 c1; 282 Uint16 c2; 283 284 do 285 { 286 c1 = s1++; 287 c2 = s2++; 288 289 if (c1 == 0) 290 return c1 - c2; 291 } 292 while (c1 == c2); 293 294 return c1 - c2; 295 } 296 297 mike 1.112 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n) 298 { 299 memcpy(s1, s2, n * sizeof(Uint16)); 300 } 301 302 void StringThrowOutOfBounds() 303 { 304 throw IndexOutOfBoundsException(); 305 } 306 307 inline void _checkNullPointer(const void* ptr) 308 { 309 if (!ptr) 310 throw NullPointer(); 311 } 312 313 static void _StringThrowBadUTF8(Uint32 index) 314 { 315 MessageLoaderParms parms( 316 "Common.String.BAD_UTF8", 317 "The byte sequence starting at index $0 " 318 mike 1.112 "is not valid UTF-8 encoding.", 319 index); 320 throw Exception(parms); 321 } 322 323 static size_t _copyFromUTF8(
324 david.dillard 1.116 Uint16* dest, 325 const char* src,
326 mike 1.112 size_t n, 327 size_t& utf8_error_index) 328 { 329 Uint16* p = dest; 330 const Uint8* q = (const Uint8)src; 331 332 // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later). 333 // Use loop-unrolling. 334 335 while (n >=8 && ((q[0]\|q[1]\|q[2]\|q[3]\|q[4]\|q[5]\|q[6]\|q[7]) & 0x80) == 0) 336 { 337 p[0] = q[0]; 338 p[1] = q[1]; 339 p[2] = q[2]; 340 p[3] = q[3]; 341 p[4] = q[4]; 342 p[5] = q[5]; 343 p[6] = q[6]; 344 p[7] = q[7]; 345 p += 8; 346 q += 8; 347 mike 1.112 n -= 8; 348 } 349 350 while (n >=4 && ((q[0]\|q[1]\|q[2]\|q[3]) & 0x80) == 0) 351 { 352 p[0] = q[0]; 353 p[1] = q[1]; 354 p[2] = q[2]; 355 p[3] = q[3]; 356 p += 4; 357 q += 4; 358 n -= 4; 359 } 360 361 switch (n) 362 { 363 case 0: 364 return p - dest; 365 case 1: 366 if (q[0] < 128) 367 { 368 mike 1.112 p[0] = q[0]; 369 return p + 1 - dest; 370 } 371 break; 372 case 2: 373 if (((q[0]\|q[1]) & 0x80) == 0) 374 { 375 p[0] = q[0]; 376 p[1] = q[1]; 377 return p + 2 - dest; 378 } 379 break; 380 case 3: 381 if (((q[0]\|q[1]\|q[2]) & 0x80) == 0) 382 { 383 p[0] = q[0]; 384 p[1] = q[1]; 385 p[2] = q[2]; 386 return p + 3 - dest; 387 } 388 break; 389 mike 1.112 } 390 391 // Process remaining characters. 392 393 while (n) 394 { 395 // Optimize for 7-bit ASCII case. 396 397 if (q < 128) 398 { 399 p++ = q++; 400 n--; 401 } 402 else 403 { 404 Uint8 c = UTF_8_COUNT_TRAIL_BYTES(q) + 1; 405 406 if (c > n \|\| !isValid_U8(q, c) \|\| 407 UTF8toUTF16(&q, q + c, &p, p + n) != 0) 408 { 409 utf8_error_index = q - (const Uint8)src; 410 mike 1.112 return size_t(-1); 411 } 412 413 n -= c; 414 } 415 } 416 417 return p - dest; 418 } 419
420 david.dillard 1.116 // Note: dest must be at least three times src (plus an extra byte for
421 mike 1.112 // terminator). 422 static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n) 423 { 424 // The following employs loop unrolling for efficiency. Please do not 425 // eliminate. 426 427 const Uint16* q = src; 428 Uint8* p = (Uint8*)dest; 429 430 while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
431 kumpf 1.82 {
432 mike 1.112 p[0] = q[0]; 433 p[1] = q[1]; 434 p[2] = q[2]; 435 p[3] = q[3]; 436 p += 4; 437 q += 4; 438 n -= 4;
439 kumpf 1.82 }
440 mike 1.112 441 switch (n) 442 { 443 case 0: 444 return p - (Uint8)dest; 445 case 1: 446 if (q[0] < 128) 447 { 448 p[0] = q[0]; 449 return p + 1 - (Uint8)dest; 450 } 451 break; 452 case 2: 453 if (q[0] < 128 && q[1] < 128) 454 { 455 p[0] = q[0]; 456 p[1] = q[1]; 457 return p + 2 - (Uint8)dest; 458 } 459 break; 460 case 3: 461 mike 1.112 if (q[0] < 128 && q[1] < 128 && q[2] < 128) 462 { 463 p[0] = q[0]; 464 p[1] = q[1]; 465 p[2] = q[2]; 466 return p + 3 - (Uint8)dest; 467 } 468 break; 469 } 470 471 // If this line was reached, there must be characters greater than 128. 472 473 UTF16toUTF8(&q, q + n, &p, p + 3 * n); 474 475 return p - (Uint8*)dest;
476 kumpf 1.54 } 477
478 mike 1.112 static inline size_t _convert( 479 Uint16* p, const char* q, size_t n, size_t& utf8_error_index)
480 kumpf 1.54 {
481 mike 1.112 #ifdef PEGASUS_STRING_NO_UTF8 482 _copy(p, q, n); 483 return n; 484 #else 485 return _copyFromUTF8(p, q, n, utf8_error_index); 486 #endif
487 kumpf 1.54 } 488
489 mike 1.112 //============================================================================== 490 // 491 // class CString 492 // 493 //============================================================================== 494 495 CString::CString(const CString& cstr) : _rep(0)
496 kumpf 1.54 {
497 mike 1.112 if (cstr._rep)
498 kumpf 1.82 {
499 mike 1.112 size_t n = strlen(cstr._rep) + 1; 500 _rep = (char*)operator new(n); 501 memcpy(_rep, cstr._rep, n);
502 kumpf 1.82 }
503 kumpf 1.54 } 504
505 kumpf 1.56 CString& CString::operator=(const CString& cstr) 506 {
507 kumpf 1.82 if (&cstr != this)
508 kumpf 1.81 {
509 kumpf 1.82 if (_rep) 510 {
511 mike 1.112 operator delete(_rep);
512 kumpf 1.82 _rep = 0; 513 }
514 mike 1.112
515 kumpf 1.82 if (cstr._rep) 516 {
517 mike 1.112 size_t n = strlen(cstr._rep) + 1; 518 _rep = (char*)operator new(n); 519 memcpy(_rep, cstr._rep, n);
520 kumpf 1.82 }
521 kumpf 1.81 }
522 mike 1.112
523 kumpf 1.56 return *this; 524 } 525
526 mike 1.112 //==============================================================================
527 kumpf 1.54 //
528 mike 1.112 // class StringRep
529 kumpf 1.39 //
530 mike 1.112 //==============================================================================
531 kumpf 1.39
532 mike 1.112 StringRep StringRep::_emptyRep;
533 mike 1.27
534 mike 1.112 inline StringRep* StringRep::alloc(size_t cap)
535 mike 1.27 {
536 dave.sudlik 1.120 // Check for potential overflow in cap 537 PEGASUS_CHECK_CAPACITY_OVERFLOW(cap);
538 mike 1.27
539 mike 1.112 StringRep* rep = (StringRep)::operator new( 540 sizeof(StringRep) + cap sizeof(Uint16)); 541 rep->cap = cap; 542 new(&rep->refs) AtomicInt(1); 543 544 return rep;
545 mike 1.27 } 546
547 mike 1.112 static inline void _reserve(StringRep*& rep, Uint32 cap)
548 chuck 1.102 {
549 mike 1.114 if (cap > rep->cap \|\| rep->refs.get() != 1)
550 chuck 1.102 {
551 mike 1.112 size_t n = _roundUpToPow2(cap); 552 StringRep* newRep = StringRep::alloc(n); 553 newRep->size = rep->size; 554 _copy(newRep->data, rep->data, rep->size + 1); 555 StringRep::unref(rep); 556 rep = newRep; 557 } 558 }
559 david.dillard 1.105
560 mike 1.112 StringRep* StringRep::create(const Uint16* data, size_t size) 561 { 562 StringRep* rep = StringRep::alloc(size); 563 rep->size = size; 564 _copy(rep->data, data, size); 565 rep->data[size] = '\0'; 566 return rep; 567 }
568 chuck 1.102
569 mike 1.112 StringRep* StringRep::copyOnWrite(StringRep* rep) 570 { 571 // Return a new copy of rep. Release rep.
572 chuck 1.102
573 mike 1.112 StringRep* newRep = StringRep::alloc(rep->size); 574 newRep->size = rep->size; 575 _copy(newRep->data, rep->data, rep->size); 576 newRep->data[newRep->size] = '\0'; 577 StringRep::unref(rep); 578 return newRep;
579 chuck 1.102 } 580
581 mike 1.112 StringRep* StringRep::create(const char* data, size_t size)
582 kumpf 1.43 {
583 mike 1.112 StringRep* rep = StringRep::alloc(size); 584 size_t utf8_error_index; 585 rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index); 586 587 if (rep->size == size_t(-1)) 588 { 589 StringRep::free(rep); 590 _StringThrowBadUTF8(utf8_error_index); 591 }
592 kumpf 1.43
593 mike 1.112 rep->data[rep->size] = '\0';
594 kumpf 1.43
595 mike 1.112 return rep;
596 mike 1.27 } 597
598 mike 1.112 Uint32 StringRep::length(const Uint16* str)
599 mike 1.27 {
600 mike 1.112 // Note: We could unroll this but it is rarely called. 601 602 const Uint16* end = (Uint16)str; 603 604 while (end++) 605 ; 606 607 return end - str - 1;
608 kumpf 1.39 }
609 tony 1.66
610 mike 1.112 //============================================================================== 611 // 612 // class String 613 // 614 //============================================================================== 615 616 const String String::EMPTY;
617 mike 1.27
618 kumpf 1.39 String::String(const String& str, Uint32 n) 619 {
620 mike 1.112 _checkBounds(n, str._rep->size); 621 _rep = StringRep::create(str._rep->data, n);
622 kumpf 1.39 } 623 624 String::String(const Char16* str) 625 {
626 mike 1.112 _checkNullPointer(str); 627 _rep = StringRep::create((Uint16)str, StringRep::length((Uint16)str));
628 mike 1.27 } 629
630 kumpf 1.39 String::String(const Char16* str, Uint32 n) 631 {
632 mike 1.112 _checkNullPointer(str); 633 _rep = StringRep::create((Uint16*)str, n);
634 kumpf 1.39 } 635 636 String::String(const char* str)
637 mike 1.27 {
638 mike 1.112 _checkNullPointer(str);
639 david.dillard 1.105
640 mike 1.112 // Set this just in case create() throws an exception. 641 _rep = &StringRep::_emptyRep; 642 _rep = StringRep::create(str, strlen(str));
643 mike 1.27 } 644
645 kumpf 1.39 String::String(const char* str, Uint32 n)
646 mike 1.27 {
647 mike 1.112 _checkNullPointer(str);
648 david.dillard 1.105
649 mike 1.112 // Set this just in case create() throws an exception. 650 _rep = &StringRep::_emptyRep; 651 _rep = StringRep::create(str, n);
652 kumpf 1.39 }
653 mike 1.27
654 mike 1.112 String::String(const String& s1, const String& s2)
655 kumpf 1.39 {
656 mike 1.112 size_t n1 = s1._rep->size; 657 size_t n2 = s2._rep->size; 658 size_t n = n1 + n2; 659 _rep = StringRep::alloc(n); 660 _copy(_rep->data, s1._rep->data, n1); 661 _copy(_rep->data + n1, s2._rep->data, n2); 662 _rep->size = n; 663 _rep->data[n] = '\0';
664 mike 1.27 } 665
666 mike 1.112 String::String(const String& s1, const char* s2)
667 mike 1.27 {
668 mike 1.112 _checkNullPointer(s2); 669 size_t n1 = s1._rep->size; 670 size_t n2 = strlen(s2); 671 _rep = StringRep::alloc(n1 + n2); 672 _copy(_rep->data, s1._rep->data, n1); 673 size_t utf8_error_index; 674 size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index); 675 676 if (tmp == size_t(-1))
677 kumpf 1.82 {
678 mike 1.112 StringRep::free(_rep); 679 _rep = &StringRep::_emptyRep; 680 _StringThrowBadUTF8(utf8_error_index);
681 kumpf 1.82 }
682 mike 1.112 683 _rep->size = n1 + tmp; 684 _rep->data[_rep->size] = '\0';
685 mike 1.27 } 686
687 mike 1.112 String::String(const char* s1, const String& s2)
688 mike 1.27 {
689 mike 1.112 _checkNullPointer(s1); 690 size_t n1 = strlen(s1); 691 size_t n2 = s2._rep->size; 692 _rep = StringRep::alloc(n1 + n2); 693 size_t utf8_error_index; 694 size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index); 695 696 if (tmp == size_t(-1)) 697 { 698 StringRep::free(_rep); 699 _rep = &StringRep::_emptyRep; 700 _StringThrowBadUTF8(utf8_error_index); 701 } 702 703 _rep->size = n2 + tmp; 704 _copy(_rep->data + n1, s2._rep->data, n2); 705 _rep->data[_rep->size] = '\0';
706 mike 1.27 } 707
708 mike 1.112 String& String::assign(const String& str)
709 mike 1.27 {
710 mike 1.112 if (_rep != str._rep)
711 david.dillard 1.105 {
712 mike 1.112 StringRep::unref(_rep); 713 StringRep::ref(_rep = str._rep);
714 david.dillard 1.105 } 715
716 mike 1.27 return this; 717 } 718 719 String& String::assign(const Char16 str, Uint32 n) 720 {
721 mike 1.112 _checkNullPointer(str); 722
723 mike 1.114 if (n > _rep->cap \|\| _rep->refs.get() != 1)
724 david.dillard 1.105 {
725 mike 1.112 StringRep::unref(_rep); 726 _rep = StringRep::alloc(n);
727 david.dillard 1.105 } 728
729 mike 1.112 _rep->size = n; 730 _copy(_rep->data, (Uint16*)str, n); 731 _rep->data[n] = '\0'; 732
733 mike 1.27 return *this; 734 } 735
736 mike 1.112 String& String::assign(const char* str, Uint32 n)
737 chuck 1.102 {
738 mike 1.112 _checkNullPointer(str); 739
740 mike 1.114 if (n > _rep->cap \|\| _rep->refs.get() != 1)
741 david.dillard 1.105 {
742 mike 1.112 StringRep::unref(_rep); 743 _rep = StringRep::alloc(n);
744 david.dillard 1.105 } 745
746 mike 1.112 size_t utf8_error_index; 747 _rep->size = _convert(_rep->data, str, n, utf8_error_index);
748 chuck 1.102
749 mike 1.112 if (_rep->size == size_t(-1))
750 david.dillard 1.105 {
751 mike 1.112 StringRep::free(_rep); 752 _rep = &StringRep::_emptyRep; 753 _StringThrowBadUTF8(utf8_error_index);
754 david.dillard 1.105 }
755 mike 1.112 756 _rep->data[_rep->size] = 0;
757 david.dillard 1.105
758 mike 1.27 return *this; 759 } 760
761 kumpf 1.39 void String::clear() 762 {
763 mike 1.112 if (_rep->size) 764 {
765 mike 1.114 if (_rep->refs.get() == 1)
766 mike 1.112 { 767 _rep->size = 0; 768 _rep->data[0] = '\0'; 769 } 770 else 771 { 772 StringRep::unref(_rep); 773 _rep = &StringRep::_emptyRep; 774 } 775 }
776 kumpf 1.39 } 777
778 mike 1.112 void String::reserveCapacity(Uint32 cap)
779 kumpf 1.39 {
780 mike 1.112 _reserve(_rep, cap);
781 kumpf 1.39 } 782
783 mike 1.112 CString String::getCString() const 784 {
785 david.dillard 1.116 // A UTF8 string can have three times as many characters as its UTF16 786 // counterpart, so we allocate extra memory for the worst case. In the
787 mike 1.112 // best case, we may need only one third of the memory allocated. But
788 david.dillard 1.116 // downsizing the string afterwards is expensive and unecessary since 789 // CString objects are usually short-lived (disappearing after only a few
790 mike 1.112 // instructions). CString objects are typically created on the stack as 791 // means to obtain a char* pointer. 792 793 #ifdef PEGASUS_STRING_NO_UTF8 794 char* str = (char*)operator new(_rep->size + 1); 795 _copy(str, _rep->data, _rep->size); 796 str[_rep->size] = '\0'; 797 return CString(str);
798 gs.keenan 1.110 #else
799 mike 1.112 Uint32 n = 3 * _rep->size; 800 char* str = (char*)operator new(n + 1); 801 size_t size = _copyToUTF8(str, _rep->data, _rep->size); 802 str[size] = '\0'; 803 return CString(str);
804 gs.keenan 1.110 #endif
805 kumpf 1.39 } 806
807 mike 1.112 String& String::append(const Char16* str, Uint32 n)
808 kumpf 1.39 {
809 mike 1.112 _checkNullPointer(str); 810 811 size_t oldSize = _rep->size; 812 size_t newSize = oldSize + n; 813 _reserve(_rep, newSize); 814 _copy(_rep->data + oldSize, (Uint16)str, n); 815 _rep->size = newSize; 816 _rep->data[newSize] = '\0'; 817 818 return this;
819 kumpf 1.39 } 820
821 mike 1.112 String& String::append(const String& str)
822 mike 1.27 {
823 w.otsuka 1.121 return append((Char16*)(&(str._rep->data[0])), str._rep->size);
824 mike 1.27 } 825
826 mike 1.112 String& String::append(const char* str, Uint32 size)
827 mike 1.27 {
828 mike 1.112 _checkNullPointer(str); 829 830 size_t oldSize = _rep->size; 831 size_t cap = oldSize + size; 832 833 _reserve(_rep, cap); 834 size_t utf8_error_index; 835 size_t tmp = _convert( 836 (Uint16*)_rep->data + oldSize, str, size, utf8_error_index); 837 838 if (tmp == size_t(-1)) 839 { 840 StringRep::free(_rep); 841 _rep = &StringRep::_emptyRep; 842 _StringThrowBadUTF8(utf8_error_index); 843 }
844 mike 1.27
845 mike 1.112 _rep->size += tmp; 846 _rep->data[_rep->size] = '\0';
847 mike 1.27
848 kumpf 1.39 return *this; 849 } 850
851 mike 1.112 void String::remove(Uint32 index, Uint32 n)
852 mike 1.27 {
853 mike 1.112 if (n == PEG_NOT_FOUND) 854 n = _rep->size - index; 855 856 _checkBounds(index + n, _rep->size); 857
858 mike 1.114 if (_rep->refs.get() != 1)
859 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
860 mike 1.27
861 jim.wunderlich 1.115 PEGASUS_ASSERT(index + n <= _rep->size);
862 mike 1.27
863 mike 1.112 size_t rem = _rep->size - (index + n); 864 Uint16* data = _rep->data;
865 mike 1.27
866 mike 1.112 if (rem) 867 memmove(data + index, data + index + n, rem * sizeof(Uint16));
868 mike 1.27
869 mike 1.112 _rep->size -= n; 870 data[_rep->size] = '\0';
871 mike 1.27 } 872
873 mike 1.112 String String::subString(Uint32 index, Uint32 n) const
874 mike 1.27 {
875 mike 1.112 // Note: this implementation is very permissive but used for 876 // backwards compatibility. 877 878 if (index < _rep->size)
879 mike 1.27 {
880 mike 1.112 if (n == PEG_NOT_FOUND \|\| n > _rep->size - index) 881 n = _rep->size - index;
882 mike 1.27
883 w.otsuka 1.121 return String((Char16*)(_rep->data + index), n);
884 mike 1.27 }
885 david.dillard 1.105 886 return String();
887 mike 1.27 } 888 889 Uint32 String::find(Char16 c) const 890 {
891 mike 1.112 Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
892 mike 1.27
893 mike 1.112 if (p)
894 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
895 mike 1.27 896 return PEG_NOT_FOUND; 897 } 898
899 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
900 mike 1.30 {
901 mike 1.112 _checkBounds(index, _rep->size); 902 903 if (index >= _rep->size) 904 return PEG_NOT_FOUND; 905 906 Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
907 mike 1.30
908 mike 1.112 if (p)
909 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
910 mike 1.30 911 return PEG_NOT_FOUND; 912 } 913
914 mike 1.112 Uint32 StringFindAux( 915 const StringRep* _rep, const Char16* s, Uint32 n)
916 mike 1.27 {
917 mike 1.112 _checkNullPointer(s);
918 mike 1.27
919 mike 1.112 const Uint16* data = _rep->data; 920 size_t rem = _rep->size; 921 922 while (n <= rem)
923 mike 1.30 {
924 mike 1.112 Uint16* p = (Uint16*)_find(data, rem, s[0]); 925 926 if (!p) 927 break;
928 mike 1.30
929 mike 1.112 if (memcmp(p, s, n * sizeof(Uint16)) == 0)
930 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
931 david.dillard 1.105
932 mike 1.112 p++; 933 rem -= p - data; 934 data = p;
935 mike 1.27 }
936 mike 1.112
937 mike 1.27 return PEG_NOT_FOUND; 938 } 939
940 mike 1.112 Uint32 String::find(const char* s) const 941 { 942 _checkNullPointer(s); 943 944 // Note: could optimize away creation of temporary, but this is rarely 945 // called. 946 return find(String(s)); 947 } 948
949 mike 1.27 Uint32 String::reverseFind(Char16 c) const 950 {
951 mike 1.112 Uint16 x = c; 952 Uint16* p = _rep->data; 953 Uint16* q = _rep->data + _rep->size;
954 mike 1.27
955 mike 1.112 while (q != p)
956 mike 1.27 {
957 mike 1.112 if (*--q == x)
958 david.dillard 1.116 return static_cast<Uint32>(q - p);
959 mike 1.27 } 960 961 return PEG_NOT_FOUND; 962 } 963 964 void String::toLower() 965 {
966 david 1.69 #ifdef PEGASUS_HAS_ICU
967 mike 1.112
968 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
969 david 1.90 {
970 mike 1.114 if (_rep->refs.get() != 1)
971 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 972
973 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
974 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this 975 // prevents optimizations where the us-ascii is converted before
976 mike 1.112 // calling ICU.
977 yi.zhou 1.108 // The string may shrink or expand after the convert. 978
979 mike 1.112 //// First calculate size of resulting string. u_strToLower() returns 980 //// only the size when zero is passed as the destination size argument. 981
982 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR; 983
984 mike 1.112 int32_t newSize = u_strToLower( 985 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
986 david.dillard 1.116
987 mike 1.112 err = U_ZERO_ERROR; 988 989 //// Reserve enough space for the result. 990 991 if ((Uint32)newSize > _rep->cap) 992 _reserve(_rep, newSize); 993 994 //// Perform the conversion (overlapping buffers are allowed).
995 chuck 1.99
996 mike 1.112 u_strToLower((UChar)_rep->data, newSize, 997 (UChar)_rep->data, _rep->size, NULL, &err);
998 yi.zhou 1.108
999 mike 1.112 _rep->size = newSize; 1000 return;
1001 david 1.90 }
1002 mike 1.112 1003 #endif /* PEGASUS_HAS_ICU */ 1004
1005 mike 1.114 if (_rep->refs.get() != 1)
1006 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 1007 1008 Uint16* p = _rep->data; 1009 size_t n = _rep->size; 1010 1011 for (; n--; p++)
1012 david 1.90 {
1013 mike 1.112 if (!(p & 0xFF00)) 1014 p = _toLower(*p);
1015 mike 1.27 }
1016 kumpf 1.39 } 1017
1018 chuck 1.99 void String::toUpper()
1019 david 1.90 { 1020 #ifdef PEGASUS_HAS_ICU
1021 mike 1.112
1022 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1023 chuck 1.99 {
1024 mike 1.114 if (_rep->refs.get() != 1)
1025 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 1026
1027 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
1028 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this 1029 // prevents optimizations where the us-ascii is converted before
1030 mike 1.112 // calling ICU.
1031 yi.zhou 1.108 // The string may shrink or expand after the convert. 1032
1033 mike 1.112 //// First calculate size of resulting string. u_strToUpper() returns 1034 //// only the size when zero is passed as the destination size argument. 1035
1036 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR; 1037
1038 mike 1.112 int32_t newSize = u_strToUpper( 1039 NULL, 0, (UChar)_rep->data, _rep->size, NULL, &err); 1040 1041 err = U_ZERO_ERROR; 1042 1043 //// Reserve enough space for the result. 1044 1045 if ((Uint32)newSize > _rep->cap) 1046 _reserve(_rep, newSize); 1047 1048 //// Perform the conversion (overlapping buffers are allowed). 1049 1050 u_strToUpper((UChar)_rep->data, newSize, 1051 (UChar*)_rep->data, _rep->size, NULL, &err);
1052 chuck 1.99
1053 mike 1.112 _rep->size = newSize;
1054 yi.zhou 1.108
1055 mike 1.112 return;
1056 david 1.91 }
1057 mike 1.112 1058 #endif /* PEGASUS_HAS_ICU */ 1059
1060 mike 1.114 if (_rep->refs.get() != 1)
1061 mike 1.112 _rep = StringRep::copyOnWrite(_rep); 1062 1063 Uint16* p = _rep->data; 1064 size_t n = _rep->size; 1065 1066 for (; n--; p++) 1067 p = _toUpper(p);
1068 david 1.90 } 1069
1070 kumpf 1.43 int String::compare(const String& s1, const String& s2, Uint32 n)
1071 kumpf 1.39 {
1072 kumpf 1.118 const Uint16* p1 = s1._rep->data; 1073 const Uint16* p2 = s2._rep->data;
1074 mike 1.27
1075 kumpf 1.118 while (n--) 1076 { 1077 int r = p1++ - p2++; 1078 if (r) 1079 { 1080 return r; 1081 } 1082 else if (!p1[-1]) 1083 { 1084 // We must have encountered a null terminator in both s1 and s2 1085 return 0; 1086 } 1087 } 1088 return 0;
1089 mike 1.27 } 1090
1091 kumpf 1.43 int String::compare(const String& s1, const String& s2)
1092 mike 1.30 {
1093 mike 1.112 return _compare(s1._rep->data, s2._rep->data); 1094 }
1095 kumpf 1.43
1096 mike 1.112 int String::compare(const String& s1, const char* s2) 1097 { 1098 _checkNullPointer(s2);
1099 mike 1.30
1100 mike 1.112 #ifdef PEGASUS_STRING_NO_UTF8 1101 return _compareNoUTF8(s1._rep->data, s2); 1102 #else 1103 // ATTN: optimize this! 1104 return String::compare(s1, String(s2)); 1105 #endif
1106 mike 1.30 } 1107
1108 mike 1.112 int String::compareNoCase(const String& str1, const String& str2)
1109 kumpf 1.40 {
1110 david 1.69 #ifdef PEGASUS_HAS_ICU
1111 mike 1.112
1112 yi.zhou 1.108 if (InitializeICU::initICUSuccessful()) 1113 {
1114 mike 1.112 return u_strcasecmp( 1115 str1._rep->data, str2._rep->data, U_FOLD_CASE_DEFAULT);
1116 yi.zhou 1.108 }
1117 kumpf 1.40
1118 mike 1.112 #endif /* PEGASUS_HAS_ICU / 1119 1120 const Uint16 s1 = str1._rep->data; 1121 const Uint16* s2 = str2._rep->data; 1122 1123 while (s1 && s2)
1124 kumpf 1.40 {
1125 mike 1.112 int r = _toLower(s1++) - _toLower(s2++);
1126 kumpf 1.40
1127 david.dillard 1.105 if (r) 1128 return r;
1129 kumpf 1.40 } 1130
1131 mike 1.112 if (*s2)
1132 david.dillard 1.105 return -1;
1133 mike 1.112 else if (*s1)
1134 david.dillard 1.105 return 1;
1135 kumpf 1.40 1136 return 0; 1137 } 1138
1139 mike 1.112 Boolean StringEqualNoCase(const String& s1, const String& s2)
1140 mike 1.27 {
1141 mike 1.112 #ifdef PEGASUS_HAS_ICU 1142 1143 return String::compareNoCase(s1, s2) == 0; 1144 1145 #else /* PEGASUS_HAS_ICU */
1146 mike 1.27
1147 mike 1.112 // The following employs loop unrolling for efficiency. Please do not 1148 // eliminate.
1149 kumpf 1.39
1150 mike 1.112 Uint16* p = (Uint16)s1.getChar16Data(); 1151 Uint16 q = (Uint16*)s2.getChar16Data(); 1152 Uint32 n = s2.size(); 1153 1154 while (n >= 8) 1155 { 1156 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) \|\| 1157 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) \|\| 1158 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) \|\| 1159 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) \|\| 1160 ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) \|\| 1161 ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) \|\| 1162 ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) \|\| 1163 ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7])))) 1164 { 1165 return false; 1166 }
1167 kumpf 1.39
1168 mike 1.112 n -= 8; 1169 p += 8; 1170 q += 8; 1171 }
1172 mike 1.27
1173 mike 1.112 while (n >= 4)
1174 kumpf 1.39 {
1175 mike 1.112 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) \|\| 1176 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) \|\| 1177 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) \|\| 1178 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
1179 david.dillard 1.105 {
1180 mike 1.112 return false;
1181 david.dillard 1.105 }
1182 mike 1.112 1183 n -= 4; 1184 p += 4; 1185 q += 4; 1186 } 1187 1188 while (n--) 1189 { 1190 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
1191 david.dillard 1.105 return false;
1192 mike 1.112 1193 p++; 1194 q++;
1195 kumpf 1.39 }
1196 mike 1.28
1197 kumpf 1.39 return true;
1198 mike 1.112 1199 #endif /* PEGASUS_HAS_ICU */
1200 david 1.69 } 1201
1202 mike 1.112 Boolean String::equalNoCase(const String& s1, const char* s2)
1203 david 1.69 {
1204 mike 1.112 _checkNullPointer(s2);
1205 david 1.69
1206 mike 1.112 #if defined(PEGASUS_HAS_ICU)
1207 david 1.69
1208 mike 1.112 return String::equalNoCase(s1, String(s2));
1209 david 1.69
1210 mike 1.112 #elif defined(PEGASUS_STRING_NO_UTF8)
1211 david 1.69
1212 mike 1.112 const Uint16* p1 = (Uint16)s1._rep->data; 1213 const char p2 = s2; 1214 size_t n = s1._rep->size;
1215 david.dillard 1.105
1216 mike 1.112 while (n--) 1217 { 1218 if (!*p2) 1219 return false;
1220 david 1.71
1221 mike 1.112 if (_toUpper(p1++) != _toUpperTable[int(p2++)]) 1222 return false; 1223 }
1224 kumpf 1.42
1225 mike 1.112 if (*p2) 1226 return false;
1227 david.dillard 1.116
1228 mike 1.112 return true;
1229 karl 1.36
1230 mike 1.112 #else /* PEGASUS_HAS_ICU */
1231 david.dillard 1.105
1232 mike 1.112 // ATTN: optimize this! 1233 return String::equalNoCase(s1, String(s2));
1234 david.dillard 1.105
1235 mike 1.112 #endif /* PEGASUS_HAS_ICU */ 1236 }
1237 chuck 1.78
1238 mike 1.112 Boolean String::equal(const String& s1, const String& s2)
1239 karl 1.36 {
1240 david.dillard 1.116 return s1._rep->size == s2._rep->size && memcmp(s1._rep->data,
1241 mike 1.112 s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;
1242 karl 1.36 } 1243
1244 mike 1.112 Boolean String::equal(const String& s1, const char* s2) 1245 { 1246 #ifdef PEGASUS_STRING_NO_UTF8
1247 kumpf 1.35
1248 mike 1.112 _checkNullPointer(s2);
1249 kumpf 1.39
1250 mike 1.112 const Uint16* p = (Uint16)s1._rep->data; 1251 const char q = s2;
1252 kumpf 1.39
1253 mike 1.112 while (p && q) 1254 { 1255 if (p++ != Uint16(q++)) 1256 return false; 1257 }
1258 kumpf 1.39
1259 mike 1.112 return !(p \|\| q);
1260 kumpf 1.39
1261 mike 1.112 #else /* PEGASUS_STRING_NO_UTF8 */
1262 kumpf 1.39
1263 mike 1.112 return String::equal(s1, String(s2));
1264 kumpf 1.39
1265 mike 1.112 #endif /* PEGASUS_STRING_NO_UTF8 */
1266 kumpf 1.39 } 1267
1268 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
1269 kumpf 1.39 {
1270 mike 1.112 #if defined(PEGASUS_OS_OS400)
1271 david 1.72
1272 david 1.93 CString cstr = str.getCString();
1273 david 1.69 const char* utf8str = cstr;
1274 mike 1.112 os << utf8str; 1275 return os;
1276 david.dillard 1.116 #else
1277 david 1.69
1278 mike 1.112 #if defined(PEGASUS_HAS_ICU)
1279 david 1.69
1280 yi.zhou 1.108 if (InitializeICU::initICUSuccessful()) 1281 {
1282 david.dillard 1.105 char buf = NULL; 1283 const int size = str.size() 6;
1284 mike 1.112 UnicodeString UniStr( 1285 (const UChar *)str.getChar16Data(), (int32_t)str.size());
1286 david.dillard 1.105 Uint32 bufsize = UniStr.extract(0,size,buf); 1287 buf = new char[bufsize+1]; 1288 UniStr.extract(0,bufsize,buf); 1289 os << buf; 1290 os.flush(); 1291 delete [] buf;
1292 david.dillard 1.116 return os;
1293 yi.zhou 1.108 }
1294 mike 1.112
1295 david.dillard 1.116 #endif // PEGASUS_HAS_ICU
1296 mike 1.112 1297 for (Uint32 i = 0, n = str.size(); i < n; i++)
1298 yi.zhou 1.108 {
1299 mike 1.112 Uint16 code = str[i];
1300 david.dillard 1.105
1301 mike 1.112 if (code > 0 && !(code & 0xFF00)) 1302 os << char(code); 1303 else 1304 { 1305 // Print in hex format: 1306 char buffer[8]; 1307 sprintf(buffer, "\\x%04X", code); 1308 os << buffer;
1309 david.dillard 1.105 }
1310 yi.zhou 1.108 }
1311 kumpf 1.39 1312 return os;
1313 mike 1.112 #endif // PEGASUS_OS_OS400
1314 kumpf 1.39 } 1315
1316 mike 1.112 void StringAppendCharAux(StringRep*& _rep)
1317 kumpf 1.39 {
1318 mike 1.112 StringRep* tmp; 1319 1320 if (_rep->cap) 1321 { 1322 tmp = StringRep::alloc(2 * _rep->cap); 1323 tmp->size = _rep->size; 1324 _copy(tmp->data, _rep->data, _rep->size); 1325 } 1326 else 1327 { 1328 tmp = StringRep::alloc(8); 1329 tmp->size = 0; 1330 } 1331 1332 StringRep::unref(_rep); 1333 _rep = tmp;
1334 kumpf 1.39 } 1335
1336 mike 1.112 PEGASUS_NAMESPACE_END 1337 1338 /* 1339 ================================================================================ 1340 1341 String optimizations: 1342 1343 1. Added mechanism allowing certain functions to be inlined only when 1344 used by internal Pegasus modules. External modules (i.e., providers) 1345 link to a non-inline version, which allows for binary compatibility. 1346 1347 2. Implemented copy-on-write with atomic increment/decrement. This 1348 yielded a 10% improvement for the 'gc' benchmark and an 11% improvement 1349 for the 'ni1000' benchmark. 1350 1351 3. Employed loop unrolling in several places. For example, see: 1352 1353 static Uint16* _find(const Uint16* s, size_t n, Uint16 c); 1354 1355 4. Used the "empty-rep" optimization (described in whitepaper from the 1356 GCC Developers Summit). This reduced default construction to a simple 1357 mike 1.112 pointer assignment. 1358 1359 inline String::String() : _rep(&_emptyRep) { } 1360 1361 5. Implemented Uint16 versions of toupper() and tolower() using tables. 1362 For example: 1363 1364 static const char _upper[] = 1365 { 1366 0,1,2,...255 1367 }; 1368 1369 inline Uint16 _toUpper(Uint16 x) 1370 { 1371 return (x & 0xFF00) ? x : _upper[x]; 1372 } 1373
1374 david.dillard 1.116 This outperforms the system implementation by avoiding an anding
1375 mike 1.112 operation. 1376
1377 david.dillard 1.116 6. Implemented char* version of the following member functions to 1378 eliminate unnecessary creation of anonymous string objects
1379 mike 1.112 (temporaries). 1380 1381 String(const String& s1, const char* s2); 1382 String(const char* s1, const String& s2); 1383 String& String::operator=(const char* str); 1384 Uint32 String::find(const char* s) const; 1385 bool String::equal(const String& s1, const char* s2); 1386 static int String::compare(const String& s1, const char* s2); 1387 String& String::append(const char* str); 1388 String& String::append(const char* str, Uint32 size); 1389 static bool String::equalNoCase(const String& s1, const char* s2); 1390 String& operator=(const char* str) 1391 String& String::assign(const char* str) 1392 String& String::append(const char* str) 1393 Boolean operator==(const String& s1, const char* s2) 1394 Boolean operator==(const char* s1, const String& s2) 1395 Boolean operator!=(const String& s1, const char* s2) 1396 Boolean operator!=(const char* s1, const String& s2) 1397 Boolean operator<(const String& s1, const char* s2) 1398 Boolean operator<(const char* s1, const String& s2) 1399 Boolean operator>(const String& s1, const char* s2) 1400 mike 1.112 Boolean operator>(const char* s1, const String& s2) 1401 Boolean operator<=(const String& s1, const char* s2) 1402 Boolean operator<=(const char* s1, const String& s2) 1403 Boolean operator>=(const String& s1, const char* s2) 1404 Boolean operator>=(const char* s1, const String& s2) 1405 String operator+(const String& s1, const char* s2) 1406 String operator+(const char* s1, const String& s2) 1407
1408 david.dillard 1.116 7. Optimized _roundUpToPow2(), used in rounding the capacity to the next
1409 mike 1.112 power of two (algorithm from the book "Hacker's Delight"). 1410 1411 static Uint32 _roundUpToPow2(Uint32 x) 1412 { 1413 if (x < 8) 1414 return 8; 1415 1416 x--; 1417 x |= (x >> 1); 1418 x |= (x >> 2); 1419 x |= (x >> 4); 1420 x |= (x >> 8); 1421 x |= (x >> 16); 1422 x++; 1423 1424 return x; 1425 } 1426 1427 8. Implemented "concatenating constructors" to eliminate temporaries
1428 david.dillard 1.116 created by operator+(). This scheme employs the "return-value
1429 mike 1.112 optimization" described by Stan Lippman. 1430 1431 inline String operator+(const String& s1, const String& s2) 1432 { 1433 return String(s1, s2, 0); 1434 } 1435 1436 9. Experimented to find the optimal initial size for a short string. 1437 Eight seems to offer the best tradeoff between space and time. 1438 1439 10. Inlined all members of the Char16 class. 1440 1441 11. Used Uint16 internally in the String class. This showed no improvement 1442 since Char16 was already fully inlined and was essentially reduced to 1443 Uint16 in any case. 1444 1445 12. Implemented conditional logic (#if) allowing error checking logic to
1446 david.dillard 1.116 be excluded to better performance. Examples include bounds checking
1447 mike 1.112 and null-pointer checking. 1448 1449 13. Used memcpy() and memcmp() where possible. These are implemented using 1450 the rep family of instructions under Intel and are much faster. 1451
1452 david.dillard 1.116 14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
1453 mike 1.112 copy routine overhead. 1454 1455 15. Added ASCII7 form of the constructor and assign(). 1456 1457 String s("hello world", String::ASCII7); 1458 1459 s.assignASCII7("hello world"); 1460 1461 This avoids slower UTF8 processing when not needed. 1462 1463 ================================================================================ 1464 1465 TO-DO: 1466 1467 (+) [DONE] Use PEGASUS_USE_EXPERIMENTAL_INTERFACES 1468 1469 (+) [DONE] Submit BUG-2754 (Windows buffer limit). 1470 1471 (+) [DONE] Eliminate char versions of find() and append(). 1472 1473 (+) [DONE] Remove PEGASUS_MAX_PRINTABLE_CHARACTER from Config.h 1474 mike 1.112 1475 (+) [DONE] Change _next_pow_2() to _roundUpToPow2(). 1476 1477 (+) [DONE] Change '99' to '2' in StringRep constructor (comment as well). 1478 1479 (+) [DONE] Comment StringRep allocation layout. 1480 1481 (+) [DONE] Conceal private inline functions. 1482 1483 (+) [DONE] Shorten inclusion of StringInline.h in String.h. 1484 1485 (+) [DONE] Change USE_INTERNAL_INLINE TO DISABLE_INTERNAL_INLINE or get 1486 rid of altogether. 1487 1488 (+) [DONE] useCamelNotationOnAllFunctionNames. 1489 1490 (+) [DONE] Check for overflow condition in StringRep::alloc(). 1491 1492 (+) [DONE] Remove tabs (used vim ":set expandtab" and ":retab"). 1493 1494 (+) [DONE] Fix throw-related memory leak. 1495 mike 1.112 1496 (+) [DONE] Look at PEP223 for coding security guidelines. 1497 1498 (+) [DONE] Use old AtomicInt for now (new AtomicInt part of bug #4250).
1499 kumpf 1.39
1500 mike 1.112 (+) [DONE] Removed appendASCII() and the ASCII form of the constructor.
1501 kumpf 1.39
1502 mike 1.112 (+) DOC++ String.h - will open new bug?
1503 kumpf 1.39
1504 mike 1.112 (+) Added PEGASUS_DISABLE_INTERNAL_INLINES macro (to permit suppression 1505 on certain platforms).
1506 kumpf 1.39
1507 mike 1.112 ================================================================================ 1508 */

No CVS admin address has been configured