![]() ![]() |
![]() |
//%2005////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
// Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
// Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
// IBM Corp.; EMC Corporation, The Open Group.
// Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
// IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
// Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
// EMC Corporation; VERITAS Software Corporation; The Open Group.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
// ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
// "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
// LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
//==============================================================================
//
// Author: Mike Brasher (mbrasher@bmc.com)
//
// Modified By:
//     Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
//     Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297
//     David Dillard, VERITAS Software Corp. (david.dillard@veritas.com)
//     Mike Brasher (mike-brasher@austin.rr.com)
//
//%/////////////////////////////////////////////////////////////////////////////

40 mike 1.111.6.1 #include <cassert> | ||
41 kumpf 1.48 #include "InternalException.h" | ||
42 david 1.69 #include "CommonUTF.h" | ||
43 mike 1.111.6.1 #include "MessageLoader.h" | ||
44 mike 1.111.6.2 #include "StringRep.h" | ||
45 david 1.69 46 #ifdef PEGASUS_HAS_ICU | ||
47 chuck 1.99 #include <unicode/ustring.h> 48 #include <unicode/uchar.h> | ||
49 david 1.69 #endif 50 | ||
51 mike 1.27 PEGASUS_NAMESPACE_BEGIN 52 | ||
//==============================================================================
//
// Compile-time macros (undefined by default).
//
//     PEGASUS_STRING_NO_THROW -- suppresses throwing of exceptions
//
//     PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
//
//     PEGASUS_USE_INTERNAL_INLINES -- enables internal inlining feature.
//
//==============================================================================

//==============================================================================
//
// File-scope definitions:
//
69 mike 1.111.6.1 //============================================================================== 70 71 const Uint8 _to_upper_tbl[256] = 72 { 73 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 74 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 75 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 76 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 77 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 78 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 79 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 80 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 81 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47, 82 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, 83 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57, 84 0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, 85 0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47, 86 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, 87 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57, 88 0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F, 89 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 90 mike 1.111.6.1 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 91 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 92 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 93 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 94 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 95 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 96 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 97 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 98 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 99 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 100 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 101 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 102 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 103 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 104 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF, 105 }; 106 107 const Uint8 _to_lower_tbl[256] = 108 { 109 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 110 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 111 mike 1.111.6.1 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 112 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 113 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 114 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 115 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 116 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 
117 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 118 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 119 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 120 0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F, 121 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 122 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 123 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 124 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, 125 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 126 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 127 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 128 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 129 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 130 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 131 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 132 mike 1.111.6.1 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 133 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 134 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 135 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 136 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 137 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 138 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 139 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 140 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF, 141 }; | ||
142 kumpf 1.54 | ||
143 mike 1.111.6.1 // Converts 16-bit characters to upper case. 144 inline Uint16 _to_upper(Uint16 x) | ||
145 kumpf 1.54 { | ||
146 mike 1.111.6.1 return (x & 0xFF00) ? x : _to_upper_tbl[x]; | ||
147 kumpf 1.54 } 148 | ||
149 mike 1.111.6.1 // Converts 16-bit characters to lower case. 150 inline Uint16 _to_lower(Uint16 x) | ||
151 kumpf 1.54 { | ||
152 mike 1.111.6.1 return (x & 0xFF00) ? x : _to_lower_tbl[x]; 153 } | ||
154 kumpf 1.82 | ||
155 mike 1.111.6.1 // Rounds x to the next power of two (or just returns 8 if x < 8). 156 static Uint32 _next_pow_2(Uint32 x) 157 { 158 if (x < 8) 159 return 8; 160 161 x--; 162 x |= (x >> 1); 163 x |= (x >> 2); 164 x |= (x >> 4); 165 x |= (x >> 8); 166 x |= (x >> 16); 167 x++; 168 169 return x; 170 } 171 172 template<class P, class Q> 173 static void _copy(P* p, const Q* q, size_t n) 174 { 175 // Use loop unrolling. 176 mike 1.111.6.1 177 while (n >= 8) | ||
178 kumpf 1.82 { | ||
179 mike 1.111.6.1 p[0] = q[0]; 180 p[1] = q[1]; 181 p[2] = q[2]; 182 p[3] = q[3]; 183 p[4] = q[4]; 184 p[5] = q[5]; 185 p[6] = q[6]; 186 p[7] = q[7]; 187 p += 8; 188 q += 8; 189 n -= 8; | ||
190 kumpf 1.82 } | ||
191 mike 1.111.6.1 192 while (n >= 4) 193 { 194 p[0] = q[0]; 195 p[1] = q[1]; 196 p[2] = q[2]; 197 p[3] = q[3]; 198 p += 4; 199 q += 4; 200 n -= 4; 201 } 202 203 while (n--) 204 *p++ = *q++; | ||
205 kumpf 1.54 } 206 | ||
207 mike 1.111.6.1 static Uint16* _find(const Uint16* s, size_t n, Uint16 c) | ||
208 kumpf 1.54 { | ||
209 mike 1.111.6.1 while (n >= 4) 210 { 211 if (s[0] == c) 212 return (Uint16*)s; 213 if (s[1] == c) 214 return (Uint16*)&s[1]; 215 if (s[2] == c) 216 return (Uint16*)&s[2]; 217 if (s[3] == c) 218 return (Uint16*)&s[3]; 219 220 n -= 4; 221 s += 4; 222 } 223 224 if (n) 225 { 226 if (*s == c) 227 return (Uint16*)s; 228 s++; 229 n--; 230 mike 1.111.6.1 } 231 232 if (n) 233 { 234 if (*s == c) 235 return (Uint16*)s; 236 s++; 237 n--; 238 } 239 240 if (n && *s == c) 241 return (Uint16*)s; 242 243 // Not found! 244 return 0; | ||
245 kumpf 1.54 } 246 | ||
247 mike 1.111.6.1 static int _compare(const Uint16* s1, const Uint16* s2) | ||
248 kumpf 1.54 { | ||
249 mike 1.111.6.1 while (*s1 && *s2) | ||
250 kumpf 1.82 { | ||
251 mike 1.111.6.1 int r = *s1++ - *s2++; 252 253 if (r) 254 return r; | ||
255 kumpf 1.82 } | ||
256 mike 1.111.6.1 257 if (*s2) 258 return -1; 259 else if (*s1) 260 return 1; 261 262 return 0; | ||
263 kumpf 1.54 } 264 | ||
265 mike 1.111.6.1 static int _compare_no_utf8(const Uint16* s1, const char* s2) | ||
266 kumpf 1.56 { | ||
267 mike 1.111.6.1 Uint16 c1; 268 Uint16 c2; 269 270 do | ||
271 kumpf 1.81 { | ||
272 mike 1.111.6.1 c1 = *s1++; 273 c2 = *s2++; 274 275 if (c1 == 0) 276 return c1 - c2; | ||
277 kumpf 1.81 } | ||
278 mike 1.111.6.1 while (c1 == c2); 279 280 return c1 - c2; | ||
281 kumpf 1.56 } 282 | ||
283 mike 1.111.6.1 static int _compare(const Uint16* s1, const Uint16* s2, size_t n) | ||
284 kumpf 1.54 { | ||
285 mike 1.111.6.1 // This should only be called when s1 and s2 have the same length. 286 287 while (n-- && (*s1++ - *s2++) == 0) 288 ; 289 290 // 291 292 return s1[-1] - s2[-1]; | ||
293 kumpf 1.54 } 294 | ||
295 mike 1.111.6.1 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n) 296 { 297 memcpy(s1, s2, n * sizeof(Uint16)); 298 } | ||
299 kumpf 1.39 | ||
300 mike 1.111.6.1 void String_throw_out_of_bounds() 301 { 302 throw IndexOutOfBoundsException(); 303 } | ||
304 mike 1.27 | ||
305 mike 1.111.6.1 #ifdef PEGASUS_STRING_NO_THROW 306 # define _check_null_pointer(ARG) /* empty */ 307 #else 308 template<class T> 309 inline void _check_null_pointer(const T* ptr) | ||
310 mike 1.27 { | ||
311 mike 1.111.6.1 if (!ptr) 312 throw NullPointer(); 313 } 314 #endif 315 316 static size_t _copy_from_utf8(Uint16* dest, const char* src, size_t n) 317 { 318 Uint16* p = dest; 319 const Uint8* q = (const Uint8*)src; | ||
320 mike 1.27 | ||
321 mike 1.111.6.1 // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later). 322 // Use loop-unrolling. | ||
323 mike 1.27 | ||
324 mike 1.111.6.1 while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0) 325 { 326 p[0] = q[0]; 327 p[1] = q[1]; 328 p[2] = q[2]; 329 p[3] = q[3]; 330 p[4] = q[4]; 331 p[5] = q[5]; 332 p[6] = q[6]; 333 p[7] = q[7]; 334 p += 8; 335 q += 8; 336 n -= 8; 337 } 338 339 while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0) 340 { 341 p[0] = q[0]; 342 p[1] = q[1]; 343 p[2] = q[2]; 344 p[3] = q[3]; 345 mike 1.111.6.1 p += 4; 346 q += 4; 347 n -= 4; 348 } 349 350 switch (n) 351 { 352 case 0: 353 return p - dest; 354 case 1: 355 if (q[0] < 128) 356 { 357 p[0] = q[0]; 358 return p + 1 - dest; 359 } 360 break; 361 case 2: 362 if (((q[0]|q[1]) & 0x80) == 0) 363 { 364 p[0] = q[0]; 365 p[1] = q[1]; 366 mike 1.111.6.1 return p + 2 - dest; 367 } 368 break; 369 case 3: 370 if (((q[0]|q[1]|q[2]) & 0x80) == 0) 371 { 372 p[0] = q[0]; 373 p[1] = q[1]; 374 p[2] = q[2]; 375 return p + 3 - dest; 376 } 377 break; 378 } 379 380 // Process remaining characters. 381 382 while (n) 383 { 384 // Optimize for 7-bit ASCII case. 385 386 if (*q < 128) 387 mike 1.111.6.1 { 388 *p++ = *q++; 389 n--; 390 } 391 else 392 { 393 Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1; 394 395 if (c > n || !isValid_U8(q, c) || 396 UTF8toUTF16(&q, q + c, &p, p + n) != 0) 397 { 398 MessageLoaderParms parms("Common.String.BAD_UTF8", 399 "The byte sequence starting at index $0 " 400 "is not valid UTF-8 encoding.", 401 q - (const Uint8*)src); 402 throw Exception(parms); 403 } 404 405 n -= c; 406 } 407 } 408 mike 1.111.6.1 409 return p - dest; 410 } 411 412 // Note: dest must be at least three times src (plus an extra byte for 413 // terminator). 
414 static inline size_t _copy_to_utf8(char* dest, const Uint16* src, size_t n) 415 { 416 const Uint16* q = src; 417 Uint8* p = (Uint8*)dest; 418 419 while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128) 420 { 421 p[0] = q[0]; 422 p[1] = q[1]; 423 p[2] = q[2]; 424 p[3] = q[3]; 425 p += 4; 426 q += 4; 427 n -= 4; 428 } 429 mike 1.111.6.1 430 switch (n) 431 { 432 case 0: 433 return p - (Uint8*)dest; 434 case 1: 435 if (q[0] < 128) 436 { 437 p[0] = q[0]; 438 return p + 1 - (Uint8*)dest; 439 } 440 break; 441 case 2: 442 if (q[0] < 128 && q[1] < 128) 443 { 444 p[0] = q[0]; 445 p[1] = q[1]; 446 return p + 2 - (Uint8*)dest; 447 } 448 break; 449 case 3: 450 mike 1.111.6.1 if (q[0] < 128 && q[1] < 128 && q[2] < 128) 451 { 452 p[0] = q[0]; 453 p[1] = q[1]; 454 p[2] = q[2]; 455 return p + 3 - (Uint8*)dest; 456 } 457 break; 458 } 459 460 // If this line was reached, there must be characters greater than 128. 461 462 UTF16toUTF8(&q, q + n, &p, p + 3 * n); | ||
463 mike 1.27 | ||
464 mike 1.111.6.1 return p - (Uint8*)dest; 465 } 466 467 static inline size_t _convert(Uint16* p, const char* q, size_t n) 468 { 469 #ifdef PEGASUS_STRING_NO_UTF8 470 _copy(p, q, n); | ||
471 mike 1.27 return n; | ||
472 mike 1.111.6.1 #else 473 return _copy_from_utf8(p, q, n); 474 #endif | ||
475 mike 1.27 } 476 | ||
//==============================================================================
//
// class CString
//
481 mike 1.111.6.1 //============================================================================== 482 483 CString::CString(const CString& cstr) : _rep(0) | ||
484 chuck 1.102 { | ||
485 mike 1.111.6.1 if (cstr._rep) | ||
486 chuck 1.102 { | ||
487 mike 1.111.6.1 size_t n = strlen(cstr._rep) + 1; 488 _rep = (char*)operator new(n); 489 memcpy(_rep, cstr._rep, n); 490 } 491 } 492 493 CString& CString::operator=(const CString& cstr) 494 { 495 if (&cstr != this) 496 { 497 if (_rep) | ||
498 david.dillard 1.105 { | ||
499 mike 1.111.6.1 operator delete(_rep); 500 _rep = 0; | ||
501 chuck 1.102 } | ||
502 mike 1.111.6.1 503 if (cstr._rep) | ||
504 chuck 1.102 { | ||
505 mike 1.111.6.1 size_t n = strlen(cstr._rep) + 1; 506 _rep = (char*)operator new(n); 507 memcpy(_rep, cstr._rep, n); | ||
508 chuck 1.102 } | ||
509 mike 1.111.6.1 } | ||
510 chuck 1.102 | ||
511 mike 1.111.6.1 return *this; 512 } 513 514 //============================================================================== 515 // 516 // class StringRep 517 // 518 //============================================================================== 519 520 StringRep StringRep::_empty_rep; | ||
521 chuck 1.102 | ||
522 mike 1.111.6.1 inline StringRep* StringRep::alloc(size_t cap) 523 { 524 StringRep* rep = (StringRep*)::operator new( 525 sizeof(StringRep) + cap * sizeof(Uint16)); 526 rep->cap = cap; 527 Atomic_create(&rep->refs, 1); 528 529 return rep; | ||
530 chuck 1.102 } 531 | ||
532 mike 1.111.6.1 static inline void _reserve(StringRep*& rep, Uint32 cap) | ||
533 kumpf 1.43 { | ||
534 mike 1.111.6.1 if (cap > rep->cap || Atomic_get(&rep->refs) != 1) 535 { 536 size_t n = _next_pow_2(cap); 537 StringRep* new_rep = StringRep::alloc(n); 538 new_rep->size = rep->size; 539 _copy(new_rep->data, rep->data, rep->size + 1); 540 StringRep::unref(rep); 541 rep = new_rep; 542 } 543 } | ||
544 kumpf 1.43 | ||
545 mike 1.111.6.1 StringRep* StringRep::create(const Uint16* data, size_t size) 546 { 547 StringRep* rep = StringRep::alloc(size); 548 rep->size = size; 549 _copy(rep->data, data, size); 550 rep->data[size] = '\0'; 551 return rep; 552 } 553 554 StringRep* StringRep::copy_on_write(StringRep* rep) 555 { 556 // Return a new copy of rep. Release rep. 557 558 StringRep* new_rep = StringRep::alloc(rep->size); 559 new_rep->size = rep->size; 560 _copy(new_rep->data, rep->data, rep->size); 561 new_rep->data[new_rep->size] = '\0'; 562 StringRep::unref(rep); 563 return new_rep; 564 } | ||
565 kumpf 1.43 | ||
566 mike 1.111.6.1 StringRep* StringRep::create(const char* data, size_t size) | ||
567 mike 1.27 { | ||
568 mike 1.111.6.1 StringRep* rep = StringRep::alloc(size); 569 rep->size = _convert((Uint16*)rep->data, data, size); 570 rep->data[rep->size] = '\0'; 571 572 return rep; | ||
573 mike 1.27 } 574 | ||
575 mike 1.111.6.1 StringRep* StringRep::createASCII7(const char* data, size_t size) | ||
576 mike 1.27 { | ||
577 mike 1.111.6.1 StringRep* rep = StringRep::alloc(size); 578 _copy((Uint16*)rep->data, data, size); 579 rep->data[rep->size = size] = '\0'; 580 return rep; | ||
581 kumpf 1.39 } | ||
582 tony 1.66 | ||
583 mike 1.111.6.1 Uint32 StringRep::length(const Uint16* str) 584 { 585 // Note: We could unroll this but it is rarely called. 586 587 const Uint16* end = (Uint16*)str; 588 589 while (*end++) 590 ; 591 592 return end - str - 1; 593 } 594 595 //============================================================================== 596 // 597 // class String 598 // 599 //============================================================================== 600 601 const String String::EMPTY; | ||
602 mike 1.27 | ||
603 kumpf 1.39 String::String(const String& str, Uint32 n) 604 { | ||
605 mike 1.111.6.1 _check_bounds(n, str._rep->size); 606 _rep = StringRep::create(str._rep->data, n); | ||
607 kumpf 1.39 } 608 609 String::String(const Char16* str) 610 { | ||
611 mike 1.111.6.1 _check_null_pointer(str); 612 _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str)); | ||
613 mike 1.27 } 614 | ||
615 kumpf 1.39 String::String(const Char16* str, Uint32 n) 616 { | ||
617 mike 1.111.6.1 _check_null_pointer(str); 618 _rep = StringRep::create((Uint16*)str, n); | ||
619 kumpf 1.39 } 620 621 String::String(const char* str) | ||
622 mike 1.27 { | ||
623 mike 1.111.6.1 _check_null_pointer(str); 624 _rep = StringRep::create(str, strlen(str)); 625 } | ||
626 david.dillard 1.105 | ||
627 mike 1.111.6.1 String::String(const char* str, String::ASCII7Tag tag) 628 { 629 _check_null_pointer(str); 630 _rep = StringRep::createASCII7(str, strlen(str)); | ||
631 mike 1.27 } 632 | ||
633 kumpf 1.39 String::String(const char* str, Uint32 n) | ||
634 mike 1.27 { | ||
635 mike 1.111.6.1 _check_null_pointer(str); 636 _rep = StringRep::create(str, n); 637 } | ||
638 david.dillard 1.105 | ||
639 mike 1.111.6.1 String::String(const char* str, size_t n, String::ASCII7Tag tag) 640 { 641 _check_null_pointer(str); 642 _rep = StringRep::createASCII7(str, n); | ||
643 kumpf 1.39 } | ||
644 mike 1.27 | ||
645 mike 1.111.6.1 String::String(const String& s1, const String& s2) | ||
646 kumpf 1.39 { | ||
647 mike 1.111.6.1 size_t n1 = s1._rep->size; 648 size_t n2 = s2._rep->size; 649 size_t n = n1 + n2; 650 _rep = StringRep::alloc(n); 651 _copy(_rep->data, s1._rep->data, n1); 652 _copy(_rep->data + n1, s2._rep->data, n2); 653 _rep->size = n; 654 _rep->data[n] = '\0'; | ||
655 mike 1.27 } 656 | ||
657 mike 1.111.6.1 String::String(const String& s1, const char* s2) | ||
658 mike 1.27 { | ||
659 mike 1.111.6.1 _check_null_pointer(s2); 660 size_t n1 = s1._rep->size; 661 size_t n2 = strlen(s2); 662 _rep = StringRep::alloc(n1 + n2); 663 _copy(_rep->data, s1._rep->data, n1); 664 _rep->size = n1 + _convert((Uint16*)_rep->data + n1, s2, n2); 665 _rep->data[_rep->size] = '\0'; | ||
666 mike 1.27 } 667 | ||
668 mike 1.111.6.1 String::String(const char* s1, const String& s2) | ||
669 mike 1.27 { | ||
670 mike 1.111.6.1 _check_null_pointer(s1); 671 size_t n1 = strlen(s1); 672 size_t n2 = s2._rep->size; 673 _rep = StringRep::alloc(n1 + n2); 674 _rep->size = n2 + _convert((Uint16*)_rep->data, s1, n1); 675 _copy(_rep->data + n1, s2._rep->data, n2); 676 _rep->data[_rep->size] = '\0'; | ||
677 mike 1.27 } 678 | ||
679 mike 1.111.6.1 String& String::assign(const String& str) | ||
680 mike 1.27 { | ||
681 mike 1.111.6.1 if (_rep != str._rep) | ||
682 david.dillard 1.105 { | ||
683 mike 1.111.6.1 StringRep::unref(_rep); 684 StringRep::ref(_rep = str._rep); | ||
685 david.dillard 1.105 } 686 | ||
687 mike 1.27 return *this; 688 } 689 690 String& String::assign(const Char16* str, Uint32 n) 691 { | ||
692 mike 1.111.6.1 _check_null_pointer(str); 693 694 if (n > _rep->cap || Atomic_get(&_rep->refs) != 1) | ||
695 david.dillard 1.105 { | ||
696 mike 1.111.6.1 StringRep::unref(_rep); 697 _rep = StringRep::alloc(n); | ||
698 david.dillard 1.105 } 699 | ||
700 mike 1.111.6.1 _rep->size = n; 701 _copy(_rep->data, (Uint16*)str, n); 702 _rep->data[n] = '\0'; 703 | ||
704 mike 1.27 return *this; 705 } 706 | ||
707 mike 1.111.6.1 String& String::assign(const char* str, Uint32 n) | ||
708 chuck 1.102 { | ||
709 mike 1.111.6.1 _check_null_pointer(str); 710 711 if (n > _rep->cap || Atomic_get(&_rep->refs) != 1) | ||
712 david.dillard 1.105 { | ||
713 mike 1.111.6.1 StringRep::unref(_rep); 714 _rep = StringRep::alloc(n); | ||
715 david.dillard 1.105 } 716 | ||
717 mike 1.111.6.1 _rep->size = _convert(_rep->data, str, n); 718 _rep->data[_rep->size] = 0; 719 | ||
720 chuck 1.102 return *this; 721 } 722 | ||
723 mike 1.111.6.1 String& String::assignASCII7(const char* str, Uint32 n) | ||
724 mike 1.27 { | ||
725 mike 1.111.6.1 _check_null_pointer(str); 726 727 if (n > _rep->cap || Atomic_get(&_rep->refs) != 1) | ||
728 david.dillard 1.105 { | ||
729 mike 1.111.6.1 StringRep::unref(_rep); 730 _rep = StringRep::alloc(n); | ||
731 david.dillard 1.105 } 732 | ||
733 mike 1.111.6.1 _copy(_rep->data, str, n); 734 _rep->data[_rep->size = n] = 0; 735 | ||
736 mike 1.27 return *this; 737 } 738 | ||
739 kumpf 1.39 void String::clear() 740 { | ||
741 mike 1.111.6.1 if (_rep->size) 742 { 743 if (Atomic_get(&_rep->refs) == 1) 744 _rep->size = 0; 745 else 746 { 747 StringRep::unref(_rep); 748 _rep = &StringRep::_empty_rep; 749 } 750 } | ||
751 kumpf 1.39 } 752 | ||
753 mike 1.111.6.1 void String::reserveCapacity(Uint32 cap) | ||
754 kumpf 1.39 { | ||
755 mike 1.111.6.1 _reserve(_rep, cap); | ||
756 kumpf 1.39 } 757 | ||
758 mike 1.111.6.1 CString String::getCString() const 759 { 760 #ifdef PEGASUS_STRING_NO_UTF8 761 char* str = (char*)operator new(_rep->size + 1); 762 _copy(str, _rep->data, _rep->size); 763 str[_rep->size] = '\0'; 764 return CString(str); | ||
765 gs.keenan 1.110 #else | ||
766 mike 1.111.6.1 Uint32 n = 3 * _rep->size; 767 char* str = (char*)operator new(n + 1); 768 size_t size = _copy_to_utf8(str, _rep->data, _rep->size); 769 str[size] = '\0'; 770 return CString(str); | ||
771 gs.keenan 1.110 #endif | ||
772 kumpf 1.39 } 773 | ||
774 mike 1.111.6.1 String& String::append(const Char16* str, Uint32 n) | ||
775 kumpf 1.39 { | ||
776 mike 1.111.6.1 _check_null_pointer(str); | ||
777 kumpf 1.39 | ||
778 mike 1.111.6.1 size_t old_size = _rep->size; 779 size_t new_size = old_size + n; 780 _reserve(_rep, new_size); 781 _copy(_rep->data + old_size, (Uint16*)str, n); 782 _rep->size = new_size; 783 _rep->data[new_size] = '\0'; | ||
784 mike 1.27 | ||
785 mike 1.111.6.1 return *this; | ||
786 mike 1.27 } 787 | ||
788 mike 1.111.6.1 String& String::append(const String& str) | ||
789 mike 1.27 { | ||
790 mike 1.111.6.1 return append((Char16*)str._rep->data, str._rep->size); | ||
791 mike 1.27 } 792 | ||
793 mike 1.111.6.1 String& String::append(const char* str, Uint32 size) | ||
794 kumpf 1.39 { | ||
795 mike 1.111.6.1 _check_null_pointer(str); 796 797 size_t old_size = _rep->size; 798 size_t cap = old_size + size; 799 800 _reserve(_rep, cap); 801 _rep->size += _convert((Uint16*)_rep->data + old_size, str, size); 802 _rep->data[_rep->size] = '\0'; | ||
803 kumpf 1.39 | ||
804 mike 1.27 return *this; 805 } 806 | ||
807 mike 1.111.6.1 void String::remove(Uint32 index, Uint32 n) | ||
808 mike 1.27 { | ||
809 mike 1.111.6.1 if (n == PEG_NOT_FOUND) 810 n = _rep->size - index; | ||
811 mike 1.27 | ||
812 mike 1.111.6.1 _check_bounds(index + n, _rep->size); | ||
813 mike 1.27 | ||
814 mike 1.111.6.1 if (Atomic_get(&_rep->refs) != 1) 815 _rep = StringRep::copy_on_write(_rep); | ||
816 mike 1.27 | ||
817 mike 1.111.6.1 assert(index + n <= _rep->size); 818 819 size_t rem = _rep->size - (index + n); 820 Uint16* data = _rep->data; 821 822 if (rem) 823 memmove(data + index, data + index + n, rem * sizeof(Uint16)); 824 825 _rep->size -= n; 826 data[_rep->size] = '\0'; | ||
827 mike 1.27 } 828 | ||
829 mike 1.111.6.1 String String::subString(Uint32 index, Uint32 n) const | ||
830 mike 1.27 { | ||
831 mike 1.111.6.1 // Note: this implementation is very permissive but used for 832 // backwards compatibility. 833 834 if (index < _rep->size) | ||
835 mike 1.27 { | ||
836 mike 1.111.6.1 if (n == PEG_NOT_FOUND || n > _rep->size - index) 837 n = _rep->size - index; | ||
838 mike 1.27 | ||
839 mike 1.111.6.1 return String((Char16*)_rep->data + index, n); | ||
840 mike 1.27 } | ||
841 david.dillard 1.105 842 return String(); | ||
843 mike 1.27 } 844 845 Uint32 String::find(Char16 c) const 846 { | ||
847 mike 1.111.6.1 Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c); | ||
848 mike 1.27 | ||
849 mike 1.111.6.1 if (p) 850 return p - _rep->data; | ||
851 mike 1.27 852 return PEG_NOT_FOUND; 853 } 854 | ||
855 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const | ||
856 mike 1.30 { | ||
857 mike 1.111.6.1 _check_bounds(index, _rep->size); | ||
858 mike 1.30 | ||
859 mike 1.111.6.1 if (index >= _rep->size) 860 return PEG_NOT_FOUND; 861 862 Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c); 863 864 if (p) 865 return p - _rep->data; | ||
866 mike 1.30 867 return PEG_NOT_FOUND; 868 } 869 | ||
870 mike 1.111.6.1 Uint32 String::_find_aux(const Char16* s, Uint32 n) const | ||
871 mike 1.27 { | ||
872 mike 1.111.6.1 _check_null_pointer(s); | ||
873 mike 1.27 | ||
874 mike 1.111.6.1 const Uint16* data = _rep->data; 875 size_t rem = _rep->size; | ||
876 mike 1.30 | ||
877 mike 1.111.6.1 while (n <= rem) | ||
878 mike 1.27 { | ||
879 mike 1.111.6.1 Uint16* p = (Uint16*)_find(data, rem, s[0]); | ||
880 david.dillard 1.105 | ||
881 mike 1.111.6.1 if (!p) 882 break; 883 884 if (memcmp(p, s, n * sizeof(Uint16)) == 0) 885 return p - _rep->data; 886 887 p++; 888 rem -= p - data; 889 data = p; | ||
890 mike 1.27 } | ||
891 mike 1.111.6.1 | ||
892 mike 1.27 return PEG_NOT_FOUND; 893 } 894 | ||
895 mike 1.111.6.1 Uint32 String::find(const char* s) const 896 { 897 _check_null_pointer(s); 898 899 // Note: could optimize away creation of temporary, but this is rarely 900 // called. 901 return find(String(s)); 902 } 903 | ||
904 mike 1.27 Uint32 String::reverseFind(Char16 c) const 905 { | ||
906 mike 1.111.6.1 Uint16 x = c; 907 Uint16* p = _rep->data; 908 Uint16* q = _rep->data + _rep->size; | ||
909 mike 1.27 | ||
910 mike 1.111.6.1 while (q != p) | ||
911 mike 1.27 { | ||
912 mike 1.111.6.1 if (*--q == x) 913 return q - p; | ||
914 mike 1.27 } 915 916 return PEG_NOT_FOUND; 917 } 918 919 void String::toLower() 920 { | ||
921 david 1.69 #ifdef PEGASUS_HAS_ICU | ||
922 mike 1.111.6.1 | ||
923 yi.zhou 1.108 if (InitializeICU::initICUSuccessful()) | ||
924 david 1.90 { | ||
925 mike 1.111.6.1 if (Atomic_get(&_rep->refs) != 1) 926 _rep = StringRep::copy_on_write(_rep); 927 | ||
928 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert. | ||
929 mike 1.111.6.1 // Since context-sensitive casing looks at adjacent chars, this 930 // prevents optimizations where the us-ascii is converted before 931 // calling ICU. | ||
932 yi.zhou 1.108 // The string may shrink or expand after the convert. 933 | ||
934 mike 1.111.6.1 //// First calculate size of resulting string. u_strToLower() returns 935 //// only the size when zero is passed as the destination size argument. 936 | ||
937 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR; 938 | ||
939 mike 1.111.6.1 int32_t new_size = u_strToLower( 940 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err); 941 942 err = U_ZERO_ERROR; | ||
943 chuck 1.99 | ||
944 mike 1.111.6.1 //// Reserve enough space for the result. 945 946 if ((Uint32)new_size > _rep->cap) 947 _reserve(_rep, new_size); 948 949 //// Perform the conversion (overlapping buffers are allowed). | ||
950 yi.zhou 1.108 | ||
951 mike 1.111.6.1 u_strToLower((UChar*)_rep->data, new_size, 952 (UChar*)_rep->data, _rep->size, NULL, &err); 953 954 _rep->size = new_size; 955 return; | ||
956 david 1.90 } | ||
957 mike 1.111.6.1 958 #endif /* PEGASUS_HAS_ICU */ 959 960 if (Atomic_get(&_rep->refs) != 1) 961 _rep = StringRep::copy_on_write(_rep); 962 963 Uint16* p = _rep->data; 964 size_t n = _rep->size; 965 966 for (; n--; p++) | ||
967 david 1.90 { | ||
968 mike 1.111.6.1 if (!(*p & 0xFF00)) 969 *p = _to_lower(*p); | ||
970 mike 1.27 } | ||
971 kumpf 1.39 } 972 | ||
973 chuck 1.99 void String::toUpper() | ||
974 david 1.90 { 975 #ifdef PEGASUS_HAS_ICU | ||
976 mike 1.111.6.1 | ||
977 yi.zhou 1.108 if (InitializeICU::initICUSuccessful()) | ||
978 chuck 1.99 { | ||
979 mike 1.111.6.1 if (Atomic_get(&_rep->refs) != 1) 980 _rep = StringRep::copy_on_write(_rep); 981 | ||
982 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert. | ||
983 mike 1.111.6.1 // Since context-sensitive casing looks at adjacent chars, this 984 // prevents optimizations where the us-ascii is converted before 985 // calling ICU. | ||
986 yi.zhou 1.108 // The string may shrink or expand after the convert. 987 | ||
988 mike 1.111.6.1 //// First calculate size of resulting string. u_strToUpper() returns 989 //// only the size when zero is passed as the destination size argument. 990 | ||
991 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR; 992 | ||
993 mike 1.111.6.1 int32_t new_size = u_strToUpper( 994 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err); | ||
995 chuck 1.99 | ||
996 mike 1.111.6.1 err = U_ZERO_ERROR; | ||
997 yi.zhou 1.108 | ||
998 mike 1.111.6.1 //// Reserve enough space for the result. | ||
999 david 1.90 | ||
1000 mike 1.111.6.1 if ((Uint32)new_size > _rep->cap) 1001 _reserve(_rep, new_size); | ||
1002 kumpf 1.39 | ||
1003 mike 1.111.6.1 //// Perform the conversion (overlapping buffers are allowed). | ||
1004 mike 1.27 | ||
1005 mike 1.111.6.1 u_strToUpper((UChar*)_rep->data, new_size, 1006 (UChar*)_rep->data, _rep->size, NULL, &err); 1007 1008 _rep->size = new_size; 1009 1010 return; | ||
1011 mike 1.27 } 1012 | ||
1013 mike 1.111.6.1 #endif /* PEGASUS_HAS_ICU */ 1014 1015 if (Atomic_get(&_rep->refs) != 1) 1016 _rep = StringRep::copy_on_write(_rep); 1017 1018 Uint16* p = _rep->data; 1019 size_t n = _rep->size; 1020 1021 for (; n--; p++) 1022 *p = _to_upper(*p); | ||
1023 mike 1.27 } 1024 | ||
1025 mike 1.111.6.1 int String::compare(const String& s1, const String& s2, Uint32 n) | ||
1026 mike 1.30 { | ||
1027 mike 1.111.6.1 assert(n <= s1._rep->size); 1028 assert(n <= s2._rep->size); | ||
1029 kumpf 1.43 | ||
1030 mike 1.111.6.1 // Ignoring error in which n is greater than s1.size() or s2.size() 1031 return _compare(s1._rep->data, s2._rep->data, n); 1032 } | ||
1033 mike 1.30 | ||
1034 mike 1.111.6.1 int String::compare(const String& s1, const String& s2) 1035 { 1036 return _compare(s1._rep->data, s2._rep->data); 1037 } | ||
1038 mike 1.30 | ||
1039 mike 1.111.6.1 int String::compare(const String& s1, const char* s2) 1040 { 1041 _check_null_pointer(s2); | ||
1042 mike 1.30 | ||
1043 mike 1.111.6.1 #ifdef PEGASUS_STRING_NO_UTF8 1044 return _compare_no_utf8(s1._rep->data, s2); 1045 #else 1046 // ATTN: optimize this! 1047 return String::compare(s1, String(s2)); 1048 #endif | ||
1049 mike 1.30 } 1050 | ||
1051 mike 1.111.6.1 int String::compareNoCase(const String& str1, const String& str2) | ||
1052 kumpf 1.40 { | ||
1053 david 1.69 #ifdef PEGASUS_HAS_ICU | ||
1054 mike 1.111.6.1 | ||
1055 yi.zhou 1.108 if (InitializeICU::initICUSuccessful()) 1056 { | ||
1057 mike 1.111.6.1 return u_strcasecmp( 1058 str1._rep->data, str2._rep->data, U_FOLD_CASE_DEFAULT); | ||
1059 yi.zhou 1.108 } | ||
1060 kumpf 1.40 | ||
1061 mike 1.111.6.1 #endif /* PEGASUS_HAS_ICU */ | ||
1062 kumpf 1.40 | ||
1063 mike 1.111.6.1 const Uint16* s1 = str1._rep->data; 1064 const Uint16* s2 = str2._rep->data; 1065 1066 while (*s1 && *s2) 1067 { 1068 int r = _to_lower(*s1++) - _to_lower(*s2++); | ||
1069 kumpf 1.40 | ||
1070 david.dillard 1.105 if (r) 1071 return r; | ||
1072 kumpf 1.40 } 1073 | ||
1074 mike 1.111.6.1 if (*s2) | ||
1075 david.dillard 1.105 return -1; | ||
1076 mike 1.111.6.1 else if (*s1) | ||
1077 david.dillard 1.105 return 1; | ||
1078 kumpf 1.40 1079 return 0; 1080 } 1081 | ||
1082 mike 1.111.6.1 Boolean String::equalNoCase_aux(const String& s1, const String& s2) | ||
1083 mike 1.27 { | ||
1084 david 1.69 #ifdef PEGASUS_HAS_ICU | ||
1085 kumpf 1.39 | ||
1086 mike 1.111.6.1 return String::compareNoCase(s1, s2) == 0; | ||
1087 kumpf 1.39 | ||
1088 mike 1.111.6.1 #else /* PEGASUS_HAS_ICU */ 1089 1090 Uint16* p = (Uint16*)s1._rep->data; 1091 Uint16* q = (Uint16*)s2._rep->data; 1092 Uint32 n = s2._rep->size; 1093 1094 while (n >= 8) 1095 { 1096 if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0]))) || 1097 ((p[1] - q[1]) && (_to_upper(p[1]) - _to_upper(q[1]))) || 1098 ((p[2] - q[2]) && (_to_upper(p[2]) - _to_upper(q[2]))) || 1099 ((p[3] - q[3]) && (_to_upper(p[3]) - _to_upper(q[3]))) || 1100 ((p[4] - q[4]) && (_to_upper(p[4]) - _to_upper(q[4]))) || 1101 ((p[5] - q[5]) && (_to_upper(p[5]) - _to_upper(q[5]))) || 1102 ((p[6] - q[6]) && (_to_upper(p[6]) - _to_upper(q[6]))) || 1103 ((p[7] - q[7]) && (_to_upper(p[7]) - _to_upper(q[7])))) 1104 { 1105 return false; 1106 } 1107 1108 n -= 8; 1109 mike 1.111.6.1 p += 8; 1110 q += 8; 1111 } 1112 1113 while (n >= 4) 1114 { 1115 if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0]))) || 1116 ((p[1] - q[1]) && (_to_upper(p[1]) - _to_upper(q[1]))) || 1117 ((p[2] - q[2]) && (_to_upper(p[2]) - _to_upper(q[2]))) || 1118 ((p[3] - q[3]) && (_to_upper(p[3]) - _to_upper(q[3])))) 1119 { 1120 return false; 1121 } 1122 1123 n -= 4; 1124 p += 4; 1125 q += 4; 1126 } | ||
1127 mike 1.27 | ||
1128 kumpf 1.39 while (n--) 1129 { | ||
1130 mike 1.111.6.1 if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0])))) 1131 return false; 1132 1133 p++; 1134 q++; | ||
1135 kumpf 1.39 } | ||
1136 mike 1.28 | ||
1137 kumpf 1.39 return true; | ||
1138 david 1.69 | ||
1139 mike 1.111.6.1 #endif /* PEGASUS_HAS_ICU */ 1140 } | ||
1141 mike 1.27 | ||
1142 mike 1.111.6.1 Boolean String::equalNoCase(const String& s1, const char* s2) | ||
1143 david 1.69 { | ||
1144 mike 1.111.6.1 _check_null_pointer(s2); | ||
1145 david 1.69 | ||
1146 mike 1.111.6.1 #if defined(PEGASUS_HAS_ICU) | ||
1147 david 1.69 | ||
1148 mike 1.111.6.1 return String::equalNoCase(s1, String(s2)); | ||
1149 david.dillard 1.105 | ||
1150 mike 1.111.6.1 #elif defined(PEGASUS_STRING_NO_UTF8) | ||
1151 david 1.71 | ||
1152 mike 1.111.6.1 const Uint16* p1 = (Uint16*)s1._rep->data; 1153 const char* p2 = s2; 1154 size_t n = s1._rep->size; | ||
1155 kumpf 1.42 | ||
1156 mike 1.111.6.1 while (n--) 1157 { 1158 if (!*p2) 1159 return false; | ||
1160 karl 1.36 | ||
1161 mike 1.111.6.1 if (_to_upper(*p1++) != _to_upper_tbl[int(*p2++)]) 1162 return false; 1163 } | ||
1164 david.dillard 1.105 | ||
1165 mike 1.111.6.1 if (*p2) 1166 return false; 1167 1168 return true; | ||
1169 david.dillard 1.105 | ||
1170 mike 1.111.6.1 #else /* PEGASUS_HAS_ICU */ | ||
1171 chuck 1.78 | ||
1172 mike 1.111.6.1 // ATTN: optimize this! 1173 return String::equalNoCase(s1, String(s2)); | ||
1174 david.dillard 1.105 | ||
1175 mike 1.111.6.1 #endif /* PEGASUS_HAS_ICU */ 1176 } | ||
1177 karl 1.36 | ||
1178 mike 1.111.6.1 Boolean String::equal(const String& s1, const String& s2) | ||
1179 karl 1.36 { | ||
1180 mike 1.111.6.1 return s1._rep->size == s2._rep->size && memcmp(s1._rep->data, 1181 s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0; | ||
1182 karl 1.36 } 1183 | ||
1184 mike 1.111.6.1 Boolean String::equal(const String& s1, const char* s2) | ||
1185 karl 1.36 { | ||
1186 mike 1.111.6.1 #ifdef PEGASUS_STRING_NO_UTF8 | ||
1187 kumpf 1.39 | ||
1188 mike 1.111.6.1 _check_null_pointer(s2); | ||
1189 kumpf 1.39 | ||
1190 mike 1.111.6.1 const Uint16* p = (Uint16*)s1._rep->data; 1191 const char* q = s2; | ||
1192 kumpf 1.39 | ||
1193 mike 1.111.6.1 while (*p && *q) 1194 { 1195 if (*p++ != Uint16(*q++)) 1196 return false; 1197 } | ||
1198 kumpf 1.39 | ||
1199 mike 1.111.6.1 return !(*p || *q); | ||
1200 kumpf 1.39 | ||
1201 mike 1.111.6.1 #else /* PEGASUS_STRING_NO_UTF8 */ | ||
1202 kumpf 1.39 | ||
1203 mike 1.111.6.1 return String::equal(s1, String(s2)); 1204 1205 #endif /* PEGASUS_STRING_NO_UTF8 */ | ||
1206 kumpf 1.39 } 1207 | ||
1208 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str) | ||
1209 kumpf 1.39 { | ||
1210 david 1.69 #if defined(PEGASUS_OS_OS400) | ||
1211 mike 1.111.6.1 | ||
1212 david 1.93 CString cstr = str.getCString(); | ||
1213 david 1.69 const char* utf8str = cstr; 1214 os << utf8str; | ||
1215 mike 1.111.6.1 return os; 1216 #else 1217 1218 #if defined(PEGASUS_HAS_ICU) | ||
1219 david 1.69 | ||
1220 yi.zhou 1.108 if (InitializeICU::initICUSuccessful()) 1221 { | ||
1222 david.dillard 1.105 char *buf = NULL; 1223 const int size = str.size() * 6; | ||
1224 mike 1.111.6.1 UnicodeString UniStr( 1225 (const UChar *)str.getChar16Data(), (int32_t)str.size()); | ||
1226 david.dillard 1.105 Uint32 bufsize = UniStr.extract(0,size,buf); 1227 buf = new char[bufsize+1]; 1228 UniStr.extract(0,bufsize,buf); 1229 os << buf; 1230 os.flush(); 1231 delete [] buf; | ||
1232 mike 1.111.6.1 return os; | ||
1233 yi.zhou 1.108 } | ||
1234 mike 1.111.6.1 1235 #endif // PEGASUS_HAS_ICU 1236 1237 for (Uint32 i = 0, n = str.size(); i < n; i++) | ||
1238 yi.zhou 1.108 { | ||
1239 mike 1.111.6.1 Uint16 code = str[i]; | ||
1240 david.dillard 1.105 | ||
1241 mike 1.111.6.1 if (code > 0 && !(code & 0xFF00)) 1242 os << char(code); 1243 else 1244 { 1245 // Print in hex format: 1246 char buffer[8]; 1247 sprintf(buffer, "\\x%04X", code); 1248 os << buffer; | ||
1249 david.dillard 1.105 } | ||
1250 yi.zhou 1.108 } | ||
1251 kumpf 1.39 1252 return os; | ||
1253 mike 1.111.6.1 #endif // PEGASUS_OS_OS400 | ||
1254 kumpf 1.39 } 1255 | ||
1256 mike 1.111.6.1 void String::_append_char_aux() | ||
1257 kumpf 1.39 { | ||
1258 mike 1.111.6.1 StringRep* tmp; | ||
1259 kumpf 1.39 | ||
1260 mike 1.111.6.1 if (_rep->cap) 1261 { 1262 tmp = StringRep::alloc(2 * _rep->cap); 1263 tmp->size = _rep->size; 1264 _copy(tmp->data, _rep->data, _rep->size); 1265 } 1266 else 1267 { 1268 tmp = StringRep::alloc(8); 1269 tmp->size = 0; 1270 } | ||
1271 kumpf 1.39 | ||
1272 mike 1.111.6.1 StringRep::unref(_rep); 1273 _rep = tmp; | ||
1274 kumpf 1.39 } 1275 | ||
1276 mike 1.111.6.1 PEGASUS_NAMESPACE_END | ||
1277 kumpf 1.39 | ||
/*
================================================================================

String optimizations:

    1.  Added mechanism allowing certain functions to be inlined only when
        used by internal Pegasus modules. External modules (i.e., providers)
        link to a non-inline version, which allows for binary compatibility.

    2.  Implemented copy-on-write with atomic increment/decrement. This
        yielded a 10% improvement for the 'gc' benchmark and an 11%
        improvement for the 'ni1000' benchmark.

    3.  Employed loop unrolling in several places. For example, see:

            static Uint16* _find(const Uint16* s, size_t n, Uint16 c);

    4.  Used the "empty-rep" optimization (described in whitepaper from the
        GCC Developers Summit). This reduced default construction to a simple
        pointer assignment.

            inline String::String() : _rep(&_empty_rep) { }

    5.  Implemented Uint16 versions of toupper() and tolower() using tables.
        For example:

            static const char _upper[] =
            {
                0,1,2,...255
            };

            inline Uint16 _to_upper(Uint16 x)
            {
                return (x & 0xFF00) ? x : _upper[x];
            }

        This outperforms the system implementation by avoiding an anding
        operation.

    6.  Implemented char* version of the following member functions to
        eliminate unnecessary creation of anonymous string objects
        (temporaries).

            String(const String& s1, const char* s2);
            String(const char* s1, const String& s2);
            String& String::operator=(const char* str);
            Uint32 String::find(const char* s) const;
            bool String::equal(const String& s1, const char* s2);
            static int String::compare(const String& s1, const char* s2);
            String& String::append(const char* str);
            String& String::append(const char* str, Uint32 size);
            static bool String::equalNoCase(const String& s1, const char* s2);
            String& operator=(const char* str)
            String& String::assign(const char* str)
            String& String::append(const char* str)
            Boolean operator==(const String& s1, const char* s2)
            Boolean operator==(const char* s1, const String& s2)
            Boolean operator!=(const String& s1, const char* s2)
            Boolean operator!=(const char* s1, const String& s2)
            Boolean operator<(const String& s1, const char* s2)
            Boolean operator<(const char* s1, const String& s2)
            Boolean operator>(const String& s1, const char* s2)
            Boolean operator>(const char* s1, const String& s2)
            Boolean operator<=(const String& s1, const char* s2)
            Boolean operator<=(const char* s1, const String& s2)
            Boolean operator>=(const String& s1, const char* s2)
            Boolean operator>=(const char* s1, const String& s2)
            String operator+(const String& s1, const char* s2)
            String operator+(const char* s1, const String& s2)

    7.  Optimized _next_pow_2(), used in rounding the capacity to the next
        power of two (algorithm from the book "Hacker's Delight").

            static Uint32 _next_pow_2(Uint32 x)
            {
                if (x < 8)
                    return 8;

                x--;
                x |= (x >> 1);
                x |= (x >> 2);
                x |= (x >> 4);
                x |= (x >> 8);
                x |= (x >> 16);
                x++;

                return x;
            }

    8.  Implemented "concatenating constructors" to eliminate temporaries
        created by operator+(). This scheme employs the "return-value
        optimization" described by Stan Lippman.

            inline String operator+(const String& s1, const String& s2)
            {
                return String(s1, s2, 0);
            }

    9.  Experimented to find the optimal initial size for a short string.
        Eight seems to offer the best tradeoff between space and time.

    10. Inlined all members of the Char16 class.

    11. Used Uint16 internally in the String class. This showed no improvement
        since Char16 was already fully inlined and was essentially reduced to
        Uint16 in any case.

    12. Implemented conditional logic (#if) allowing error checking logic to
        be excluded for better performance. Examples include bounds checking
        and null-pointer checking.

    13. Used memcpy() and memcmp() where possible. These are implemented using
        the rep family of instructions under Intel and are much faster.

    14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
        copy routine overhead.

    15. Added ASCII7 form of the constructor and assign().

            String s("hello world", String::ASCII7);

            s.assignASCII7("hello world");

        This avoids slower UTF8 processing when not needed.

BUG-4200 Review actions:

    1.  Use PEGASUS_USE_EXPERIMENTAL_INTERFACES instead of
        PEGASUS_STRING_EXTENSIONS.

        Status: done

    2.  Doc++ String.h

        Status: pending

    3.  Look at PEP223 for security coding guidelines for strings.

        Status: pending

    4.  Increasing the number of objects may break Windows 2000 build
        (limit of 2048 bytes for command line). See BUG-2754

        Status: looking into the use of auto-generated linker files.

    5.  Concerns about whether generating inlines and non-inline versions
        of functions will work with all compilers.

        Status: confident it will work on platforms except maybe Windows.

================================================================================
*/
No CVS admin address has been configured |
Powered by ViewCVS 0.9.2 |