pegasus/src/Pegasus/Common/String.cpp - annotate

Return to String.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

1 karl 1.98 //%2005////////////////////////////////////////////////////////////////////////
2 mike 1.27 //
3 karl 1.97 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development 4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems. 5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
6 karl 1.85 // IBM Corp.; EMC Corporation, The Open Group.
7 karl 1.97 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.; 8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
9 karl 1.98 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.; 10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
11 mike 1.27 // 12 // Permission is hereby granted, free of charge, to any person obtaining a copy
13 kumpf 1.41 // of this software and associated documentation files (the "Software"), to 14 // deal in the Software without restriction, including without limitation the 15 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
16 mike 1.27 // sell copies of the Software, and to permit persons to whom the Software is 17 // furnished to do so, subject to the following conditions:
18 david.dillard 1.105 //
19 kumpf 1.41 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
20 mike 1.27 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED 21 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
22 kumpf 1.41 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 23 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 24 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
25 mike 1.27 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 27 // 28 //============================================================================== 29 // 30 // Author: Mike Brasher (mbrasher@bmc.com) 31 //
32 mike 1.111.2.9 // Modified By: 33 // Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com) 34 // Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297 35 // David Dillard, VERITAS Software Corp. (david.dillard@veritas.com) 36 // Mike Brasher (mike-brasher@austin.rr.com) 37 //
38 mike 1.27 //%///////////////////////////////////////////////////////////////////////////// 39
40 mike 1.111.2.10 #ifndef PEGASUS_USE_INTERNAL_INLINES 41 # define PEGASUS_USE_INTERNAL_INLINES 42 #endif 43
44 mike 1.27 #include "String.h"
45 mike 1.111.2.6 #include <cassert>
46 kumpf 1.48 #include "InternalException.h"
47 david 1.69 #include "CommonUTF.h"
48 chuck 1.111.2.12 #include "MessageLoader.h"
49 mike 1.111.2.1 #include "CharSet.h"
50 david 1.69
51 chuck 1.111.2.14 #ifdef PEGASUS_HAS_ICU
52 chuck 1.99 #include <unicode/ustring.h> 53 #include <unicode/uchar.h>
54 david 1.69 #endif 55
56 mike 1.27 PEGASUS_NAMESPACE_BEGIN 57
58 mike 1.111.2.1 //==============================================================================
59 kumpf 1.39 //
60 mike 1.111.2.7 // Compile-time macros (undefined by default).
61 mike 1.111.2.6 // 62 // PEGASUS_STRING_NO_THROW -- suppresses throwing of exceptions 63 // 64 // PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code. 65 // 66 // PEGASUS_USE_INTERNAL_INLINES -- enables internal inlining feature. 67 // 68 //============================================================================== 69 70 //============================================================================== 71 //
72 mike 1.111.2.1 // File-scope definitions:
73 kumpf 1.54 //
74 mike 1.111.2.1 //==============================================================================
75 kumpf 1.54
76 mike 1.111.2.1 // Converts 16-bit characters to upper case. 77 inline Uint16 _to_upper(Uint16 x)
78 kumpf 1.54 {
79 mike 1.111.2.1 return (x & 0xFF00) ? x : CharSet::to_upper(x);
80 kumpf 1.54 } 81
82 mike 1.111.2.1 // Converts 16-bit characters to lower case. 83 inline Uint16 _to_lower(Uint16 x)
84 kumpf 1.54 {
85 mike 1.111.2.1 return (x & 0xFF00) ? x : CharSet::to_lower(x); 86 }
87 kumpf 1.82
88 mike 1.111.2.1 // Rounds x to the next power of two (or just returns 8 if x < 8). 89 static Uint32 _next_pow_2(Uint32 x) 90 { 91 if (x < 8) 92 return 8; 93 94 x--; 95 x \|= (x >> 1); 96 x \|= (x >> 2); 97 x \|= (x >> 4); 98 x \|= (x >> 8); 99 x \|= (x >> 16); 100 x++; 101 102 return x; 103 } 104 105 template<class P, class Q> 106 static void _copy(P* p, const Q* q, size_t n) 107 { 108 // Use loop unrolling. 109 mike 1.111.2.1 110 while (n >= 8) 111 { 112 p[0] = q[0]; 113 p[1] = q[1]; 114 p[2] = q[2]; 115 p[3] = q[3]; 116 p[4] = q[4]; 117 p[5] = q[5]; 118 p[6] = q[6]; 119 p[7] = q[7]; 120 p += 8; 121 q += 8; 122 n -= 8; 123 } 124 125 while (n >= 4)
126 kumpf 1.82 {
127 mike 1.111.2.1 p[0] = q[0]; 128 p[1] = q[1]; 129 p[2] = q[2]; 130 p[3] = q[3]; 131 p += 4; 132 q += 4; 133 n -= 4;
134 kumpf 1.82 }
135 mike 1.111.2.1 136 while (n--) 137 p++ = q++;
138 kumpf 1.54 } 139
140 mike 1.111.2.1 static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
141 kumpf 1.54 {
142 mike 1.111.2.1 while (n >= 4) 143 { 144 if (s[0] == c) 145 return (Uint16)s; 146 if (s[1] == c) 147 return (Uint16)&s[1]; 148 if (s[2] == c) 149 return (Uint16)&s[2]; 150 if (s[3] == c) 151 return (Uint16)&s[3]; 152 153 n -= 4; 154 s += 4; 155 } 156 157 if (n) 158 { 159 if (s == c) 160 return (Uint16)s; 161 s++; 162 n--; 163 mike 1.111.2.1 } 164 165 if (n) 166 { 167 if (s == c) 168 return (Uint16)s; 169 s++; 170 n--; 171 } 172 173 if (n && s == c) 174 return (Uint16)s; 175 176 // Not found! 177 return 0;
178 kumpf 1.54 } 179
180 mike 1.111.2.1 static int _compare(const Uint16* s1, const Uint16* s2)
181 kumpf 1.54 {
182 mike 1.111.2.1 while (s1 && s2)
183 kumpf 1.82 {
184 mike 1.111.2.1 int r = s1++ - s2++; 185 186 if (r) 187 return r;
188 kumpf 1.82 }
189 mike 1.111.2.1 190 if (s2) 191 return -1; 192 else if (s1) 193 return 1; 194 195 return 0;
196 kumpf 1.54 } 197
198 mike 1.111.2.1 static int _compare_no_utf8(const Uint16* s1, const char* s2)
199 kumpf 1.56 {
200 mike 1.111.2.1 Uint16 c1; 201 Uint16 c2; 202 203 do
204 kumpf 1.81 {
205 mike 1.111.2.1 c1 = s1++; 206 c2 = s2++; 207 208 if (c1 == 0) 209 return c1 - c2;
210 kumpf 1.81 }
211 mike 1.111.2.1 while (c1 == c2); 212 213 return c1 - c2;
214 kumpf 1.56 } 215
216 mike 1.111.2.1 static int _compare(const Uint16* s1, const Uint16* s2, size_t n)
217 kumpf 1.54 {
218 mike 1.111.2.1 // This should only be called when s1 and s2 have the same length. 219 220 while (n-- && (s1++ - s2++) == 0) 221 ; 222
223 mike 1.111.2.8 // 224
225 mike 1.111.2.1 return s1[-1] - s2[-1];
226 kumpf 1.54 } 227
228 mike 1.111.2.1 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n) 229 { 230 memcpy(s1, s2, n * sizeof(Uint16)); 231 }
232 kumpf 1.39
233 mike 1.111.2.1 void String_throw_out_of_bounds() 234 { 235 throw IndexOutOfBoundsException(); 236 } 237 238 #ifdef PEGASUS_STRING_NO_THROW 239 # define _check_null_pointer(ARG) /* empty / 240 #else 241 template<class T> 242 inline void _check_null_pointer(const T ptr) 243 { 244 if (!ptr) 245 throw NullPointer(); 246 } 247 #endif
248 mike 1.27
249 mike 1.111.2.1 static size_t _copy_from_utf8(Uint16* dest, const char* src, size_t n)
250 mike 1.27 {
251 mike 1.111.2.1 Uint16* p = dest; 252 const Uint8* q = (const Uint8*)src;
253 mike 1.111.2.2
254 mike 1.111.2.11 // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later). 255 // Use loop-unrolling. 256 257 while (n >=8 && ((q[0]\|q[1]\|q[2]\|q[3]\|q[4]\|q[5]\|q[6]\|q[7]) & 0x80) == 0) 258 { 259 p[0] = q[0]; 260 p[1] = q[1]; 261 p[2] = q[2]; 262 p[3] = q[3]; 263 p[4] = q[4]; 264 p[5] = q[5]; 265 p[6] = q[6]; 266 p[7] = q[7]; 267 p += 8; 268 q += 8; 269 n -= 8; 270 }
271 mike 1.111.2.2
272 mike 1.111.2.10 while (n >=4 && ((q[0]\|q[1]\|q[2]\|q[3]) & 0x80) == 0)
273 mike 1.111.2.2 { 274 p[0] = q[0]; 275 p[1] = q[1]; 276 p[2] = q[2]; 277 p[3] = q[3]; 278 p += 4; 279 q += 4; 280 n -= 4; 281 } 282 283 switch (n) 284 { 285 case 0: 286 return p - dest; 287 case 1: 288 if (q[0] < 128) 289 { 290 p[0] = q[0]; 291 return p + 1 - dest; 292 } 293 break; 294 mike 1.111.2.2 case 2:
295 mike 1.111.2.11 if (((q[0]\|q[1]) & 0x80) == 0)
296 mike 1.111.2.2 { 297 p[0] = q[0]; 298 p[1] = q[1]; 299 return p + 2 - dest; 300 } 301 break; 302 case 3:
303 mike 1.111.2.11 if (((q[0]\|q[1]\|q[2]) & 0x80) == 0)
304 mike 1.111.2.2 { 305 p[0] = q[0]; 306 p[1] = q[1]; 307 p[2] = q[2]; 308 return p + 3 - dest; 309 } 310 break; 311 } 312 313 // Process remaining characters.
314 mike 1.111.2.1 315 while (n) 316 {
317 mike 1.111.2.2 // Optimize for 7-bit ASCII case.
318 mike 1.111.2.1
319 mike 1.111.2.2 if (*q < 128)
320 mike 1.111.2.1 { 321 p++ = q++; 322 n--; 323 }
324 mike 1.111.2.2 else 325 { 326 Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1;
327 mike 1.111.2.1
328 mike 1.111.2.2 if (c > n \|\| !isValid_U8(q, c) \|\| 329 UTF8toUTF16(&q, q + c, &p, p + n) != 0) 330 {
331 mike 1.111.2.13 MessageLoaderParms parms("Common.String.BAD_UTF8", 332 "The byte sequence starting at index $0 " 333 "is not valid UTF-8 encoding.", 334 q - (const Uint8*)src); 335 throw Exception(parms);
336 mike 1.111.2.2 }
337 mike 1.111.2.1
338 mike 1.111.2.2 n -= c;
339 mike 1.111.2.1 } 340 }
341 mike 1.27
342 mike 1.111.2.1 return p - dest; 343 }
344 mike 1.27
345 mike 1.111.2.1 // Note: dest must be at least three times src (plus an extra byte for 346 // terminator).
347 mike 1.111.2.2 static inline size_t _copy_to_utf8(char* dest, const Uint16* src, size_t n)
348 mike 1.111.2.1 {
349 mike 1.111.2.2 const Uint16* q = src; 350 Uint8* p = (Uint8)dest; 351 352 while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128) 353 { 354 p[0] = q[0]; 355 p[1] = q[1]; 356 p[2] = q[2]; 357 p[3] = q[3]; 358 p += 4; 359 q += 4; 360 n -= 4; 361 } 362 363 switch (n) 364 { 365 case 0: 366 return p - (Uint8)dest; 367 case 1: 368 if (q[0] < 128) 369 { 370 mike 1.111.2.2 p[0] = q[0]; 371 return p + 1 - (Uint8)dest; 372 } 373 break; 374 case 2: 375 if (q[0] < 128 && q[1] < 128) 376 { 377 p[0] = q[0]; 378 p[1] = q[1]; 379 return p + 2 - (Uint8)dest; 380 } 381 break; 382 case 3: 383 if (q[0] < 128 && q[1] < 128 && q[2] < 128) 384 { 385 p[0] = q[0]; 386 p[1] = q[1]; 387 p[2] = q[2]; 388 return p + 3 - (Uint8)dest; 389 } 390 break; 391 mike 1.111.2.2 } 392 393 // If this line was reached, there must be characters greater than 128. 394 395 UTF16toUTF8(&q, q + n, &p, p + 3 n); 396 397 return p - (Uint8*)dest;
398 mike 1.111.2.1 }
399 mike 1.27
400 mike 1.111.2.1 static inline size_t _convert(Uint16* p, const char* q, size_t n) 401 { 402 #ifdef PEGASUS_STRING_NO_UTF8 403 _copy(p, q, n);
404 mike 1.27 return n;
405 mike 1.111.2.1 #else 406 return _copy_from_utf8(p, q, n); 407 #endif
408 mike 1.27 } 409
410 mike 1.111.2.1 //==============================================================================
411 chuck 1.102 //
412 mike 1.111.2.1 // class CString
413 chuck 1.102 //
414 mike 1.111.2.1 //============================================================================== 415 416 CString::CString(const CString& cstr) : _rep(0)
417 chuck 1.102 {
418 mike 1.111.2.1 if (cstr._rep)
419 chuck 1.102 {
420 mike 1.111.2.1 size_t n = strlen(cstr._rep) + 1; 421 _rep = (char*)operator new(n); 422 memcpy(_rep, cstr._rep, n); 423 } 424 } 425 426 CString& CString::operator=(const CString& cstr) 427 { 428 if (&cstr != this) 429 { 430 if (_rep)
431 david.dillard 1.105 {
432 mike 1.111.2.1 operator delete(_rep); 433 _rep = 0;
434 chuck 1.102 }
435 mike 1.111.2.1 436 if (cstr._rep)
437 chuck 1.102 {
438 mike 1.111.2.1 size_t n = strlen(cstr._rep) + 1; 439 _rep = (char*)operator new(n); 440 memcpy(_rep, cstr._rep, n);
441 chuck 1.102 }
442 mike 1.111.2.1 }
443 chuck 1.102
444 mike 1.111.2.1 return *this;
445 chuck 1.102 } 446
447 mike 1.111.2.1 //============================================================================== 448 // 449 // class StringRep 450 // 451 //==============================================================================
452 kumpf 1.43
453 mike 1.111.2.1 StringRep StringRep::_empty_rep;
454 kumpf 1.43
455 mike 1.111.2.1 inline StringRep* StringRep::alloc(size_t cap)
456 mike 1.27 {
457 mike 1.111.2.1 StringRep* rep = (StringRep)::operator new( 458 sizeof(StringRep) + cap sizeof(Uint16)); 459 rep->cap = cap; 460 Atomic_create(&rep->refs, 1); 461 462 return rep;
463 mike 1.27 } 464
465 mike 1.111.2.1 static inline void _reserve(StringRep*& rep, Uint32 cap)
466 mike 1.27 {
467 mike 1.111.2.1 if (cap > rep->cap \|\| Atomic_get(&rep->refs) != 1) 468 { 469 size_t n = _next_pow_2(cap); 470 StringRep* new_rep = StringRep::alloc(n); 471 new_rep->size = rep->size; 472 _copy(new_rep->data, rep->data, rep->size + 1); 473 StringRep::unref(rep); 474 rep = new_rep; 475 }
476 kumpf 1.39 }
477 tony 1.66
478 mike 1.111.2.1 StringRep* StringRep::create(const Uint16* data, size_t size)
479 kumpf 1.39 {
480 mike 1.111.2.1 StringRep* rep = StringRep::alloc(size); 481 rep->size = size; 482 _copy(rep->data, data, size); 483 rep->data[size] = '\0'; 484 return rep;
485 kumpf 1.39 } 486
487 mike 1.111.2.1 StringRep* StringRep::copy_on_write(StringRep* rep)
488 kumpf 1.39 {
489 mike 1.111.2.1 // Return a new copy of rep. Release rep.
490 david.dillard 1.105
491 mike 1.111.2.1 StringRep* new_rep = StringRep::alloc(rep->size); 492 new_rep->size = rep->size; 493 _copy(new_rep->data, rep->data, rep->size); 494 new_rep->data[new_rep->size] = '\0'; 495 StringRep::unref(rep); 496 return new_rep;
497 mike 1.27 } 498
499 mike 1.111.2.1 StringRep* StringRep::create(const char* data, size_t size)
500 kumpf 1.39 {
501 mike 1.111.2.1 StringRep* rep = StringRep::alloc(size); 502 rep->size = _convert((Uint16*)rep->data, data, size); 503 rep->data[rep->size] = '\0';
504 david.dillard 1.105
505 mike 1.111.2.1 return rep;
506 kumpf 1.39 } 507
508 mike 1.111.2.4 StringRep* StringRep::createASCII7(const char* data, size_t size)
509 mike 1.111.2.2 { 510 StringRep* rep = StringRep::alloc(size); 511 _copy((Uint16*)rep->data, data, size); 512 rep->data[rep->size = size] = '\0'; 513 return rep; 514 } 515
516 mike 1.111.2.1 Uint32 StringRep::length(const Uint16* str)
517 mike 1.27 {
518 mike 1.111.2.5 // Note: We could unroll this but it is rarely called.
519 david.dillard 1.105
520 mike 1.111.2.1 const Uint16* end = (Uint16)str; 521 522 while (end++) 523 ; 524 525 return end - str - 1;
526 mike 1.27 } 527
528 mike 1.111.2.1 //============================================================================== 529 // 530 // class String 531 // 532 //==============================================================================
533 david.dillard 1.105
534 mike 1.111.2.1 const String String::EMPTY; 535 536 String::String(const String& str, Uint32 n) 537 { 538 _check_bounds(n, str._rep->size); 539 _rep = StringRep::create(str._rep->data, n);
540 kumpf 1.39 }
541 mike 1.27
542 mike 1.111.2.1 String::String(const Char16* str)
543 kumpf 1.39 {
544 mike 1.111.2.1 _check_null_pointer(str); 545 _rep = StringRep::create((Uint16)str, StringRep::length((Uint16)str));
546 mike 1.27 } 547
548 mike 1.111.2.1 String::String(const Char16* str, Uint32 n)
549 mike 1.27 {
550 mike 1.111.2.1 _check_null_pointer(str); 551 _rep = StringRep::create((Uint16*)str, n);
552 mike 1.27 } 553
554 mike 1.111.2.1 String::String(const char* str)
555 mike 1.27 {
556 mike 1.111.2.1 _check_null_pointer(str); 557 _rep = StringRep::create(str, strlen(str));
558 mike 1.27 } 559
#ifdef PEGASUS_USE_STRING_EXTENSIONS
// Constructs a string from a zero-terminated char array using
// StringRep::createASCII7(), which widens chars without UTF-8 decoding.
// The tag argument only selects this overload; its value is not read.
String::String(const char* str, String::ASCII7Tag tag)
{
    _check_null_pointer(str);

    const size_t bytes = strlen(str);
    _rep = StringRep::createASCII7(str, bytes);
}
#endif /* PEGASUS_USE_STRING_EXTENSIONS */
567 mike 1.111.2.2
568 mike 1.111.2.1 String::String(const char* str, Uint32 n)
569 mike 1.27 {
570 mike 1.111.2.1 _check_null_pointer(str); 571 _rep = StringRep::create(str, n); 572 }
573 david.dillard 1.105
#ifdef PEGASUS_USE_STRING_EXTENSIONS
// Constructs a string from the first n bytes of a char array using
// StringRep::createASCII7(), which widens chars without UTF-8 decoding.
// The tag argument only selects this overload; its value is not read.
String::String(const char* str, size_t n, String::ASCII7Tag tag)
{
    _check_null_pointer(str);

    StringRep* created = StringRep::createASCII7(str, n);
    _rep = created;
}
#endif /* PEGASUS_USE_STRING_EXTENSIONS */
581 mike 1.111.2.2
#ifdef PEGASUS_USE_STRING_EXTENSIONS
// Constructs the concatenation of s1 and s2 in a single allocation.
String::String(const String& s1, const String& s2)
{
    const size_t len1 = s1._rep->size;
    const size_t len2 = s2._rep->size;
    const size_t total = len1 + len2;

    _rep = StringRep::alloc(total);
    _rep->size = total;

    _copy(_rep->data, s1._rep->data, len1);
    _copy(_rep->data + len1, s2._rep->data, len2);
    _rep->data[total] = '\0';
}
#endif /* PEGASUS_USE_STRING_EXTENSIONS */
595 mike 1.111.2.1
#ifdef PEGASUS_USE_STRING_EXTENSIONS
// Constructs the concatenation of s1 and the zero-terminated char array s2.
// s2 is converted with _convert(); the buffer is sized for one 16-bit
// character per byte of s2 and the final size reflects what was produced.
String::String(const String& s1, const char* s2)
{
    _check_null_pointer(s2);

    const size_t len1 = s1._rep->size;
    const size_t bytes2 = strlen(s2);

    _rep = StringRep::alloc(len1 + bytes2);
    _copy(_rep->data, s1._rep->data, len1);

    Uint16* tail = (Uint16*)_rep->data + len1;
    _rep->size = len1 + _convert(tail, s2, bytes2);
    _rep->data[_rep->size] = '\0';
}
#endif /* PEGASUS_USE_STRING_EXTENSIONS */
608 mike 1.111.2.1
#ifdef PEGASUS_USE_STRING_EXTENSIONS
// Constructs the concatenation of the zero-terminated char array s1 and s2.
// s1 is converted with _convert(); the buffer is sized for one 16-bit
// character per byte of s1.
String::String(const char* s1, const String& s2)
{
    _check_null_pointer(s1);
    size_t n1 = strlen(s1);
    size_t n2 = s2._rep->size;
    _rep = StringRep::alloc(n1 + n2);

    // The conversion can yield fewer characters than bytes consumed, so s2
    // has to be placed after the characters actually written. Using the byte
    // count n1 as the offset would leave a gap of uninitialized characters.
    size_t converted = _convert((Uint16*)_rep->data, s1, n1);
    _copy(_rep->data + converted, s2._rep->data, n2);

    _rep->size = converted + n2;
    _rep->data[_rep->size] = '\0';
}
#endif /* PEGASUS_USE_STRING_EXTENSIONS */
621 mike 1.27
622 mike 1.111.2.1 String& String::assign(const String& str)
623 mike 1.27 {
624 mike 1.111.2.1 if (_rep != str._rep)
625 david.dillard 1.105 {
626 mike 1.111.2.1 StringRep::unref(_rep); 627 StringRep::ref(_rep = str._rep);
628 david.dillard 1.105 } 629
630 mike 1.27 return *this; 631 } 632
633 mike 1.111.2.1 String& String::assign(const Char16* str, Uint32 n)
634 chuck 1.102 {
635 mike 1.111.2.1 _check_null_pointer(str); 636 637 if (n > _rep->cap \|\| Atomic_get(&_rep->refs) != 1)
638 david.dillard 1.105 {
639 mike 1.111.2.1 StringRep::unref(_rep); 640 _rep = StringRep::alloc(n);
641 david.dillard 1.105 } 642
643 mike 1.111.2.1 _rep->size = n; 644 _copy(_rep->data, (Uint16*)str, n); 645 _rep->data[n] = '\0'; 646
647 chuck 1.102 return *this; 648 } 649
650 kumpf 1.39 String& String::assign(const char* str, Uint32 n)
651 mike 1.27 {
652 mike 1.111.2.1 _check_null_pointer(str); 653 654 if (n > _rep->cap \|\| Atomic_get(&_rep->refs) != 1)
655 david.dillard 1.105 {
656 mike 1.111.2.1 StringRep::unref(_rep); 657 _rep = StringRep::alloc(n);
658 david.dillard 1.105 } 659
660 mike 1.111.2.1 _rep->size = _convert(_rep->data, str, n); 661 _rep->data[_rep->size] = 0; 662
663 mike 1.27 return *this; 664 } 665
#ifdef PEGASUS_USE_STRING_EXTENSIONS
// Replaces the contents with the first n chars of str, widening each char
// to 16 bits without UTF-8 decoding. The current buffer is reused when it
// is unshared and large enough; otherwise a new one is allocated.
String& String::assignASCII7(const char* str, Uint32 n)
{
    _check_null_pointer(str);

    const bool reusable = n <= _rep->cap && Atomic_get(&_rep->refs) == 1;

    if (!reusable)
    {
        StringRep::unref(_rep);
        _rep = StringRep::alloc(n);
    }

    _copy(_rep->data, str, n);
    _rep->size = n;
    _rep->data[_rep->size] = 0;

    return *this;
}
#endif /* PEGASUS_USE_STRING_EXTENSIONS */
683 mike 1.111.2.2
684 kumpf 1.39 void String::clear() 685 {
686 mike 1.111.2.1 if (_rep->size) 687 { 688 if (Atomic_get(&_rep->refs) == 1) 689 _rep->size = 0; 690 else 691 { 692 StringRep::unref(_rep); 693 _rep = &StringRep::_empty_rep; 694 } 695 }
696 kumpf 1.39 } 697
698 mike 1.111.2.1 void String::reserveCapacity(Uint32 cap)
699 kumpf 1.39 {
700 mike 1.111.2.1 _reserve(_rep, cap);
701 kumpf 1.39 } 702
703 mike 1.111.2.1 CString String::getCString() const 704 { 705 #ifdef PEGASUS_STRING_NO_UTF8 706 char* str = (char*)operator new(_rep->size + 1); 707 _copy(str, _rep->data, _rep->size); 708 str[_rep->size] = '\0'; 709 return CString(str);
710 gs.keenan 1.110 #else
711 mike 1.111.2.1 Uint32 n = 3 * _rep->size; 712 char* str = (char*)operator new(n + 1);
713 mike 1.111.2.2 size_t size = _copy_to_utf8(str, _rep->data, _rep->size);
714 mike 1.111.2.1 str[size] = '\0'; 715 return CString(str);
716 gs.keenan 1.110 #endif
717 kumpf 1.39 } 718
719 mike 1.111.2.1 String& String::append(const Char16* str, Uint32 n)
720 kumpf 1.39 {
721 mike 1.111.2.1 _check_null_pointer(str);
722 kumpf 1.39
723 mike 1.111.2.1 size_t old_size = _rep->size; 724 size_t new_size = old_size + n; 725 _reserve(_rep, new_size); 726 _copy(_rep->data + old_size, (Uint16*)str, n); 727 _rep->size = new_size; 728 _rep->data[new_size] = '\0';
729 mike 1.27
730 mike 1.111.2.1 return *this;
731 mike 1.27 } 732
733 mike 1.111.2.1 String& String::append(const String& str)
734 mike 1.27 {
735 mike 1.111.2.1 return append((Char16*)str._rep->data, str._rep->size);
736 mike 1.27 } 737
#ifdef PEGASUS_USE_STRING_EXTENSIONS
// Appends the first size bytes of str, converted with _convert(). Room is
// reserved for one 16-bit character per input byte; the final length grows
// by the number of characters the conversion produced.
String& String::append(const char* str, Uint32 size)
{
    _check_null_pointer(str);

    const size_t before = _rep->size;
    const size_t wanted = before + size;

    _reserve(_rep, wanted);

    Uint16* tail = (Uint16*)_rep->data + before;
    _rep->size += _convert(tail, str, size);
    _rep->data[_rep->size] = '\0';

    return *this;
}
#endif /* PEGASUS_USE_STRING_EXTENSIONS */
753 mike 1.27
754 mike 1.111.2.1 void String::remove(Uint32 index, Uint32 n)
755 mike 1.27 {
756 mike 1.111.2.1 if (n == PEG_NOT_FOUND) 757 n = _rep->size - index;
758 mike 1.27
759 mike 1.111.2.1 _check_bounds(index + n, _rep->size); 760 761 if (Atomic_get(&_rep->refs) != 1) 762 _rep = StringRep::copy_on_write(_rep);
763 mike 1.27
764 mike 1.111.2.1 assert(index + n <= _rep->size);
765 mike 1.27
766 mike 1.111.2.1 size_t rem = _rep->size - (index + n); 767 Uint16* data = _rep->data; 768 769 if (rem) 770 memmove(data + index, data + index + n, rem * sizeof(Uint16)); 771 772 _rep->size -= n; 773 data[_rep->size] = '\0';
774 mike 1.27 } 775
776 mike 1.111.2.1 String String::subString(Uint32 index, Uint32 n) const
777 mike 1.27 {
778 mike 1.111.2.5 // Note: this implementation is very permissive but used for
779 mike 1.111.2.1 // backwards compatibility. 780 781 if (index < _rep->size)
782 mike 1.27 {
783 mike 1.111.2.1 if (n == PEG_NOT_FOUND \|\| n > _rep->size - index) 784 n = _rep->size - index;
785 mike 1.27
786 mike 1.111.2.1 return String((Char16*)_rep->data + index, n);
787 mike 1.27 }
788 david.dillard 1.105 789 return String();
790 mike 1.27 } 791 792 Uint32 String::find(Char16 c) const 793 {
794 mike 1.111.2.1 Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
795 mike 1.27
796 mike 1.111.2.1 if (p) 797 return p - _rep->data;
798 mike 1.27 799 return PEG_NOT_FOUND; 800 } 801
802 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
803 mike 1.30 {
804 mike 1.111.2.1 _check_bounds(index, _rep->size);
805 mike 1.30
806 mike 1.111.2.1 if (index >= _rep->size) 807 return PEG_NOT_FOUND; 808 809 Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c); 810 811 if (p) 812 return p - _rep->data;
813 mike 1.30 814 return PEG_NOT_FOUND; 815 } 816
817 mike 1.111.2.1 Uint32 String::_find_aux(const Char16* s, Uint32 n) const
818 mike 1.27 {
819 mike 1.111.2.1 _check_null_pointer(s);
820 mike 1.27
821 mike 1.111.2.1 const Uint16* data = _rep->data; 822 size_t rem = _rep->size;
823 mike 1.30
824 mike 1.111.2.1 while (n <= rem)
825 mike 1.27 {
826 mike 1.111.2.1 Uint16* p = (Uint16*)_find(data, rem, s[0]);
827 david.dillard 1.105
828 mike 1.111.2.1 if (!p) 829 break; 830 831 if (memcmp(p, s, n * sizeof(Uint16)) == 0) 832 return p - _rep->data; 833 834 p++; 835 rem -= p - data; 836 data = p;
837 mike 1.27 }
838 mike 1.111.2.1
839 mike 1.27 return PEG_NOT_FOUND; 840 } 841
#ifdef PEGASUS_USE_STRING_EXTENSIONS
// Returns the index of the first occurrence of the zero-terminated char
// array s, or PEG_NOT_FOUND. s is converted to a temporary String first.
Uint32 String::find(const char* s) const
{
    _check_null_pointer(s);

    // Note: could optimize away creation of temporary, but this is rarely
    // called.
    const String needle(s);
    return find(needle);
}
#endif /* PEGASUS_USE_STRING_EXTENSIONS */
852 mike 1.111.2.1
853 mike 1.27 Uint32 String::reverseFind(Char16 c) const 854 {
855 mike 1.111.2.1 Uint16 x = c; 856 Uint16* p = _rep->data; 857 Uint16* q = _rep->data + _rep->size;
858 mike 1.27
859 mike 1.111.2.1 while (q != p)
860 mike 1.27 {
861 mike 1.111.2.1 if (*--q == x) 862 return q - p;
863 mike 1.27 } 864 865 return PEG_NOT_FOUND; 866 } 867 868 void String::toLower() 869 {
870 chuck 1.111.2.14 #ifdef PEGASUS_HAS_ICU
871 mike 1.111.2.1
872 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
873 david 1.90 {
874 chuck 1.111.2.12 if (Atomic_get(&_rep->refs) != 1) 875 _rep = StringRep::copy_on_write(_rep); 876 877 // This will do a locale-insensitive, but context-sensitive convert. 878 // Since context-sensitive casing looks at adjacent chars, this 879 // prevents optimizations where the us-ascii is converted before 880 // calling ICU. 881 // The string may shrink or expand after the convert. 882
883 mike 1.111.2.1 //// First calculate size of resulting string. u_strToLower() returns 884 //// only the size when zero is passed as the destination size argument. 885
886 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR; 887
888 mike 1.111.2.1 int32_t new_size = u_strToLower( 889 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
890 chuck 1.111.2.12 891 err = U_ZERO_ERROR;
892 chuck 1.99
893 mike 1.111.2.1 //// Reserve enough space for the result. 894 895 if ((Uint32)new_size > _rep->cap) 896 _reserve(_rep, new_size); 897 898 //// Perform the conversion (overlapping buffers are allowed). 899 900 u_strToLower((UChar)_rep->data, new_size, 901 (UChar)_rep->data, _rep->size, NULL, &err);
902 yi.zhou 1.108
903 mike 1.111.2.1 _rep->size = new_size;
904 mike 1.111.2.13 return;
905 david 1.90 }
906 mike 1.111.2.1
907 chuck 1.111.2.14 #endif /* PEGASUS_HAS_ICU */
908 mike 1.111.2.1 909 if (Atomic_get(&_rep->refs) != 1) 910 _rep = StringRep::copy_on_write(_rep); 911 912 Uint16* p = _rep->data; 913 size_t n = _rep->size; 914 915 for (; n--; p++)
916 david 1.90 {
917 mike 1.111.2.1 if (!(p & 0xFF00)) 918 p = _to_lower(*p);
919 mike 1.27 }
920 kumpf 1.39 } 921
922 chuck 1.99 void String::toUpper()
923 david 1.90 {
924 chuck 1.111.2.14 #ifdef PEGASUS_HAS_ICU
925 mike 1.111.2.1
926 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
927 chuck 1.99 {
928 chuck 1.111.2.12 if (Atomic_get(&_rep->refs) != 1)
929 mike 1.111.2.13 _rep = StringRep::copy_on_write(_rep);
930 chuck 1.111.2.12 931 // This will do a locale-insensitive, but context-sensitive convert. 932 // Since context-sensitive casing looks at adjacent chars, this 933 // prevents optimizations where the us-ascii is converted before 934 // calling ICU. 935 // The string may shrink or expand after the convert. 936
937 mike 1.111.2.1 //// First calculate size of resulting string. u_strToUpper() returns 938 //// only the size when zero is passed as the destination size argument. 939
940 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR; 941
942 mike 1.111.2.1 int32_t new_size = u_strToUpper( 943 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
944 chuck 1.99
945 chuck 1.111.2.12 err = U_ZERO_ERROR; 946
947 mike 1.111.2.1 //// Reserve enough space for the result.
948 yi.zhou 1.108
949 mike 1.111.2.1 if ((Uint32)new_size > _rep->cap) 950 _reserve(_rep, new_size);
951 david 1.90
952 mike 1.111.2.1 //// Perform the conversion (overlapping buffers are allowed).
953 kumpf 1.39
954 mike 1.111.2.1 u_strToUpper((UChar)_rep->data, new_size, 955 (UChar)_rep->data, _rep->size, NULL, &err);
956 mike 1.27
957 mike 1.111.2.1 _rep->size = new_size;
958 chuck 1.111.2.12
959 mike 1.111.2.13 return;
960 mike 1.27 } 961
962 chuck 1.111.2.14 #endif /* PEGASUS_HAS_ICU */
963 mike 1.111.2.1 964 if (Atomic_get(&_rep->refs) != 1) 965 _rep = StringRep::copy_on_write(_rep); 966 967 Uint16* p = _rep->data; 968 size_t n = _rep->size; 969 970 for (; n--; p++) 971 p = _to_upper(p);
972 mike 1.27 } 973
974 mike 1.111.2.1 int String::compare(const String& s1, const String& s2, Uint32 n)
975 mike 1.30 {
976 mike 1.111.2.1 assert(n <= s1._rep->size); 977 assert(n <= s2._rep->size);
978 kumpf 1.43
979 mike 1.111.2.1 // Ignoring error in which n is greater than s1.size() or s2.size() 980 return _compare(s1._rep->data, s2._rep->data, n); 981 }
982 mike 1.30
983 mike 1.111.2.1 int String::compare(const String& s1, const String& s2) 984 { 985 return _compare(s1._rep->data, s2._rep->data); 986 }
987 mike 1.30
#ifdef PEGASUS_USE_STRING_EXTENSIONS
// Compares s1 with the zero-terminated char array s2. By default s2 is
// converted to a temporary String; with PEGASUS_STRING_NO_UTF8 the chars
// are compared directly by _compare_no_utf8().
int String::compare(const String& s1, const char* s2)
{
    _check_null_pointer(s2);

#ifndef PEGASUS_STRING_NO_UTF8
    // ATTN: optimize this!
    const String other(s2);
    return String::compare(s1, other);
#else
    return _compare_no_utf8(s1._rep->data, s2);
#endif
}
#endif /* PEGASUS_USE_STRING_EXTENSIONS */
1001 mike 1.30
1002 mike 1.111.2.1 int String::compareNoCase(const String& str1, const String& str2)
1003 kumpf 1.40 {
1004 chuck 1.111.2.14 #ifdef PEGASUS_HAS_ICU
1005 mike 1.111.2.1
1006 yi.zhou 1.108 if (InitializeICU::initICUSuccessful()) 1007 {
1008 mike 1.111.2.1 return u_strcasecmp( 1009 str1._rep->data, str2._rep->data, U_FOLD_CASE_DEFAULT);
1010 yi.zhou 1.108 }
1011 kumpf 1.40
1012 chuck 1.111.2.14 #endif /* PEGASUS_HAS_ICU */
1013 kumpf 1.40
1014 mike 1.111.2.1 const Uint16* s1 = str1._rep->data; 1015 const Uint16* s2 = str2._rep->data; 1016 1017 while (s1 && s2) 1018 { 1019 int r = _to_lower(s1++) - _to_lower(s2++);
1020 kumpf 1.40
1021 david.dillard 1.105 if (r) 1022 return r;
1023 kumpf 1.40 } 1024
1025 mike 1.111.2.1 if (*s2)
1026 david.dillard 1.105 return -1;
1027 mike 1.111.2.1 else if (*s1)
1028 david.dillard 1.105 return 1;
1029 kumpf 1.40 1030 return 0; 1031 } 1032
1033 mike 1.111.2.1 Boolean String::equalNoCase_aux(const String& s1, const String& s2)
1034 mike 1.27 {
1035 chuck 1.111.2.14 #ifdef PEGASUS_HAS_ICU
1036 mike 1.27
1037 mike 1.111.2.1 return String::compareNoCase(s1, s2) == 0;
1038 kumpf 1.39
1039 chuck 1.111.2.14 #else /* PEGASUS_HAS_ICU */
1040 kumpf 1.39
1041 mike 1.111.2.1 Uint16* p = (Uint16)s1._rep->data; 1042 Uint16 q = (Uint16*)s2._rep->data; 1043 Uint32 n = s2._rep->size; 1044 1045 while (n >= 8) 1046 { 1047 if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0]))) \|\| 1048 ((p[1] - q[1]) && (_to_upper(p[1]) - _to_upper(q[1]))) \|\| 1049 ((p[2] - q[2]) && (_to_upper(p[2]) - _to_upper(q[2]))) \|\| 1050 ((p[3] - q[3]) && (_to_upper(p[3]) - _to_upper(q[3]))) \|\| 1051 ((p[4] - q[4]) && (_to_upper(p[4]) - _to_upper(q[4]))) \|\| 1052 ((p[5] - q[5]) && (_to_upper(p[5]) - _to_upper(q[5]))) \|\| 1053 ((p[6] - q[6]) && (_to_upper(p[6]) - _to_upper(q[6]))) \|\| 1054 ((p[7] - q[7]) && (_to_upper(p[7]) - _to_upper(q[7])))) 1055 { 1056 return false; 1057 } 1058 1059 n -= 8; 1060 p += 8; 1061 q += 8; 1062 mike 1.111.2.1 } 1063 1064 while (n >= 4) 1065 { 1066 if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0]))) \|\| 1067 ((p[1] - q[1]) && (_to_upper(p[1]) - _to_upper(q[1]))) \|\| 1068 ((p[2] - q[2]) && (_to_upper(p[2]) - _to_upper(q[2]))) \|\| 1069 ((p[3] - q[3]) && (_to_upper(p[3]) - _to_upper(q[3])))) 1070 { 1071 return false; 1072 } 1073 1074 n -= 4; 1075 p += 4; 1076 q += 4; 1077 }
1078 mike 1.27
1079 kumpf 1.39 while (n--) 1080 {
1081 mike 1.111.2.1 if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0])))) 1082 return false; 1083 1084 p++; 1085 q++;
1086 kumpf 1.39 }
1087 mike 1.28
1088 kumpf 1.39 return true;
1089 david 1.69
1090 chuck 1.111.2.14 #endif /* PEGASUS_HAS_ICU */
1091 mike 1.111.2.1 }
1092 mike 1.27
#ifdef PEGASUS_USE_STRING_EXTENSIONS
// Case-insensitive equality test between a String and a C string.
//
// s2 is first passed to _check_null_pointer(). With PEGASUS_HAS_ICU, or when
// neither PEGASUS_HAS_ICU nor PEGASUS_STRING_NO_UTF8 is defined, s2 is
// converted to a temporary String and the String/String overload decides.
// With PEGASUS_STRING_NO_UTF8 the two buffers are compared in place, folding
// s1's units through _to_upper() and s2's characters through
// CharSet::to_upper().
//
// s1       String operand
// s2       NUL-terminated C string operand
// returns  true when both hold the same text ignoring case
Boolean String::equalNoCase(const String& s1, const char* s2)
{
    _check_null_pointer(s2);

#if defined(PEGASUS_HAS_ICU)

    return String::equalNoCase(s1, String(s2));

#elif defined(PEGASUS_STRING_NO_UTF8)

    const Uint16* p1 = s1._rep->data;
    const char* p2 = s2;
    size_t n = s1._rep->size;

    while (n--)
    {
        // s2 ran out before s1 did.
        if (!*p2)
            return false;

        if (_to_upper(*p1++) != CharSet::to_upper(int(*p2++)))
            return false;
    }

    // Every unit of s1 matched; the strings are equal only if s2 has been
    // consumed completely as well (otherwise s1 is merely a prefix of s2).
    if (*p2)
        return false;

    return true;

#else /* neither PEGASUS_HAS_ICU nor PEGASUS_STRING_NO_UTF8 */

    // ATTN: optimize this!
    return String::equalNoCase(s1, String(s2));

#endif /* PEGASUS_HAS_ICU */
}
#endif /* PEGASUS_USE_STRING_EXTENSIONS */
1127 karl 1.36
1128 mike 1.111.2.1 Boolean String::equal(const String& s1, const String& s2)
1129 karl 1.36 {
1130 mike 1.111.2.1 return s1._rep->size == s2._rep->size && memcmp(s1._rep->data, 1131 s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;
1132 karl 1.36 } 1133
#ifdef PEGASUS_USE_STRING_EXTENSIONS
// Exact (case-sensitive) equality test between a String and a C string.
//
// With PEGASUS_STRING_NO_UTF8 the buffers are compared in place, widening
// each char of s2 to Uint16. Otherwise s2 is converted to a temporary String
// and the String/String overload decides.
//
// s1       String operand
// s2       NUL-terminated C string operand
// returns  true when both hold the same sequence of characters
Boolean String::equal(const String& s1, const char* s2)
{
#ifdef PEGASUS_STRING_NO_UTF8

    _check_null_pointer(s2);

    const Uint16* p = s1._rep->data;
    const char* q = s2;
    size_t n = s1._rep->size;

    // Driven by s1's size rather than by a terminating zero, so a zero unit
    // inside s1 does not end the comparison early.
    // NOTE(review): Uint16(*q) sign-extends when plain char is signed and the
    // value is negative -- confirm this matches how String(const char*)
    // stores such characters.
    while (n--)
    {
        // s2 ran out before s1 did.
        if (!*q)
            return false;

        if (*p++ != Uint16(*q++))
            return false;
    }

    // Equal only if s2 has been consumed completely as well.
    if (*q)
        return false;

    return true;

#else /* PEGASUS_STRING_NO_UTF8 */

    return String::equal(s1, String(s2));

#endif /* PEGASUS_STRING_NO_UTF8 */
}
#endif /* PEGASUS_USE_STRING_EXTENSIONS */
1159 kumpf 1.39
1160 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
1161 kumpf 1.39 {
1162 david 1.69 #if defined(PEGASUS_OS_OS400)
1163 mike 1.111.2.1
1164 david 1.93 CString cstr = str.getCString();
1165 david 1.69 const char* utf8str = cstr; 1166 os << utf8str;
1167 chuck 1.111.2.12 return os; 1168 #else
1169 david 1.69
1170 chuck 1.111.2.14 #if defined(PEGASUS_HAS_ICU)
1171 mike 1.111.2.1
1172 yi.zhou 1.108 if (InitializeICU::initICUSuccessful()) 1173 {
1174 david.dillard 1.105 char buf = NULL; 1175 const int size = str.size() 6;
1176 mike 1.111.2.1 UnicodeString UniStr( 1177 (const UChar *)str.getChar16Data(), (int32_t)str.size());
1178 david.dillard 1.105 Uint32 bufsize = UniStr.extract(0,size,buf); 1179 buf = new char[bufsize+1]; 1180 UniStr.extract(0,bufsize,buf); 1181 os << buf; 1182 os.flush(); 1183 delete [] buf;
1184 chuck 1.111.2.12 return os;
1185 yi.zhou 1.108 }
1186 mike 1.111.2.1
1187 chuck 1.111.2.14 #endif // PEGASUS_HAS_ICU
1188 mike 1.111.2.1 1189 for (Uint32 i = 0, n = str.size(); i < n; i++)
1190 yi.zhou 1.108 {
1191 chuck 1.111.2.12 Uint16 code = str[i];
1192 david.dillard 1.105
1193 chuck 1.111.2.12 if (code > 0 && !(code & 0xFF00)) 1194 os << char(code); 1195 else 1196 { 1197 // Print in hex format: 1198 char buffer[8]; 1199 sprintf(buffer, "\\x%04X", code); 1200 os << buffer; 1201 }
1202 yi.zhou 1.108 }
1203 kumpf 1.39 1204 return os;
1205 chuck 1.111.2.12 #endif // PEGASUS_OS_OS400
1206 kumpf 1.39 } 1207
1208 mike 1.111.2.1 void String::_append_char_aux()
1209 kumpf 1.39 {
1210 mike 1.111.2.1 StringRep* tmp;
1211 kumpf 1.39
1212 mike 1.111.2.1 if (_rep->cap) 1213 { 1214 tmp = StringRep::alloc(2 * _rep->cap); 1215 tmp->size = _rep->size; 1216 _copy(tmp->data, _rep->data, _rep->size); 1217 } 1218 else 1219 { 1220 tmp = StringRep::alloc(8); 1221 tmp->size = 0; 1222 }
1223 kumpf 1.39
1224 mike 1.111.2.1 StringRep::unref(_rep); 1225 _rep = tmp;
1226 kumpf 1.39 } 1227
1228 mike 1.111.2.1 PEGASUS_NAMESPACE_END
1229 kumpf 1.39
1230 mike 1.111.2.1 /* 1231 ================================================================================
1232 kumpf 1.39
String optimizations:

    1.  Added mechanism allowing certain functions to be inlined only when
        used by internal Pegasus modules. External modules (i.e., providers)
        link to a non-inline version, which allows for binary compatibility.

    2.  Implemented copy-on-write with atomic increment/decrement. This
        yielded a 10% improvement for the 'gc' benchmark and an 11%
        improvement for the 'ni1000' benchmark.

    3.  Employed loop unrolling in several places. For example, see:

            static Uint16* _find(const Uint16* s, size_t n, Uint16 c);

    4.  Used the "empty-rep" optimization (described in whitepaper from the
        GCC Developers Summit). This reduced default construction to a simple
        pointer assignment.

            inline String::String() : _rep(&_empty_rep) { }

    5.  Implemented Uint16 versions of toupper() and tolower() using tables.
        For example:

            static const char _upper[] =
            {
                0,1,2,...255
            };

            inline Uint16 _to_upper(Uint16 x)
            {
                return (x & 0xFF00) ? x : _upper[x];
            }

        This outperforms the system implementation by avoiding an anding
        operation.

    6.  Implemented char* version of the following member functions to
        eliminate unnecessary creation of anonymous string objects
        (temporaries).

            String(const String& s1, const char* s2);
            String(const char* s1, const String& s2);
            String& String::operator=(const char* str);
            Uint32 String::find(const char* s) const;
            bool String::equal(const String& s1, const char* s2);
            static int String::compare(const String& s1, const char* s2);
            String& String::append(const char* str);
            String& String::append(const char* str, Uint32 size);
            static bool String::equalNoCase(const String& s1, const char* s2);
            String& operator=(const char* str)
            String& String::assign(const char* str)
            String& String::append(const char* str)
            Boolean operator==(const String& s1, const char* s2)
            Boolean operator==(const char* s1, const String& s2)
            Boolean operator!=(const String& s1, const char* s2)
            Boolean operator!=(const char* s1, const String& s2)
            Boolean operator<(const String& s1, const char* s2)
            Boolean operator<(const char* s1, const String& s2)
            Boolean operator>(const String& s1, const char* s2)
            Boolean operator>(const char* s1, const String& s2)
            Boolean operator<=(const String& s1, const char* s2)
            Boolean operator<=(const char* s1, const String& s2)
            Boolean operator>=(const String& s1, const char* s2)
            Boolean operator>=(const char* s1, const String& s2)
            String operator+(const String& s1, const char* s2)
            String operator+(const char* s1, const String& s2)

    7.  Optimized _next_pow_2(), used in rounding the capacity to the next
        power of two (algorithm from the book "Hacker's Delight").

            static Uint32 _next_pow_2(Uint32 x)
            {
                if (x < 8)
                    return 8;

                x--;
                x |= (x >> 1);
                x |= (x >> 2);
                x |= (x >> 4);
                x |= (x >> 8);
                x |= (x >> 16);
                x++;

                return x;
            }

    8.  Implemented "concatenating constructors" to eliminate temporaries
        created by operator+().
        This scheme employs the "return-value
        optimization" described by Stan Lippman.

            inline String operator+(const String& s1, const String& s2)
            {
                return String(s1, s2, 0);
            }

    9.  Experimented to find the optimal initial size for a short string.
1329 mike 1.111.2.13 Eight seems to offer the best tradeoff between space and time.

    10. Inlined all members of the Char16 class.

    11. Used Uint16 internally in the String class. This showed no improvement
        since Char16 was already fully inlined and was essentially reduced to
        Uint16 in any case.

    12. Implemented conditional logic (#if) allowing error checking logic to
        be excluded for better performance. Examples include bounds checking
        and null-pointer checking.

    13. Used memcpy() and memcmp() where possible. These are implemented using
        the rep family of instructions under Intel and are much faster.

1344 mike 1.111.2.2 14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8 1345 copy routine overhead. 1346 1347 15. Added ASCII7 form of the constructor and assign(). 1348 1349 String s("hello world", String::ASCII7); 1350
1351 mike 1.111.2.4 s.assignASCII7("hello world");
1352 mike 1.111.2.2
1353 mike 1.111.2.6 This avoids slower UTF8 processing when not needed.
1354 mike 1.111.2.2
1355 mike 1.111.2.1 ================================================================================ 1356 */

No CVS admin address has been configured