pegasus/src/Pegasus/Common/String.cpp - annotate

Return to String.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

1 karl 1.98 //%2005////////////////////////////////////////////////////////////////////////
2 mike 1.27 //
3 karl 1.97 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development 4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems. 5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
6 karl 1.85 // IBM Corp.; EMC Corporation, The Open Group.
7 karl 1.97 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.; 8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
9 karl 1.98 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.; 10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
11 mike 1.27 // 12 // Permission is hereby granted, free of charge, to any person obtaining a copy
13 kumpf 1.41 // of this software and associated documentation files (the "Software"), to 14 // deal in the Software without restriction, including without limitation the 15 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
16 mike 1.27 // sell copies of the Software, and to permit persons to whom the Software is 17 // furnished to do so, subject to the following conditions:
18 david.dillard 1.105 //
19 kumpf 1.41 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
20 mike 1.27 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED 21 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
22 kumpf 1.41 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 23 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 24 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
25 mike 1.27 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 27 // 28 //============================================================================== 29 // 30 // Author: Mike Brasher (mbrasher@bmc.com) 31 // 32 //%///////////////////////////////////////////////////////////////////////////// 33
34 mike 1.111.2.6 #define PEGASUS_USE_INTERNAL_INLINES
35 mike 1.27 #include "String.h"
36 mike 1.111.2.6 #include <cassert>
37 kumpf 1.48 #include "InternalException.h"
38 david 1.69 #include "CommonUTF.h"
39 mike 1.111.2.1 #include "CharSet.h"
40 david 1.69
41 mike 1.111.2.1 #ifdef PEGASUS_STRING_ENABLE_ICU
42 chuck 1.99 #include <unicode/ustring.h> 43 #include <unicode/uchar.h>
44 david 1.69 #endif 45
46 mike 1.27 PEGASUS_NAMESPACE_BEGIN 47
48 mike 1.111.2.1 //==============================================================================
49 kumpf 1.39 //
50 mike 1.111.2.6 // Compile-time switches (defined macros). 51 // 52 // PEGASUS_STRING_ENABLE_ICU -- enables use of ICU package. 53 // 54 // PEGASUS_STRING_NO_THROW -- suppresses throwing of exceptions 55 // 56 // PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code. 57 // 58 // PEGASUS_USE_INTERNAL_INLINES -- enables internal inlining feature. 59 // 60 //============================================================================== 61 62 //============================================================================== 63 //
64 mike 1.111.2.1 // File-scope definitions:
65 kumpf 1.54 //
66 mike 1.111.2.1 //==============================================================================
67 kumpf 1.54
68 mike 1.111.2.1 // Converts 16-bit characters to upper case. 69 inline Uint16 _to_upper(Uint16 x)
70 kumpf 1.54 {
71 mike 1.111.2.1 return (x & 0xFF00) ? x : CharSet::to_upper(x);
72 kumpf 1.54 } 73
74 mike 1.111.2.1 // Converts 16-bit characters to lower case. 75 inline Uint16 _to_lower(Uint16 x)
76 kumpf 1.54 {
77 mike 1.111.2.1 return (x & 0xFF00) ? x : CharSet::to_lower(x); 78 }
79 kumpf 1.82
80 mike 1.111.2.1 // Rounds x to the next power of two (or just returns 8 if x < 8). 81 static Uint32 _next_pow_2(Uint32 x) 82 { 83 if (x < 8) 84 return 8; 85 86 x--; 87 x \|= (x >> 1); 88 x \|= (x >> 2); 89 x \|= (x >> 4); 90 x \|= (x >> 8); 91 x \|= (x >> 16); 92 x++; 93 94 return x; 95 } 96 97 template<class P, class Q> 98 static void _copy(P* p, const Q* q, size_t n) 99 { 100 // Use loop unrolling. 101 mike 1.111.2.1 102 while (n >= 8) 103 { 104 p[0] = q[0]; 105 p[1] = q[1]; 106 p[2] = q[2]; 107 p[3] = q[3]; 108 p[4] = q[4]; 109 p[5] = q[5]; 110 p[6] = q[6]; 111 p[7] = q[7]; 112 p += 8; 113 q += 8; 114 n -= 8; 115 } 116 117 while (n >= 4)
118 kumpf 1.82 {
119 mike 1.111.2.1 p[0] = q[0]; 120 p[1] = q[1]; 121 p[2] = q[2]; 122 p[3] = q[3]; 123 p += 4; 124 q += 4; 125 n -= 4;
126 kumpf 1.82 }
127 mike 1.111.2.1 128 while (n--) 129 p++ = q++;
130 kumpf 1.54 } 131
132 mike 1.111.2.1 static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
133 kumpf 1.54 {
134 mike 1.111.2.1 while (n >= 4) 135 { 136 if (s[0] == c) 137 return (Uint16)s; 138 if (s[1] == c) 139 return (Uint16)&s[1]; 140 if (s[2] == c) 141 return (Uint16)&s[2]; 142 if (s[3] == c) 143 return (Uint16)&s[3]; 144 145 n -= 4; 146 s += 4; 147 } 148 149 if (n) 150 { 151 if (s == c) 152 return (Uint16)s; 153 s++; 154 n--; 155 mike 1.111.2.1 } 156 157 if (n) 158 { 159 if (s == c) 160 return (Uint16)s; 161 s++; 162 n--; 163 } 164 165 if (n && s == c) 166 return (Uint16)s; 167 168 // Not found! 169 return 0;
170 kumpf 1.54 } 171
172 mike 1.111.2.1 static int _compare(const Uint16* s1, const Uint16* s2)
173 kumpf 1.54 {
174 mike 1.111.2.1 while (s1 && s2)
175 kumpf 1.82 {
176 mike 1.111.2.1 int r = s1++ - s2++; 177 178 if (r) 179 return r;
180 kumpf 1.82 }
181 mike 1.111.2.1 182 if (s2) 183 return -1; 184 else if (s1) 185 return 1; 186 187 return 0;
188 kumpf 1.54 } 189
190 mike 1.111.2.1 static int _compare_no_utf8(const Uint16* s1, const char* s2)
191 kumpf 1.56 {
192 mike 1.111.2.1 Uint16 c1; 193 Uint16 c2; 194 195 do
196 kumpf 1.81 {
197 mike 1.111.2.1 c1 = s1++; 198 c2 = s2++; 199 200 if (c1 == 0) 201 return c1 - c2;
202 kumpf 1.81 }
203 mike 1.111.2.1 while (c1 == c2); 204 205 return c1 - c2;
206 kumpf 1.56 } 207
208 mike 1.111.2.1 static int _compare(const Uint16* s1, const Uint16* s2, size_t n)
209 kumpf 1.54 {
210 mike 1.111.2.1 // This should only be called when s1 and s2 have the same length. 211 212 while (n-- && (s1++ - s2++) == 0) 213 ; 214 215 return s1[-1] - s2[-1];
216 kumpf 1.54 } 217
218 mike 1.111.2.1 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n) 219 { 220 memcpy(s1, s2, n * sizeof(Uint16)); 221 }
222 kumpf 1.39
223 mike 1.111.2.1 void String_throw_out_of_bounds() 224 { 225 throw IndexOutOfBoundsException(); 226 } 227 228 #ifdef PEGASUS_STRING_NO_THROW 229 # define _check_null_pointer(ARG) /* empty / 230 #else 231 template<class T> 232 inline void _check_null_pointer(const T ptr) 233 { 234 if (!ptr) 235 throw NullPointer(); 236 } 237 #endif
238 mike 1.27
239 mike 1.111.2.1 static size_t _copy_from_utf8(Uint16* dest, const char* src, size_t n)
240 mike 1.27 {
241 mike 1.111.2.1 Uint16* p = dest; 242 const Uint8* q = (const Uint8*)src;
243 mike 1.111.2.2 244 // Process leading 7-bit ASCII characters (to avoid UTF8 overhead below 245 // this loop). Use factor-four loop-unrolling. 246 247 while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128) 248 { 249 p[0] = q[0]; 250 p[1] = q[1]; 251 p[2] = q[2]; 252 p[3] = q[3]; 253 p += 4; 254 q += 4; 255 n -= 4; 256 } 257 258 switch (n) 259 { 260 case 0: 261 return p - dest; 262 case 1: 263 if (q[0] < 128) 264 mike 1.111.2.2 { 265 p[0] = q[0]; 266 return p + 1 - dest; 267 } 268 break; 269 case 2: 270 if (q[0] < 128 && q[1] < 128) 271 { 272 p[0] = q[0]; 273 p[1] = q[1]; 274 return p + 2 - dest; 275 } 276 break; 277 case 3: 278 if (q[0] < 128 && q[1] < 128 && q[2] < 128) 279 { 280 p[0] = q[0]; 281 p[1] = q[1]; 282 p[2] = q[2]; 283 return p + 3 - dest; 284 } 285 mike 1.111.2.2 break; 286 } 287 288 // Process remaining characters.
289 mike 1.111.2.1 290 while (n) 291 {
292 mike 1.111.2.2 // Optimize for 7-bit ASCII case.
293 mike 1.111.2.1
294 mike 1.111.2.2 if (*q < 128)
295 mike 1.111.2.1 { 296 p++ = q++; 297 n--; 298 }
299 mike 1.111.2.2 else 300 { 301 Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1;
302 mike 1.111.2.1
303 mike 1.111.2.2 if (c > n \|\| !isValid_U8(q, c) \|\| 304 UTF8toUTF16(&q, q + c, &p, p + n) != 0) 305 { 306 throw Exception("Bad UTF8 encoding"); 307 }
308 mike 1.111.2.1
309 mike 1.111.2.2 n -= c;
310 mike 1.111.2.1 } 311 }
312 mike 1.27
313 mike 1.111.2.1 return p - dest; 314 }
315 mike 1.27
316 mike 1.111.2.1 // Note: dest must be at least three times src (plus an extra byte for 317 // terminator).
318 mike 1.111.2.2 static inline size_t _copy_to_utf8(char* dest, const Uint16* src, size_t n)
319 mike 1.111.2.1 {
320 mike 1.111.2.2 const Uint16* q = src; 321 Uint8* p = (Uint8)dest; 322 323 while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128) 324 { 325 p[0] = q[0]; 326 p[1] = q[1]; 327 p[2] = q[2]; 328 p[3] = q[3]; 329 p += 4; 330 q += 4; 331 n -= 4; 332 } 333 334 switch (n) 335 { 336 case 0: 337 return p - (Uint8)dest; 338 case 1: 339 if (q[0] < 128) 340 { 341 mike 1.111.2.2 p[0] = q[0]; 342 return p + 1 - (Uint8)dest; 343 } 344 break; 345 case 2: 346 if (q[0] < 128 && q[1] < 128) 347 { 348 p[0] = q[0]; 349 p[1] = q[1]; 350 return p + 2 - (Uint8)dest; 351 } 352 break; 353 case 3: 354 if (q[0] < 128 && q[1] < 128 && q[2] < 128) 355 { 356 p[0] = q[0]; 357 p[1] = q[1]; 358 p[2] = q[2]; 359 return p + 3 - (Uint8)dest; 360 } 361 break; 362 mike 1.111.2.2 } 363 364 // If this line was reached, there must be characters greater than 128. 365 366 UTF16toUTF8(&q, q + n, &p, p + 3 n); 367 368 return p - (Uint8*)dest;
369 mike 1.111.2.1 }
370 mike 1.27
371 mike 1.111.2.1 static inline size_t _convert(Uint16* p, const char* q, size_t n) 372 { 373 #ifdef PEGASUS_STRING_NO_UTF8 374 _copy(p, q, n);
375 mike 1.27 return n;
376 mike 1.111.2.1 #else 377 return _copy_from_utf8(p, q, n); 378 #endif
379 mike 1.27 } 380
381 mike 1.111.2.1 //==============================================================================
382 chuck 1.102 //
383 mike 1.111.2.1 // class CString
384 chuck 1.102 //
385 mike 1.111.2.1 //============================================================================== 386 387 CString::CString(const CString& cstr) : _rep(0)
388 chuck 1.102 {
389 mike 1.111.2.1 if (cstr._rep)
390 chuck 1.102 {
391 mike 1.111.2.1 size_t n = strlen(cstr._rep) + 1; 392 _rep = (char*)operator new(n); 393 memcpy(_rep, cstr._rep, n); 394 } 395 } 396 397 CString& CString::operator=(const CString& cstr) 398 { 399 if (&cstr != this) 400 { 401 if (_rep)
402 david.dillard 1.105 {
403 mike 1.111.2.1 operator delete(_rep); 404 _rep = 0;
405 chuck 1.102 }
406 mike 1.111.2.1 407 if (cstr._rep)
408 chuck 1.102 {
409 mike 1.111.2.1 size_t n = strlen(cstr._rep) + 1; 410 _rep = (char*)operator new(n); 411 memcpy(_rep, cstr._rep, n);
412 chuck 1.102 }
413 mike 1.111.2.1 }
414 chuck 1.102
415 mike 1.111.2.1 return *this;
416 chuck 1.102 } 417
418 mike 1.111.2.1 //============================================================================== 419 // 420 // class StringRep 421 // 422 //==============================================================================
423 kumpf 1.43
424 mike 1.111.2.1 StringRep StringRep::_empty_rep;
425 kumpf 1.43
426 mike 1.111.2.1 inline StringRep* StringRep::alloc(size_t cap)
427 mike 1.27 {
428 mike 1.111.2.1 StringRep* rep = (StringRep)::operator new( 429 sizeof(StringRep) + cap sizeof(Uint16)); 430 rep->cap = cap; 431 Atomic_create(&rep->refs, 1); 432 433 return rep;
434 mike 1.27 } 435
436 mike 1.111.2.1 static inline void _reserve(StringRep*& rep, Uint32 cap)
437 mike 1.27 {
438 mike 1.111.2.1 if (cap > rep->cap \|\| Atomic_get(&rep->refs) != 1) 439 { 440 size_t n = _next_pow_2(cap); 441 StringRep* new_rep = StringRep::alloc(n); 442 new_rep->size = rep->size; 443 _copy(new_rep->data, rep->data, rep->size + 1); 444 StringRep::unref(rep); 445 rep = new_rep; 446 }
447 kumpf 1.39 }
448 tony 1.66
449 mike 1.111.2.1 StringRep* StringRep::create(const Uint16* data, size_t size)
450 kumpf 1.39 {
451 mike 1.111.2.1 StringRep* rep = StringRep::alloc(size); 452 rep->size = size; 453 _copy(rep->data, data, size); 454 rep->data[size] = '\0'; 455 return rep;
456 kumpf 1.39 } 457
458 mike 1.111.2.1 StringRep* StringRep::copy_on_write(StringRep* rep)
459 kumpf 1.39 {
460 mike 1.111.2.1 // Return a new copy of rep. Release rep.
461 david.dillard 1.105
462 mike 1.111.2.1 StringRep* new_rep = StringRep::alloc(rep->size); 463 new_rep->size = rep->size; 464 _copy(new_rep->data, rep->data, rep->size); 465 new_rep->data[new_rep->size] = '\0'; 466 StringRep::unref(rep); 467 return new_rep;
468 mike 1.27 } 469
470 mike 1.111.2.1 StringRep* StringRep::create(const char* data, size_t size)
471 kumpf 1.39 {
472 mike 1.111.2.1 StringRep* rep = StringRep::alloc(size); 473 rep->size = _convert((Uint16*)rep->data, data, size); 474 rep->data[rep->size] = '\0';
475 david.dillard 1.105
476 mike 1.111.2.1 return rep;
477 kumpf 1.39 } 478
479 mike 1.111.2.4 StringRep* StringRep::createASCII7(const char* data, size_t size)
480 mike 1.111.2.2 { 481 StringRep* rep = StringRep::alloc(size); 482 _copy((Uint16*)rep->data, data, size); 483 rep->data[rep->size = size] = '\0'; 484 return rep; 485 } 486
487 mike 1.111.2.1 Uint32 StringRep::length(const Uint16* str)
488 mike 1.27 {
489 mike 1.111.2.5 // Note: We could unroll this but it is rarely called.
490 david.dillard 1.105
491 mike 1.111.2.1 const Uint16* end = (Uint16)str; 492 493 while (end++) 494 ; 495 496 return end - str - 1;
497 mike 1.27 } 498
499 mike 1.111.2.1 //============================================================================== 500 // 501 // class String 502 // 503 //==============================================================================
504 david.dillard 1.105
505 mike 1.111.2.1 const String String::EMPTY; 506 507 String::String(const String& str, Uint32 n) 508 { 509 _check_bounds(n, str._rep->size); 510 _rep = StringRep::create(str._rep->data, n);
511 kumpf 1.39 }
512 mike 1.27
513 mike 1.111.2.1 String::String(const Char16* str)
514 kumpf 1.39 {
515 mike 1.111.2.1 _check_null_pointer(str); 516 _rep = StringRep::create((Uint16)str, StringRep::length((Uint16)str));
517 mike 1.27 } 518
519 mike 1.111.2.1 String::String(const Char16* str, Uint32 n)
520 mike 1.27 {
521 mike 1.111.2.1 _check_null_pointer(str); 522 _rep = StringRep::create((Uint16*)str, n);
523 mike 1.27 } 524
525 mike 1.111.2.1 String::String(const char* str)
526 mike 1.27 {
527 mike 1.111.2.1 _check_null_pointer(str); 528 _rep = StringRep::create(str, strlen(str));
529 mike 1.27 } 530
531 mike 1.111.2.2 String::String(const char* str, String::ASCII7Tag tag) 532 { 533 _check_null_pointer(str);
534 mike 1.111.2.4 _rep = StringRep::createASCII7(str, strlen(str));
535 mike 1.111.2.2 } 536
537 mike 1.111.2.1 String::String(const char* str, Uint32 n)
538 mike 1.27 {
539 mike 1.111.2.1 _check_null_pointer(str); 540 _rep = StringRep::create(str, n); 541 }
542 david.dillard 1.105
543 mike 1.111.2.2 String::String(const char* str, size_t n, String::ASCII7Tag tag) 544 { 545 _check_null_pointer(str);
546 mike 1.111.2.4 _rep = StringRep::createASCII7(str, n);
547 mike 1.111.2.2 } 548
549 mike 1.111.2.1 String::String(const String& s1, const String& s2) 550 { 551 size_t n1 = s1._rep->size; 552 size_t n2 = s2._rep->size; 553 size_t n = n1 + n2; 554 _rep = StringRep::alloc(n); 555 _copy(_rep->data, s1._rep->data, n1); 556 _copy(_rep->data + n1, s2._rep->data, n2); 557 _rep->size = n; 558 _rep->data[n] = '\0'; 559 } 560 561 String::String(const String& s1, const char* s2) 562 { 563 _check_null_pointer(s2); 564 size_t n1 = s1._rep->size; 565 size_t n2 = strlen(s2); 566 _rep = StringRep::alloc(n1 + n2); 567 _copy(_rep->data, s1._rep->data, n1); 568 _rep->size = n1 + _convert((Uint16)_rep->data + n1, s2, n2); 569 _rep->data[_rep->size] = '\0'; 570 mike 1.111.2.1 } 571 572 String::String(const char s1, const String& s2) 573 { 574 _check_null_pointer(s1); 575 size_t n1 = strlen(s1); 576 size_t n2 = s2._rep->size; 577 _rep = StringRep::alloc(n1 + n2); 578 _rep->size = n2 + _convert((Uint16*)_rep->data, s1, n1); 579 _copy(_rep->data + n1, s2._rep->data, n2); 580 _rep->data[_rep->size] = '\0';
581 mike 1.27 } 582
583 mike 1.111.2.1 String& String::assign(const String& str)
584 mike 1.27 {
585 mike 1.111.2.1 if (_rep != str._rep)
586 david.dillard 1.105 {
587 mike 1.111.2.1 StringRep::unref(_rep); 588 StringRep::ref(_rep = str._rep);
589 david.dillard 1.105 } 590
591 mike 1.27 return *this; 592 } 593
594 mike 1.111.2.1 String& String::assign(const Char16* str, Uint32 n)
595 chuck 1.102 {
596 mike 1.111.2.1 _check_null_pointer(str); 597 598 if (n > _rep->cap \|\| Atomic_get(&_rep->refs) != 1)
599 david.dillard 1.105 {
600 mike 1.111.2.1 StringRep::unref(_rep); 601 _rep = StringRep::alloc(n);
602 david.dillard 1.105 } 603
604 mike 1.111.2.1 _rep->size = n; 605 _copy(_rep->data, (Uint16*)str, n); 606 _rep->data[n] = '\0'; 607
608 chuck 1.102 return *this; 609 } 610
611 kumpf 1.39 String& String::assign(const char* str, Uint32 n)
612 mike 1.27 {
613 mike 1.111.2.1 _check_null_pointer(str); 614 615 if (n > _rep->cap \|\| Atomic_get(&_rep->refs) != 1)
616 david.dillard 1.105 {
617 mike 1.111.2.1 StringRep::unref(_rep); 618 _rep = StringRep::alloc(n);
619 david.dillard 1.105 } 620
621 mike 1.111.2.1 _rep->size = _convert(_rep->data, str, n); 622 _rep->data[_rep->size] = 0; 623
624 mike 1.27 return *this; 625 } 626
627 mike 1.111.2.4 String& String::assignASCII7(const char* str, Uint32 n)
628 mike 1.111.2.2 { 629 _check_null_pointer(str); 630 631 if (n > _rep->cap \|\| Atomic_get(&_rep->refs) != 1) 632 { 633 StringRep::unref(_rep); 634 _rep = StringRep::alloc(n); 635 } 636 637 _copy(_rep->data, str, n); 638 _rep->data[_rep->size = n] = 0; 639 640 return *this; 641 } 642
643 kumpf 1.39 void String::clear() 644 {
645 mike 1.111.2.1 if (_rep->size) 646 { 647 if (Atomic_get(&_rep->refs) == 1) 648 _rep->size = 0; 649 else 650 { 651 StringRep::unref(_rep); 652 _rep = &StringRep::_empty_rep; 653 } 654 }
655 kumpf 1.39 } 656
657 mike 1.111.2.1 void String::reserveCapacity(Uint32 cap)
658 kumpf 1.39 {
659 mike 1.111.2.1 _reserve(_rep, cap);
660 kumpf 1.39 } 661
662 mike 1.111.2.1 CString String::getCString() const 663 { 664 #ifdef PEGASUS_STRING_NO_UTF8 665 char* str = (char*)operator new(_rep->size + 1); 666 _copy(str, _rep->data, _rep->size); 667 str[_rep->size] = '\0'; 668 return CString(str);
669 gs.keenan 1.110 #else
670 mike 1.111.2.1 Uint32 n = 3 * _rep->size; 671 char* str = (char*)operator new(n + 1);
672 mike 1.111.2.2 size_t size = _copy_to_utf8(str, _rep->data, _rep->size);
673 mike 1.111.2.1 str[size] = '\0'; 674 return CString(str);
675 gs.keenan 1.110 #endif
676 kumpf 1.39 } 677
678 mike 1.111.2.1 String& String::append(const Char16* str, Uint32 n)
679 kumpf 1.39 {
680 mike 1.111.2.1 _check_null_pointer(str);
681 kumpf 1.39
682 mike 1.111.2.1 size_t old_size = _rep->size; 683 size_t new_size = old_size + n; 684 _reserve(_rep, new_size); 685 _copy(_rep->data + old_size, (Uint16*)str, n); 686 _rep->size = new_size; 687 _rep->data[new_size] = '\0';
688 mike 1.27
689 mike 1.111.2.1 return *this;
690 mike 1.27 } 691
692 mike 1.111.2.1 String& String::append(const String& str)
693 mike 1.27 {
694 mike 1.111.2.1 return append((Char16*)str._rep->data, str._rep->size);
695 mike 1.27 } 696
697 mike 1.111.2.1 String& String::append(const char* str, Uint32 size)
698 kumpf 1.39 {
699 mike 1.111.2.1 _check_null_pointer(str); 700 701 size_t old_size = _rep->size; 702 size_t cap = old_size + size; 703 704 _reserve(_rep, cap); 705 _rep->size += _convert((Uint16*)_rep->data + old_size, str, size); 706 _rep->data[_rep->size] = '\0';
707 kumpf 1.39
708 mike 1.27 return *this; 709 } 710
711 mike 1.111.2.1 void String::remove(Uint32 index, Uint32 n)
712 mike 1.27 {
713 mike 1.111.2.1 if (n == PEG_NOT_FOUND) 714 n = _rep->size - index;
715 mike 1.27
716 mike 1.111.2.1 _check_bounds(index + n, _rep->size); 717 718 if (Atomic_get(&_rep->refs) != 1) 719 _rep = StringRep::copy_on_write(_rep);
720 mike 1.27
721 mike 1.111.2.1 assert(index + n <= _rep->size);
722 mike 1.27
723 mike 1.111.2.1 size_t rem = _rep->size - (index + n); 724 Uint16* data = _rep->data; 725 726 if (rem) 727 memmove(data + index, data + index + n, rem * sizeof(Uint16)); 728 729 _rep->size -= n; 730 data[_rep->size] = '\0';
731 mike 1.27 } 732
733 mike 1.111.2.1 String String::subString(Uint32 index, Uint32 n) const
734 mike 1.27 {
735 mike 1.111.2.5 // Note: this implementation is very permissive but used for
736 mike 1.111.2.1 // backwards compatibility. 737 738 if (index < _rep->size)
739 mike 1.27 {
740 mike 1.111.2.1 if (n == PEG_NOT_FOUND \|\| n > _rep->size - index) 741 n = _rep->size - index;
742 mike 1.27
743 mike 1.111.2.1 return String((Char16*)_rep->data + index, n);
744 mike 1.27 }
745 david.dillard 1.105 746 return String();
747 mike 1.27 } 748 749 Uint32 String::find(Char16 c) const 750 {
751 mike 1.111.2.1 Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
752 mike 1.27
753 mike 1.111.2.1 if (p) 754 return p - _rep->data;
755 mike 1.27 756 return PEG_NOT_FOUND; 757 } 758
759 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
760 mike 1.30 {
761 mike 1.111.2.1 _check_bounds(index, _rep->size);
762 mike 1.30
763 mike 1.111.2.1 if (index >= _rep->size) 764 return PEG_NOT_FOUND; 765 766 Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c); 767 768 if (p) 769 return p - _rep->data;
770 mike 1.30 771 return PEG_NOT_FOUND; 772 } 773
774 mike 1.111.2.1 Uint32 String::_find_aux(const Char16* s, Uint32 n) const
775 mike 1.27 {
776 mike 1.111.2.1 _check_null_pointer(s);
777 mike 1.27
778 mike 1.111.2.1 const Uint16* data = _rep->data; 779 size_t rem = _rep->size;
780 mike 1.30
781 mike 1.111.2.1 while (n <= rem)
782 mike 1.27 {
783 mike 1.111.2.1 Uint16* p = (Uint16*)_find(data, rem, s[0]);
784 david.dillard 1.105
785 mike 1.111.2.1 if (!p) 786 break; 787 788 if (memcmp(p, s, n * sizeof(Uint16)) == 0) 789 return p - _rep->data; 790 791 p++; 792 rem -= p - data; 793 data = p;
794 mike 1.27 }
795 mike 1.111.2.1
796 mike 1.27 return PEG_NOT_FOUND; 797 } 798
799 mike 1.111.2.1 Uint32 String::find(const char* s) const 800 { 801 _check_null_pointer(s); 802
803 mike 1.111.2.5 // Note: could optimize away creation of temporary, but this is rarely 804 // called.
805 mike 1.111.2.1 return find(String(s)); 806 } 807
808 mike 1.27 Uint32 String::reverseFind(Char16 c) const 809 {
810 mike 1.111.2.1 Uint16 x = c; 811 Uint16* p = _rep->data; 812 Uint16* q = _rep->data + _rep->size;
813 mike 1.27
814 mike 1.111.2.1 while (q != p)
815 mike 1.27 {
816 mike 1.111.2.1 if (*--q == x) 817 return q - p;
818 mike 1.27 } 819 820 return PEG_NOT_FOUND; 821 } 822 823 void String::toLower() 824 {
825 mike 1.111.2.1 #ifdef PEGASUS_STRING_ENABLE_ICU 826
827 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
828 david 1.90 {
829 mike 1.111.2.1 //// First calculate size of resulting string. u_strToLower() returns 830 //// only the size when zero is passed as the destination size argument. 831
832 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR; 833
834 mike 1.111.2.1 int32_t new_size = u_strToLower( 835 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
836 chuck 1.99
837 mike 1.111.2.1 //// Reserve enough space for the result. 838 839 if ((Uint32)new_size > _rep->cap) 840 _reserve(_rep, new_size); 841 842 //// Perform the conversion (overlapping buffers are allowed). 843 844 u_strToLower((UChar)_rep->data, new_size, 845 (UChar)_rep->data, _rep->size, NULL, &err);
846 yi.zhou 1.108
847 mike 1.111.2.1 _rep->size = new_size;
848 david 1.90 }
849 mike 1.111.2.1 850 #endif /* PEGASUS_STRING_ENABLE_ICU / 851 852 if (Atomic_get(&_rep->refs) != 1) 853 _rep = StringRep::copy_on_write(_rep); 854 855 Uint16 p = _rep->data; 856 size_t n = _rep->size; 857 858 for (; n--; p++)
859 david 1.90 {
860 mike 1.111.2.1 if (!(p & 0xFF00)) 861 p = _to_lower(*p);
862 mike 1.27 }
863 kumpf 1.39 } 864
865 chuck 1.99 void String::toUpper()
866 david 1.90 {
867 mike 1.111.2.1 #ifdef PEGASUS_STRING_ENABLE_ICU 868
869 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
870 chuck 1.99 {
871 mike 1.111.2.1 //// First calculate size of resulting string. u_strToUpper() returns 872 //// only the size when zero is passed as the destination size argument. 873
874 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR; 875
876 mike 1.111.2.1 int32_t new_size = u_strToUpper( 877 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
878 chuck 1.99
879 mike 1.111.2.1 //// Reserve enough space for the result.
880 yi.zhou 1.108
881 mike 1.111.2.1 if ((Uint32)new_size > _rep->cap) 882 _reserve(_rep, new_size);
883 david 1.90
884 mike 1.111.2.1 //// Perform the conversion (overlapping buffers are allowed).
885 kumpf 1.39
886 mike 1.111.2.1 u_strToUpper((UChar)_rep->data, new_size, 887 (UChar)_rep->data, _rep->size, NULL, &err);
888 mike 1.27
889 mike 1.111.2.1 _rep->size = new_size;
890 mike 1.27 } 891
892 mike 1.111.2.1 #endif /* PEGASUS_STRING_ENABLE_ICU / 893 894 if (Atomic_get(&_rep->refs) != 1) 895 _rep = StringRep::copy_on_write(_rep); 896 897 Uint16 p = _rep->data; 898 size_t n = _rep->size; 899 900 for (; n--; p++) 901 p = _to_upper(p);
902 mike 1.27 } 903
904 mike 1.111.2.1 int String::compare(const String& s1, const String& s2, Uint32 n)
905 mike 1.30 {
906 mike 1.111.2.1 assert(n <= s1._rep->size); 907 assert(n <= s2._rep->size);
908 kumpf 1.43
909 mike 1.111.2.1 // Ignoring error in which n is greater than s1.size() or s2.size() 910 return _compare(s1._rep->data, s2._rep->data, n); 911 }
912 mike 1.30
913 mike 1.111.2.1 int String::compare(const String& s1, const String& s2) 914 { 915 return _compare(s1._rep->data, s2._rep->data); 916 }
917 mike 1.30
918 mike 1.111.2.1 int String::compare(const String& s1, const char* s2) 919 { 920 _check_null_pointer(s2);
921 mike 1.30
922 mike 1.111.2.1 #ifdef PEGASUS_STRING_NO_UTF8 923 return _compare_no_utf8(s1._rep->data, s2); 924 #else 925 // ATTN: optimize this! 926 return String::compare(s1, String(s2)); 927 #endif
928 mike 1.30 } 929
930 mike 1.111.2.1 int String::compareNoCase(const String& str1, const String& str2)
931 kumpf 1.40 {
932 mike 1.111.2.1 #ifdef PEGASUS_STRING_ENABLE_ICU 933
934 yi.zhou 1.108 if (InitializeICU::initICUSuccessful()) 935 {
936 mike 1.111.2.1 return u_strcasecmp( 937 str1._rep->data, str2._rep->data, U_FOLD_CASE_DEFAULT);
938 yi.zhou 1.108 }
939 kumpf 1.40
940 mike 1.111.2.1 #endif /* PEGASUS_STRING_ENABLE_ICU */
941 kumpf 1.40
942 mike 1.111.2.1 const Uint16* s1 = str1._rep->data; 943 const Uint16* s2 = str2._rep->data; 944 945 while (s1 && s2) 946 { 947 int r = _to_lower(s1++) - _to_lower(s2++);
948 kumpf 1.40
949 david.dillard 1.105 if (r) 950 return r;
951 kumpf 1.40 } 952
953 mike 1.111.2.1 if (*s2)
954 david.dillard 1.105 return -1;
955 mike 1.111.2.1 else if (*s1)
956 david.dillard 1.105 return 1;
957 kumpf 1.40 958 return 0; 959 } 960
961 mike 1.111.2.1 Boolean String::equalNoCase_aux(const String& s1, const String& s2)
962 mike 1.27 {
963 mike 1.111.2.1 #ifdef PEGASUS_STRING_ENABLE_ICU
964 mike 1.27
965 mike 1.111.2.1 return String::compareNoCase(s1, s2) == 0;
966 kumpf 1.39
967 mike 1.111.2.1 #else /* PEGASUS_STRING_ENABLE_ICU */
968 kumpf 1.39
969 mike 1.111.2.1 Uint16* p = (Uint16)s1._rep->data; 970 Uint16 q = (Uint16*)s2._rep->data; 971 Uint32 n = s2._rep->size; 972 973 while (n >= 8) 974 { 975 if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0]))) \|\| 976 ((p[1] - q[1]) && (_to_upper(p[1]) - _to_upper(q[1]))) \|\| 977 ((p[2] - q[2]) && (_to_upper(p[2]) - _to_upper(q[2]))) \|\| 978 ((p[3] - q[3]) && (_to_upper(p[3]) - _to_upper(q[3]))) \|\| 979 ((p[4] - q[4]) && (_to_upper(p[4]) - _to_upper(q[4]))) \|\| 980 ((p[5] - q[5]) && (_to_upper(p[5]) - _to_upper(q[5]))) \|\| 981 ((p[6] - q[6]) && (_to_upper(p[6]) - _to_upper(q[6]))) \|\| 982 ((p[7] - q[7]) && (_to_upper(p[7]) - _to_upper(q[7])))) 983 { 984 return false; 985 } 986 987 n -= 8; 988 p += 8; 989 q += 8; 990 mike 1.111.2.1 } 991 992 while (n >= 4) 993 { 994 if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0]))) \|\| 995 ((p[1] - q[1]) && (_to_upper(p[1]) - _to_upper(q[1]))) \|\| 996 ((p[2] - q[2]) && (_to_upper(p[2]) - _to_upper(q[2]))) \|\| 997 ((p[3] - q[3]) && (_to_upper(p[3]) - _to_upper(q[3])))) 998 { 999 return false; 1000 } 1001 1002 n -= 4; 1003 p += 4; 1004 q += 4; 1005 }
1006 mike 1.27
1007 kumpf 1.39 while (n--) 1008 {
1009 mike 1.111.2.1 if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0])))) 1010 return false; 1011 1012 p++; 1013 q++;
1014 kumpf 1.39 }
1015 mike 1.28
1016 kumpf 1.39 return true;
1017 david 1.69
1018 mike 1.111.2.1 #endif /* PEGASUS_STRING_ENABLE_ICU */ 1019 }
1020 mike 1.27
1021 mike 1.111.2.1 Boolean String::equalNoCase(const String& s1, const char* s2)
1022 david 1.69 {
1023 mike 1.111.2.1 _check_null_pointer(s2);
1024 david 1.69
1025 mike 1.111.2.1 #if defined(PEGASUS_STRING_ENABLE_ICU)
1026 david.dillard 1.105
1027 mike 1.111.2.1 return String::equalNoCase(s1, String(s2));
1028 david 1.71
1029 mike 1.111.2.1 #elif defined(PEGASUS_STRING_NO_UTF8)
1030 kumpf 1.42
1031 mike 1.111.2.1 const Uint16* p1 = (Uint16)s1._rep->data; 1032 const char p2 = s2; 1033 size_t n = s1._rep->size;
1034 karl 1.36
1035 mike 1.111.2.1 while (n--) 1036 { 1037 if (!*p2) 1038 return false;
1039 david.dillard 1.105
1040 mike 1.111.2.1 if (_to_upper(p1++) != CharSet::to_upper(int(p2++))) 1041 return false; 1042 } 1043 1044 return true;
1045 david.dillard 1.105
1046 mike 1.111.2.1 #else /* PEGASUS_STRING_ENABLE_ICU */
1047 chuck 1.78
1048 mike 1.111.2.1 // ATTN: optimize this! 1049 return String::equalNoCase(s1, String(s2));
1050 david.dillard 1.105
1051 mike 1.111.2.1 #endif /* PEGASUS_STRING_ENABLE_ICU */ 1052 }
1053 karl 1.36
1054 mike 1.111.2.1 Boolean String::equal(const String& s1, const String& s2)
1055 karl 1.36 {
1056 mike 1.111.2.1 return s1._rep->size == s2._rep->size && memcmp(s1._rep->data, 1057 s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;
1058 karl 1.36 } 1059
1060 mike 1.111.2.1 Boolean String::equal(const String& s1, const char* s2)
1061 karl 1.36 {
1062 mike 1.111.2.1 #ifdef PEGASUS_STRING_NO_UTF8
1063 kumpf 1.39
1064 mike 1.111.2.1 _check_null_pointer(s2);
1065 kumpf 1.39
1066 mike 1.111.2.1 const Uint16* p = (Uint16)s1._rep->data; 1067 const char q = s2;
1068 kumpf 1.39
1069 mike 1.111.2.1 while (p && q) 1070 { 1071 if (p++ != Uint16(q++)) 1072 return false; 1073 }
1074 kumpf 1.39
1075 mike 1.111.2.1 return !(p \|\| q);
1076 kumpf 1.39
1077 mike 1.111.2.1 #else /* PEGASUS_STRING_NO_UTF8 */
1078 kumpf 1.39
1079 mike 1.111.2.1 return String::equal(s1, String(s2)); 1080 1081 #endif /* PEGASUS_STRING_NO_UTF8 */
1082 kumpf 1.39 } 1083
1084 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
1085 kumpf 1.39 {
1086 david 1.69 #if defined(PEGASUS_OS_OS400)
1087 mike 1.111.2.1
1088 david 1.93 CString cstr = str.getCString();
1089 david 1.69 const char* utf8str = cstr; 1090 os << utf8str; 1091
1092 mike 1.111.2.1 #elif defined(PEGASUS_STRING_ENABLE_ICU) 1093
1094 yi.zhou 1.108 if (InitializeICU::initICUSuccessful()) 1095 {
1096 david.dillard 1.105 char buf = NULL; 1097 const int size = str.size() 6;
1098 mike 1.111.2.1 UnicodeString UniStr( 1099 (const UChar *)str.getChar16Data(), (int32_t)str.size());
1100 david.dillard 1.105 Uint32 bufsize = UniStr.extract(0,size,buf); 1101 buf = new char[bufsize+1]; 1102 UniStr.extract(0,bufsize,buf); 1103 os << buf; 1104 os.flush(); 1105 delete [] buf;
1106 yi.zhou 1.108 }
1107 mike 1.111.2.1 1108 #endif /* PEGASUS_OS_OS400 */ 1109 1110 for (Uint32 i = 0, n = str.size(); i < n; i++)
1111 yi.zhou 1.108 {
1112 mike 1.111.2.1 Uint16 code = str[i];
1113 david.dillard 1.105
1114 mike 1.111.2.1 if (code > 0 && !(code & 0xFF00)) 1115 os << char(code); 1116 else 1117 { 1118 // Print in hex format: 1119 char buffer[8]; 1120 sprintf(buffer, "\\x%04X", code); 1121 os << buffer; 1122 }
1123 yi.zhou 1.108 }
1124 kumpf 1.39 1125 return os; 1126 } 1127
1128 mike 1.111.2.1 void String::_append_char_aux()
1129 kumpf 1.39 {
1130 mike 1.111.2.1 StringRep* tmp;
1131 kumpf 1.39
1132 mike 1.111.2.1 if (_rep->cap) 1133 { 1134 tmp = StringRep::alloc(2 * _rep->cap); 1135 tmp->size = _rep->size; 1136 _copy(tmp->data, _rep->data, _rep->size); 1137 } 1138 else 1139 { 1140 tmp = StringRep::alloc(8); 1141 tmp->size = 0; 1142 }
1143 kumpf 1.39
1144 mike 1.111.2.1 StringRep::unref(_rep); 1145 _rep = tmp;
1146 kumpf 1.39 } 1147
1148 mike 1.111.2.1 PEGASUS_NAMESPACE_END
1149 kumpf 1.39
1150 mike 1.111.2.1 /* 1151 ================================================================================
1152 kumpf 1.39
String optimizations:

    1. Added mechanism allowing certain functions to be inlined only when
       used by internal Pegasus modules. External modules (i.e., providers)
       link to a non-inline version, which allows for binary compatibility.

    2. Implemented copy-on-write with atomic increment/decrement. This
       yielded a 10% improvement for the 'gc' benchmark and an 11%
       improvement for the 'ni1000' benchmark.

    3. Employed loop unrolling in several places. For example, see:

           static Uint16* _find(const Uint16* s, size_t n, Uint16 c);

    4. Used the "empty-rep" optimization (described in a whitepaper from the
       GCC Developers Summit). This reduced default construction to a simple
       pointer assignment.

           inline String::String() : _rep(&_empty_rep) { }

    5. Implemented Uint16 versions of toupper() and tolower() using tables.
       For example:

           static const char _upper[] =
           {
               0,1,2,...255
           };

           inline Uint16 _to_upper(Uint16 x)
           {
               return (x & 0xFF00) ? x : _upper[x];
           }

       This outperforms the system implementation by avoiding an anding
       operation.

    6. Implemented char* version of the following member functions to
       eliminate unnecessary creation of anonymous string objects
       (temporaries).

           String(const String& s1, const char* s2);
           String(const char* s1, const String& s2);
           String& String::operator=(const char* str);
           Uint32 String::find(const char* s) const;
           bool String::equal(const String& s1, const char* s2);
           static int String::compare(const String& s1, const char* s2);
           String& String::append(const char* str);
           String& String::append(const char* str, Uint32 size);
           static bool String::equalNoCase(const String& s1, const char* s2);
           String& operator=(const char* str)
           String& String::assign(const char* str)
           String& String::append(const char* str)
           Boolean operator==(const String& s1, const char* s2)
           Boolean operator==(const char* s1, const String& s2)
           Boolean operator!=(const String& s1, const char* s2)
           Boolean operator!=(const char* s1, const String& s2)
           Boolean operator<(const String& s1, const char* s2)
           Boolean operator<(const char* s1, const String& s2)
           Boolean operator>(const String& s1, const char* s2)
           Boolean operator>(const char* s1, const String& s2)
           Boolean operator<=(const String& s1, const char* s2)
           Boolean operator<=(const char* s1, const String& s2)
           Boolean operator>=(const String& s1, const char* s2)
           Boolean operator>=(const char* s1, const String& s2)
           String operator+(const String& s1, const char* s2)
           String operator+(const char* s1, const String& s2)

    7. Optimized _next_pow_2(), used in rounding the capacity to the next
       power of two (algorithm from the book "Hacker's Delight").

           static Uint32 _next_pow_2(Uint32 x)
           {
               if (x < 8)
                   return 8;

               x--;
               x |= (x >> 1);
               x |= (x >> 2);
               x |= (x >> 4);
               x |= (x >> 8);
               x |= (x >> 16);
               x++;

               return x;
           }

    8. Implemented "concatenating constructors" to eliminate temporaries
       created by operator+().
       This scheme employs the "return-value
       optimization" described by Stan Lippman.

           inline String operator+(const String& s1, const String& s2)
           {
               return String(s1, s2, 0);
           }

    9. Experimented to find the optimal initial size for a short string.
       Eight seems to offer the best tradeoff between space and time.

    10. Inlined all members of the Char16 class.

    11. Used Uint16 internally in the String class. This showed no
        improvement since Char16 was already fully inlined and was
        essentially reduced to Uint16 in any case.

    12. Implemented conditional logic (#if) allowing error checking logic to
        be excluded to improve performance. Examples include bounds checking
        and null-pointer checking.

    13. Used memcpy() and memcmp() where possible. These are implemented using
        the rep family of instructions under Intel and are much faster.

1264 mike 1.111.2.2 14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8 1265 copy routine overhead. 1266 1267 15. Added ASCII7 form of the constructor and assign(). 1268 1269 String s("hello world", String::ASCII7); 1270
1271 mike 1.111.2.4 s.assignASCII7("hello world");
1272 mike 1.111.2.2
1273 mike 1.111.2.6 This avoids slower UTF8 processing when not needed.
1274 mike 1.111.2.2
1275 mike 1.111.2.1 ================================================================================ 1276 */

No CVS admin address has been configured