(file) Return to String.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 karl  1.98 //%2005////////////////////////////////////////////////////////////////////////
   2 mike  1.27 //
   3 karl  1.97 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
   4            // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
   5            // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
   6 karl  1.85 // IBM Corp.; EMC Corporation, The Open Group.
   7 karl  1.97 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
   8            // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
   9 karl  1.98 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
  10            // EMC Corporation; VERITAS Software Corporation; The Open Group.
  11 mike  1.27 //
  12            // Permission is hereby granted, free of charge, to any person obtaining a copy
  13 kumpf 1.41 // of this software and associated documentation files (the "Software"), to
  14            // deal in the Software without restriction, including without limitation the
  15            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  16 mike  1.27 // sell copies of the Software, and to permit persons to whom the Software is
  17            // furnished to do so, subject to the following conditions:
  18 david.dillard 1.105 //
  19 kumpf         1.41  // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
  20 mike          1.27  // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
  21                     // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
  22 kumpf         1.41  // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
  23                     // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  24                     // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  25 mike          1.27  // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  26                     // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27                     //
  28                     //==============================================================================
  29                     //
  30                     // Author: Mike Brasher (mbrasher@bmc.com)
  31                     //
  32 mike          1.111.2.9 // Modified By: 
  33                         //     Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
  34                         //     Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297
  35                         //     David Dillard, VERITAS Software Corp. (david.dillard@veritas.com)
  36                         //     Mike Brasher (mike-brasher@austin.rr.com)
  37                         //
  38 mike          1.27      //%/////////////////////////////////////////////////////////////////////////////
  39                         
  40 mike          1.111.2.6 #define PEGASUS_USE_INTERNAL_INLINES
  41 mike          1.27      #include "String.h"
  42 mike          1.111.2.6 #include <cassert>
  43 kumpf         1.48      #include "InternalException.h"
  44 david         1.69      #include "CommonUTF.h"
  45 mike          1.111.2.1 #include "CharSet.h"
  46 david         1.69      
  47 mike          1.111.2.1 #ifdef PEGASUS_STRING_ENABLE_ICU
  48 chuck         1.99      #include <unicode/ustring.h>
  49                         #include <unicode/uchar.h>
  50 david         1.69      #endif
  51                         
  52 mike          1.27      PEGASUS_NAMESPACE_BEGIN
  53                         
  54 mike          1.111.2.1 //==============================================================================
  55 kumpf         1.39      //
  56 mike          1.111.2.7 // Compile-time macros (undefined by default).
  57 mike          1.111.2.6 //
  58 mike          1.111.2.7 //     PEGASUS_STRING_ENABLE_ICU -- enables use of ICU package
  59 mike          1.111.2.6 //
  60                         //     PEGASUS_STRING_NO_THROW -- suppresses throwing of exceptions
  61                         //      
  62                         //     PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
  63                         //
  64                         //     PEGASUS_USE_INTERNAL_INLINES -- enables internal inlining feature.
  65                         //
  66                         //==============================================================================
  67                         
  68                         //==============================================================================
  69                         //
  70 mike          1.111.2.1 // File-scope definitions:
  71 kumpf         1.54      //
  72 mike          1.111.2.1 //==============================================================================
  73 kumpf         1.54      
  74 mike          1.111.2.1 // Converts 16-bit characters to upper case.
  75                         inline Uint16 _to_upper(Uint16 x)
  76 kumpf         1.54      {
  77 mike          1.111.2.1     return (x & 0xFF00) ? x : CharSet::to_upper(x);
  78 kumpf         1.54      }
  79                         
  80 mike          1.111.2.1 // Converts 16-bit characters to lower case.
  81                         inline Uint16 _to_lower(Uint16 x)
  82 kumpf         1.54      {
  83 mike          1.111.2.1     return (x & 0xFF00) ? x : CharSet::to_lower(x);
  84                         }
  85 kumpf         1.82      
  86 mike          1.111.2.1 // Rounds x to the next power of two (or just returns 8 if x < 8).
  87                         static Uint32 _next_pow_2(Uint32 x)
  88                         {
  89                             if (x < 8)
  90                         	return 8;
  91                         
  92                             x--;
  93                             x |= (x >> 1);
  94                             x |= (x >> 2);
  95                             x |= (x >> 4);
  96                             x |= (x >> 8);
  97                             x |= (x >> 16);
  98                             x++;
  99                         
 100                             return x;
 101                         }
 102                         
 103                         template<class P, class Q>
 104                         static void _copy(P* p, const Q* q, size_t n)
 105                         {
 106                             // Use loop unrolling.
 107 mike          1.111.2.1 
 108                             while (n >= 8)
 109                             {
 110                         	p[0] = q[0];
 111                         	p[1] = q[1];
 112                         	p[2] = q[2];
 113                         	p[3] = q[3];
 114                         	p[4] = q[4];
 115                         	p[5] = q[5];
 116                         	p[6] = q[6];
 117                         	p[7] = q[7];
 118                         	p += 8;
 119                         	q += 8;
 120                         	n -= 8;
 121                             }
 122                         
 123                             while (n >= 4)
 124 kumpf         1.82          {
 125 mike          1.111.2.1 	p[0] = q[0];
 126                         	p[1] = q[1];
 127                         	p[2] = q[2];
 128                         	p[3] = q[3];
 129                         	p += 4;
 130                         	q += 4;
 131                         	n -= 4;
 132 kumpf         1.82          }
 133 mike          1.111.2.1 
 134                             while (n--)
 135                         	*p++ = *q++;
 136 kumpf         1.54      }
 137                         
 138 mike          1.111.2.1 static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
 139 kumpf         1.54      {
 140 mike          1.111.2.1     while (n >= 4)
 141                             {
 142                         	if (s[0] == c)
 143                         	    return (Uint16*)s;
 144                         	if (s[1] == c)
 145                         	    return (Uint16*)&s[1];
 146                         	if (s[2] == c)
 147                         	    return (Uint16*)&s[2];
 148                         	if (s[3] == c)
 149                         	    return (Uint16*)&s[3];
 150                         
 151                         	n -= 4;
 152                         	s += 4;
 153                             }
 154                         
 155                             if (n)
 156                             {
 157                         	if (*s == c)
 158                         	    return (Uint16*)s;
 159                         	s++;
 160                         	n--;
 161 mike          1.111.2.1     }
 162                         
 163                             if (n)
 164                             {
 165                         	if (*s == c)
 166                         	    return (Uint16*)s;
 167                         	s++;
 168                         	n--;
 169                             }
 170                         
 171                             if (n && *s == c)
 172                         	return (Uint16*)s;
 173                         
 174                             // Not found!
 175                             return 0;
 176 kumpf         1.54      }
 177                         
 178 mike          1.111.2.1 static int _compare(const Uint16* s1, const Uint16* s2)
 179 kumpf         1.54      {
 180 mike          1.111.2.1     while (*s1 && *s2)
 181 kumpf         1.82          {
 182 mike          1.111.2.1         int r = *s1++ - *s2++;
 183                         
 184                                 if (r)
 185                                     return r;
 186 kumpf         1.82          }
 187 mike          1.111.2.1 
 188                             if (*s2)
 189                                 return -1;
 190                             else if (*s1)
 191                                 return 1;
 192                         
 193                             return 0;
 194 kumpf         1.54      }
 195                         
 196 mike          1.111.2.1 static int _compare_no_utf8(const Uint16* s1, const char* s2)
 197 kumpf         1.56      {
 198 mike          1.111.2.1     Uint16 c1;
 199                             Uint16 c2;
 200                         
 201                             do
 202 kumpf         1.81          {
 203 mike          1.111.2.1 	c1 = *s1++;
 204                         	c2 = *s2++;
 205                         
 206                         	if (c1 == 0)
 207                         	    return c1 - c2;
 208 kumpf         1.81          }
 209 mike          1.111.2.1     while (c1 == c2);
 210                         
 211                             return c1 - c2;
 212 kumpf         1.56      }
 213                         
 214 mike          1.111.2.1 static int _compare(const Uint16* s1, const Uint16* s2, size_t n)
 215 kumpf         1.54      {
 216 mike          1.111.2.1     // This should only be called when s1 and s2 have the same length.
 217                         
 218                             while (n-- && (*s1++ - *s2++) == 0)
 219                         	;
 220                         
 221 mike          1.111.2.8     // 
 222                         
 223 mike          1.111.2.1     return s1[-1] - s2[-1];
 224 kumpf         1.54      }
 225                         
 226 mike          1.111.2.1 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)
 227                         {
 228                             memcpy(s1, s2, n * sizeof(Uint16));
 229                         }
 230 kumpf         1.39      
 231 mike          1.111.2.1 void String_throw_out_of_bounds()
 232                         {
 233                             throw IndexOutOfBoundsException();
 234                         }
 235                         
 236                         #ifdef PEGASUS_STRING_NO_THROW
 237                         # define _check_null_pointer(ARG) /* empty */
 238                         #else
 239                         template<class T>
 240                         inline void _check_null_pointer(const T* ptr)
 241                         {
 242                             if (!ptr)
 243                         	throw NullPointer();
 244                         }
 245                         #endif
 246 mike          1.27      
 247 mike          1.111.2.1 static size_t _copy_from_utf8(Uint16* dest, const char* src, size_t n)
 248 mike          1.27      {
 249 mike          1.111.2.1     Uint16* p = dest;
 250                             const Uint8* q = (const Uint8*)src;
 251 mike          1.111.2.2 
 252                             // Process leading 7-bit ASCII characters (to avoid UTF8 overhead below
 253                             // this loop). Use factor-four loop-unrolling.
 254                         
 255                             while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
 256                             {
 257                         	p[0] = q[0];
 258                         	p[1] = q[1];
 259                         	p[2] = q[2];
 260                         	p[3] = q[3];
 261                         	p += 4;
 262                         	q += 4;
 263                         	n -= 4;
 264                             }
 265                         
 266                             switch (n)
 267                             {
 268                         	case 0:
 269                         	    return p - dest;
 270                         	case 1:
 271                         	    if (q[0] < 128)
 272 mike          1.111.2.2 	    {
 273                         		p[0] = q[0];
 274                         		return p + 1 - dest;
 275                         	    }
 276                         	    break;
 277                         	case 2:
 278                         	    if (q[0] < 128 && q[1] < 128)
 279                         	    {
 280                         		p[0] = q[0];
 281                         		p[1] = q[1];
 282                         		return p + 2 - dest;
 283                         	    }
 284                         	    break;
 285                         	case 3:
 286                         	    if (q[0] < 128 && q[1] < 128 && q[2] < 128)
 287                         	    {
 288                         		p[0] = q[0];
 289                         		p[1] = q[1];
 290                         		p[2] = q[2];
 291                         		return p + 3 - dest;
 292                         	    }
 293 mike          1.111.2.2 	    break;
 294                             }
 295                         
 296                             // Process remaining characters.
 297 mike          1.111.2.1 
 298                             while (n)
 299                             {
 300 mike          1.111.2.2 	// Optimize for 7-bit ASCII case.
 301 mike          1.111.2.1 
 302 mike          1.111.2.2 	if (*q < 128)
 303 mike          1.111.2.1 	{
 304                         	    *p++ = *q++;
 305                         	    n--;
 306                         	}
 307 mike          1.111.2.2 	else
 308                         	{
 309                         	    Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1;
 310 mike          1.111.2.1 
 311 mike          1.111.2.2 	    if (c > n || !isValid_U8(q, c) ||
 312                         		UTF8toUTF16(&q, q + c, &p, p + n) != 0)
 313                         	    {
 314                         		throw Exception("Bad UTF8 encoding");
 315                         	    }
 316 mike          1.111.2.1 
 317 mike          1.111.2.2 	    n -= c;
 318 mike          1.111.2.1 	}
 319                             }
 320 mike          1.27      
 321 mike          1.111.2.1     return p - dest;
 322                         }
 323 mike          1.27      
 324 mike          1.111.2.1 // Note: dest must be at least three times src (plus an extra byte for 
 325                         // terminator).
 326 mike          1.111.2.2 static inline size_t _copy_to_utf8(char* dest, const Uint16* src, size_t n)
 327 mike          1.111.2.1 {
 328 mike          1.111.2.2     const Uint16* q = src;
 329                             Uint8* p = (Uint8*)dest;
 330                         
 331                             while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
 332                             {
 333                         	p[0] = q[0];
 334                         	p[1] = q[1];
 335                         	p[2] = q[2];
 336                         	p[3] = q[3];
 337                         	p += 4;
 338                         	q += 4;
 339                         	n -= 4;
 340                             }
 341                         
 342                             switch (n)
 343                             {
 344                         	case 0:
 345                         	    return p - (Uint8*)dest;
 346                         	case 1:
 347                         	    if (q[0] < 128)
 348                         	    {
 349 mike          1.111.2.2 		p[0] = q[0];
 350                         		return p + 1 - (Uint8*)dest;
 351                         	    }
 352                         	    break;
 353                         	case 2:
 354                         	    if (q[0] < 128 && q[1] < 128)
 355                         	    {
 356                         		p[0] = q[0];
 357                         		p[1] = q[1];
 358                         		return p + 2 - (Uint8*)dest;
 359                         	    }
 360                         	    break;
 361                         	case 3:
 362                         	    if (q[0] < 128 && q[1] < 128 && q[2] < 128)
 363                         	    {
 364                         		p[0] = q[0];
 365                         		p[1] = q[1];
 366                         		p[2] = q[2];
 367                         		return p + 3 - (Uint8*)dest;
 368                         	    }
 369                         	    break;
 370 mike          1.111.2.2     }
 371                         
 372                             // If this line was reached, there must be characters greater than 128.
 373                         
 374                             UTF16toUTF8(&q, q + n, &p, p + 3 * n);
 375                         
 376                             return p - (Uint8*)dest;
 377 mike          1.111.2.1 }
 378 mike          1.27      
 379 mike          1.111.2.1 static inline size_t _convert(Uint16* p, const char* q, size_t n)
 380                         {
 381                         #ifdef PEGASUS_STRING_NO_UTF8
 382                             _copy(p, q, n);
 383 mike          1.27          return n;
 384 mike          1.111.2.1 #else
 385                             return _copy_from_utf8(p, q, n);
 386                         #endif
 387 mike          1.27      }
 388                         
 389 mike          1.111.2.1 //==============================================================================
 390 chuck         1.102     //
 391 mike          1.111.2.1 // class CString
 392 chuck         1.102     //
 393 mike          1.111.2.1 //==============================================================================
 394                         
 395                         CString::CString(const CString& cstr) : _rep(0)
 396 chuck         1.102     {
 397 mike          1.111.2.1     if (cstr._rep)
 398 chuck         1.102         {
 399 mike          1.111.2.1 	size_t n = strlen(cstr._rep) + 1;
 400                                 _rep = (char*)operator new(n);
 401                         	memcpy(_rep, cstr._rep, n);
 402                             }
 403                         }
 404                         
 405                         CString& CString::operator=(const CString& cstr)
 406                         {
 407                             if (&cstr != this)
 408                             {
 409                                 if (_rep)
 410 david.dillard 1.105             {
 411 mike          1.111.2.1             operator delete(_rep);
 412                                     _rep = 0;
 413 chuck         1.102             }
 414 mike          1.111.2.1 
 415                                 if (cstr._rep)
 416 chuck         1.102             {
 417 mike          1.111.2.1 	    size_t n = strlen(cstr._rep) + 1;
 418                                     _rep = (char*)operator new(n);
 419                         	    memcpy(_rep, cstr._rep, n);
 420 chuck         1.102             }
 421 mike          1.111.2.1     }
 422 chuck         1.102     
 423 mike          1.111.2.1     return *this;
 424 chuck         1.102     }
 425                         
 426 mike          1.111.2.1 //==============================================================================
 427                         //
 428                         // class StringRep
 429                         //
 430                         //==============================================================================
 431 kumpf         1.43      
 432 mike          1.111.2.1 StringRep StringRep::_empty_rep;
 433 kumpf         1.43      
 434 mike          1.111.2.1 inline StringRep* StringRep::alloc(size_t cap)
 435 mike          1.27      {
 436 mike          1.111.2.1     StringRep* rep = (StringRep*)::operator new(
 437                         	sizeof(StringRep) + cap * sizeof(Uint16));
 438                             rep->cap = cap;
 439                             Atomic_create(&rep->refs, 1);
 440                         
 441                             return rep;
 442 mike          1.27      }
 443                         
 444 mike          1.111.2.1 static inline void _reserve(StringRep*& rep, Uint32 cap)
 445 mike          1.27      {
 446 mike          1.111.2.1     if (cap > rep->cap || Atomic_get(&rep->refs) != 1)
 447                             {
 448                         	size_t n = _next_pow_2(cap);
 449                         	StringRep* new_rep = StringRep::alloc(n);
 450                         	new_rep->size = rep->size;
 451                         	_copy(new_rep->data, rep->data, rep->size + 1);
 452                         	StringRep::unref(rep);
 453                         	rep = new_rep;
 454                             }
 455 kumpf         1.39      }
 456 tony          1.66      
 457 mike          1.111.2.1 StringRep* StringRep::create(const Uint16* data, size_t size)
 458 kumpf         1.39      {
 459 mike          1.111.2.1     StringRep* rep = StringRep::alloc(size);
 460                             rep->size = size;
 461                             _copy(rep->data, data, size);
 462                             rep->data[size] = '\0';
 463                             return rep;
 464 kumpf         1.39      }
 465                         
 466 mike          1.111.2.1 StringRep* StringRep::copy_on_write(StringRep* rep)
 467 kumpf         1.39      {
 468 mike          1.111.2.1     // Return a new copy of rep. Release rep.
 469 david.dillard 1.105     
 470 mike          1.111.2.1     StringRep* new_rep = StringRep::alloc(rep->size);
 471                             new_rep->size = rep->size;
 472                             _copy(new_rep->data, rep->data, rep->size);
 473                             new_rep->data[new_rep->size] = '\0';
 474                             StringRep::unref(rep);
 475                             return new_rep;
 476 mike          1.27      }
 477                         
 478 mike          1.111.2.1 StringRep* StringRep::create(const char* data, size_t size)
 479 kumpf         1.39      {
 480 mike          1.111.2.1     StringRep* rep = StringRep::alloc(size);
 481                             rep->size = _convert((Uint16*)rep->data, data, size);
 482                             rep->data[rep->size] = '\0';
 483 david.dillard 1.105     
 484 mike          1.111.2.1     return rep;
 485 kumpf         1.39      }
 486                         
 487 mike          1.111.2.4 StringRep* StringRep::createASCII7(const char* data, size_t size)
 488 mike          1.111.2.2 {
 489                             StringRep* rep = StringRep::alloc(size);
 490                             _copy((Uint16*)rep->data, data, size);
 491                             rep->data[rep->size = size] = '\0';
 492                             return rep;
 493                         }
 494                         
 495 mike          1.111.2.1 Uint32 StringRep::length(const Uint16* str)
 496 mike          1.27      {
 497 mike          1.111.2.5     // Note: We could unroll this but it is rarely called.
 498 david.dillard 1.105     
 499 mike          1.111.2.1     const Uint16* end = (Uint16*)str;
 500                         
 501                             while (*end++)
 502                         	;
 503                         
 504                             return end - str - 1;
 505 mike          1.27      }
 506                         
 507 mike          1.111.2.1 //==============================================================================
 508                         //
 509                         // class String
 510                         //
 511                         //==============================================================================
 512 david.dillard 1.105     
 513 mike          1.111.2.1 const String String::EMPTY;
 514                         
 515                         String::String(const String& str, Uint32 n)
 516                         {
 517                             _check_bounds(n, str._rep->size);
 518                             _rep = StringRep::create(str._rep->data, n);
 519 kumpf         1.39      }
 520 mike          1.27      
 521 mike          1.111.2.1 String::String(const Char16* str)
 522 kumpf         1.39      {
 523 mike          1.111.2.1     _check_null_pointer(str);
 524                             _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
 525 mike          1.27      }
 526                         
 527 mike          1.111.2.1 String::String(const Char16* str, Uint32 n)
 528 mike          1.27      {
 529 mike          1.111.2.1     _check_null_pointer(str);
 530                             _rep = StringRep::create((Uint16*)str, n);
 531 mike          1.27      }
 532                         
 533 mike          1.111.2.1 String::String(const char* str)
 534 mike          1.27      {
 535 mike          1.111.2.1     _check_null_pointer(str);
 536                             _rep = StringRep::create(str, strlen(str));
 537 mike          1.27      }
 538                         
 539 mike          1.111.2.2 String::String(const char* str, String::ASCII7Tag tag)
 540                         {
 541                             _check_null_pointer(str);
 542 mike          1.111.2.4     _rep = StringRep::createASCII7(str, strlen(str));
 543 mike          1.111.2.2 }
 544                         
 545 mike          1.111.2.1 String::String(const char* str, Uint32 n)
 546 mike          1.27      {
 547 mike          1.111.2.1     _check_null_pointer(str);
 548                             _rep = StringRep::create(str, n);
 549                         }
 550 david.dillard 1.105     
 551 mike          1.111.2.2 String::String(const char* str, size_t n, String::ASCII7Tag tag)
 552                         {
 553                             _check_null_pointer(str);
 554 mike          1.111.2.4     _rep = StringRep::createASCII7(str, n);
 555 mike          1.111.2.2 }
 556                         
 557 mike          1.111.2.1 String::String(const String& s1, const String& s2)
 558                         {
 559                             size_t n1 = s1._rep->size;
 560                             size_t n2 = s2._rep->size;
 561                             size_t n = n1 + n2;
 562                             _rep = StringRep::alloc(n);
 563                             _copy(_rep->data, s1._rep->data, n1);
 564                             _copy(_rep->data + n1, s2._rep->data, n2);
 565                             _rep->size = n;
 566                             _rep->data[n] = '\0';
 567                         }
 568                         
 569                         String::String(const String& s1, const char* s2)
 570                         {
 571                             _check_null_pointer(s2);
 572                             size_t n1 = s1._rep->size;
 573                             size_t n2 = strlen(s2);
 574                             _rep = StringRep::alloc(n1 + n2);
 575                             _copy(_rep->data, s1._rep->data, n1);
 576                             _rep->size = n1 + _convert((Uint16*)_rep->data + n1, s2, n2);
 577                             _rep->data[_rep->size] = '\0';
 578 mike          1.111.2.1 }
 579                         
 580                         String::String(const char* s1, const String& s2)
 581                         {
 582                             _check_null_pointer(s1);
 583                             size_t n1 = strlen(s1);
 584                             size_t n2 = s2._rep->size;
 585                             _rep = StringRep::alloc(n1 + n2);
 586                             _rep->size = n2 + _convert((Uint16*)_rep->data, s1, n1);
 587                             _copy(_rep->data + n1, s2._rep->data, n2);
 588                             _rep->data[_rep->size] = '\0';
 589 mike          1.27      }
 590                         
 591 mike          1.111.2.1 String& String::assign(const String& str)
 592 mike          1.27      {
 593 mike          1.111.2.1     if (_rep != str._rep)
 594 david.dillard 1.105         {
 595 mike          1.111.2.1 	StringRep::unref(_rep);
 596                         	StringRep::ref(_rep = str._rep);
 597 david.dillard 1.105         }
 598                         
 599 mike          1.27          return *this;
 600                         }
 601                         
 602 mike          1.111.2.1 String& String::assign(const Char16* str, Uint32 n)
 603 chuck         1.102     {
 604 mike          1.111.2.1     _check_null_pointer(str);
 605                         
 606                             if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)
 607 david.dillard 1.105         {
 608 mike          1.111.2.1 	StringRep::unref(_rep);
 609                         	_rep = StringRep::alloc(n);
 610 david.dillard 1.105         }
 611                         
 612 mike          1.111.2.1     _rep->size = n;
 613                             _copy(_rep->data, (Uint16*)str, n);
 614                             _rep->data[n] = '\0';
 615                         
 616 chuck         1.102         return *this;
 617                         }
 618                         
 619 kumpf         1.39      String& String::assign(const char* str, Uint32 n)
 620 mike          1.27      {
 621 mike          1.111.2.1     _check_null_pointer(str);
 622                         
 623                             if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)
 624 david.dillard 1.105         {
 625 mike          1.111.2.1 	StringRep::unref(_rep);
 626                         	_rep = StringRep::alloc(n);
 627 david.dillard 1.105         }
 628                         
 629 mike          1.111.2.1     _rep->size = _convert(_rep->data, str, n);
 630                             _rep->data[_rep->size] = 0;
 631                         
 632 mike          1.27          return *this;
 633                         }
 634                         
 635 mike          1.111.2.4 String& String::assignASCII7(const char* str, Uint32 n)
 636 mike          1.111.2.2 {
 637                             _check_null_pointer(str);
 638                         
 639                             if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)
 640                             {
 641                         	StringRep::unref(_rep);
 642                         	_rep = StringRep::alloc(n);
 643                             }
 644                         
 645                             _copy(_rep->data, str, n);
 646                             _rep->data[_rep->size = n] = 0;
 647                         
 648                             return *this;
 649                         }
 650                         
 651 kumpf         1.39      void String::clear()
 652                         {
 653 mike          1.111.2.1     if (_rep->size)
 654                             {
 655                         	if (Atomic_get(&_rep->refs) == 1)
 656                         	    _rep->size = 0;
 657                         	else
 658                         	{
 659                         	    StringRep::unref(_rep);
 660                         	    _rep = &StringRep::_empty_rep;
 661                         	}
 662                             }
 663 kumpf         1.39      }
 664                         
 665 mike          1.111.2.1 void String::reserveCapacity(Uint32 cap)
 666 kumpf         1.39      {
 667 mike          1.111.2.1     _reserve(_rep, cap);
 668 kumpf         1.39      }
 669                         
 670 mike          1.111.2.1 CString String::getCString() const
 671                         {
 672                         #ifdef PEGASUS_STRING_NO_UTF8
 673                             char* str = (char*)operator new(_rep->size + 1);
 674                             _copy(str, _rep->data, _rep->size);
 675                             str[_rep->size] = '\0';
 676                             return CString(str);
 677 gs.keenan     1.110     #else
 678 mike          1.111.2.1     Uint32 n = 3 * _rep->size;
 679                             char* str = (char*)operator new(n + 1);
 680 mike          1.111.2.2     size_t size = _copy_to_utf8(str, _rep->data, _rep->size);
 681 mike          1.111.2.1     str[size] = '\0';
 682                             return CString(str);
 683 gs.keenan     1.110     #endif
 684 kumpf         1.39      }
 685                         
 686 mike          1.111.2.1 String& String::append(const Char16* str, Uint32 n)
 687 kumpf         1.39      {
 688 mike          1.111.2.1     _check_null_pointer(str);
 689 kumpf         1.39      
 690 mike          1.111.2.1     size_t old_size = _rep->size;
 691                             size_t new_size = old_size + n;
 692                             _reserve(_rep, new_size);
 693                             _copy(_rep->data + old_size, (Uint16*)str, n);
 694                             _rep->size = new_size;
 695                             _rep->data[new_size] = '\0';
 696 mike          1.27      
 697 mike          1.111.2.1     return *this;
 698 mike          1.27      }
 699                         
 700 mike          1.111.2.1 String& String::append(const String& str)
 701 mike          1.27      {
 702 mike          1.111.2.1     return append((Char16*)str._rep->data, str._rep->size);
 703 mike          1.27      }
 704                         
 705 mike          1.111.2.1 String& String::append(const char* str, Uint32 size)
 706 kumpf         1.39      {
 707 mike          1.111.2.1     _check_null_pointer(str);
 708                         
 709                             size_t old_size = _rep->size;
 710                             size_t cap = old_size + size;
 711                         
 712                             _reserve(_rep, cap);
 713                             _rep->size += _convert((Uint16*)_rep->data + old_size, str, size);
 714                             _rep->data[_rep->size] = '\0';
 715 kumpf         1.39      
 716 mike          1.27          return *this;
 717                         }
 718                         
 719 mike          1.111.2.1 void String::remove(Uint32 index, Uint32 n)
 720 mike          1.27      {
 721 mike          1.111.2.1     if (n == PEG_NOT_FOUND)
 722                                 n = _rep->size - index;
 723 mike          1.27      
 724 mike          1.111.2.1     _check_bounds(index + n, _rep->size);
 725                         
 726                             if (Atomic_get(&_rep->refs) != 1)
 727                         	_rep = StringRep::copy_on_write(_rep);
 728 mike          1.27      
 729 mike          1.111.2.1     assert(index + n <= _rep->size);
 730 mike          1.27      
 731 mike          1.111.2.1     size_t rem = _rep->size - (index + n);
 732                             Uint16* data = _rep->data;
 733                         
 734                             if (rem)
 735                                 memmove(data + index, data + index + n, rem * sizeof(Uint16));
 736                         
 737                             _rep->size -= n;
 738                             data[_rep->size] = '\0';
 739 mike          1.27      }
 740                         
 741 mike          1.111.2.1 String String::subString(Uint32 index, Uint32 n) const
 742 mike          1.27      {
 743 mike          1.111.2.5     // Note: this implementation is very permissive but used for
 744 mike          1.111.2.1     // backwards compatibility.
 745                         
 746                             if (index < _rep->size)
 747 mike          1.27          {
 748 mike          1.111.2.1 	if (n == PEG_NOT_FOUND || n > _rep->size - index)
 749                         	    n = _rep->size - index;
 750 mike          1.27      
 751 mike          1.111.2.1 	return String((Char16*)_rep->data + index, n);
 752 mike          1.27          }
 753 david.dillard 1.105     
 754                             return String();
 755 mike          1.27      }
 756                         
 757                         Uint32 String::find(Char16 c) const
 758                         {
 759 mike          1.111.2.1     Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
 760 mike          1.27      
 761 mike          1.111.2.1     if (p)
 762                         	return p - _rep->data;
 763 mike          1.27      
 764                             return PEG_NOT_FOUND;
 765                         }
 766                         
 767 kumpf         1.53      Uint32 String::find(Uint32 index, Char16 c) const
 768 mike          1.30      {
 769 mike          1.111.2.1     _check_bounds(index, _rep->size);
 770 mike          1.30      
 771 mike          1.111.2.1     if (index >= _rep->size)
 772                         	return PEG_NOT_FOUND;
 773                         
 774                             Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
 775                         
 776                             if (p)
 777                         	return p - _rep->data;
 778 mike          1.30      
 779                             return PEG_NOT_FOUND;
 780                         }
 781                         
 782 mike          1.111.2.1 Uint32 String::_find_aux(const Char16* s, Uint32 n) const
 783 mike          1.27      {
 784 mike          1.111.2.1     _check_null_pointer(s);
 785 mike          1.27      
 786 mike          1.111.2.1     const Uint16* data = _rep->data;
 787                             size_t rem = _rep->size;
 788 mike          1.30      
 789 mike          1.111.2.1     while (n <= rem)
 790 mike          1.27          {
 791 mike          1.111.2.1 	Uint16* p = (Uint16*)_find(data, rem, s[0]);
 792 david.dillard 1.105     
 793 mike          1.111.2.1 	if (!p)
 794                         	    break;
 795                         
 796                         	if (memcmp(p, s, n * sizeof(Uint16)) == 0)
 797                         	    return p - _rep->data;
 798                         
 799                         	p++;
 800                         	rem -= p - data;
 801                         	data = p;
 802 mike          1.27          }
 803 mike          1.111.2.1 
 804 mike          1.27          return PEG_NOT_FOUND;
 805                         }
 806                         
 807 mike          1.111.2.1 Uint32 String::find(const char* s) const
 808                         {
 809                             _check_null_pointer(s);
 810                         
 811 mike          1.111.2.5     // Note: could optimize away creation of temporary, but this is rarely
 812                             // called.
 813 mike          1.111.2.1     return find(String(s));
 814                         }
 815                         
 816 mike          1.27      Uint32 String::reverseFind(Char16 c) const
 817                         {
 818 mike          1.111.2.1     Uint16 x = c;
 819                             Uint16* p = _rep->data;
 820                             Uint16* q = _rep->data + _rep->size;
 821 mike          1.27      
 822 mike          1.111.2.1     while (q != p)
 823 mike          1.27          {
 824 mike          1.111.2.1 	if (*--q == x)
 825                         	    return q - p;
 826 mike          1.27          }
 827                         
 828                             return PEG_NOT_FOUND;
 829                         }
 830                         
 831                         void String::toLower()
 832                         {
 833 mike          1.111.2.1 #ifdef PEGASUS_STRING_ENABLE_ICU
 834                         
 835 yi.zhou       1.108         if (InitializeICU::initICUSuccessful())
 836 david         1.90          {
 837 mike          1.111.2.1 	//// First calculate size of resulting string. u_strToLower() returns
 838                         	//// only the size when zero is passed as the destination size argument.
 839                         
 840 yi.zhou       1.108             UErrorCode err = U_ZERO_ERROR;
 841                         
 842 mike          1.111.2.1         int32_t new_size = u_strToLower(
 843                         	    NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
 844 chuck         1.99      
 845 mike          1.111.2.1 	//// Reserve enough space for the result.
 846                         
 847                         	if ((Uint32)new_size > _rep->cap)
 848                         	    _reserve(_rep, new_size);
 849                         
 850                         	//// Perform the conversion (overlapping buffers are allowed).
 851                         
 852                                 u_strToLower((UChar*)_rep->data, new_size,
 853                         	    (UChar*)_rep->data, _rep->size, NULL, &err);
 854 yi.zhou       1.108     
 855 mike          1.111.2.1 	_rep->size = new_size;
 856 david         1.90          }
 857 mike          1.111.2.1 
 858                         #endif /* PEGASUS_STRING_ENABLE_ICU */
 859                         
 860                             if (Atomic_get(&_rep->refs) != 1)
 861                         	_rep = StringRep::copy_on_write(_rep);
 862                         
 863                             Uint16* p = _rep->data;
 864                             size_t n = _rep->size;
 865                         
 866                             for (; n--; p++)
 867 david         1.90          {
 868 mike          1.111.2.1 	if (!(*p & 0xFF00))
 869                         	    *p = _to_lower(*p);
 870 mike          1.27          }
 871 kumpf         1.39      }
 872                         
 873 chuck         1.99      void String::toUpper()
 874 david         1.90      {
 875 mike          1.111.2.1 #ifdef PEGASUS_STRING_ENABLE_ICU
 876                         
 877 yi.zhou       1.108         if (InitializeICU::initICUSuccessful())
 878 chuck         1.99          {
 879 mike          1.111.2.1 	//// First calculate size of resulting string. u_strToUpper() returns
 880                         	//// only the size when zero is passed as the destination size argument.
 881                         
 882 yi.zhou       1.108             UErrorCode err = U_ZERO_ERROR;
 883                         
 884 mike          1.111.2.1         int32_t new_size = u_strToUpper(
 885                         	    NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
 886 chuck         1.99      
 887 mike          1.111.2.1 	//// Reserve enough space for the result.
 888 yi.zhou       1.108     
 889 mike          1.111.2.1 	if ((Uint32)new_size > _rep->cap)
 890                         	    _reserve(_rep, new_size);
 891 david         1.90      
 892 mike          1.111.2.1 	//// Perform the conversion (overlapping buffers are allowed).
 893 kumpf         1.39      
 894 mike          1.111.2.1         u_strToUpper((UChar*)_rep->data, new_size,
 895                         	    (UChar*)_rep->data, _rep->size, NULL, &err);
 896 mike          1.27      
 897 mike          1.111.2.1 	_rep->size = new_size;
 898 mike          1.27          }
 899                         
 900 mike          1.111.2.1 #endif /* PEGASUS_STRING_ENABLE_ICU */
 901                         
 902                             if (Atomic_get(&_rep->refs) != 1)
 903                         	_rep = StringRep::copy_on_write(_rep);
 904                         
 905                             Uint16* p = _rep->data;
 906                             size_t n = _rep->size;
 907                         
 908                             for (; n--; p++)
 909                         	*p = _to_upper(*p);
 910 mike          1.27      }
 911                         
 912 mike          1.111.2.1 int String::compare(const String& s1, const String& s2, Uint32 n)
 913 mike          1.30      {
 914 mike          1.111.2.1     assert(n <= s1._rep->size);
 915                             assert(n <= s2._rep->size);
 916 kumpf         1.43      
 917 mike          1.111.2.1     // Ignoring error in which n is greater than s1.size() or s2.size()
 918                             return _compare(s1._rep->data, s2._rep->data, n);
 919                         }
 920 mike          1.30      
 921 mike          1.111.2.1 int String::compare(const String& s1, const String& s2)
 922                         {
 923                             return _compare(s1._rep->data, s2._rep->data);
 924                         }
 925 mike          1.30      
 926 mike          1.111.2.1 int String::compare(const String& s1, const char* s2)
 927                         {
 928                             _check_null_pointer(s2);
 929 mike          1.30      
 930 mike          1.111.2.1 #ifdef PEGASUS_STRING_NO_UTF8
 931                             return _compare_no_utf8(s1._rep->data, s2);
 932                         #else
 933                             // ATTN: optimize this!
 934                             return String::compare(s1, String(s2));
 935                         #endif
 936 mike          1.30      }
 937                         
 938 mike          1.111.2.1 int String::compareNoCase(const String& str1, const String& str2)
 939 kumpf         1.40      {
 940 mike          1.111.2.1 #ifdef PEGASUS_STRING_ENABLE_ICU
 941                         
 942 yi.zhou       1.108         if (InitializeICU::initICUSuccessful())
 943                             {
 944 mike          1.111.2.1         return  u_strcasecmp(
 945                         	    str1._rep->data, str2._rep->data, U_FOLD_CASE_DEFAULT);
 946 yi.zhou       1.108         }
 947 kumpf         1.40      
 948 mike          1.111.2.1 #endif /* PEGASUS_STRING_ENABLE_ICU */
 949 kumpf         1.40      
 950 mike          1.111.2.1     const Uint16* s1 = str1._rep->data;
 951                             const Uint16* s2 = str2._rep->data;
 952                         
 953                             while (*s1 && *s2)
 954                             {
 955                                 int r = _to_lower(*s1++) - _to_lower(*s2++);
 956 kumpf         1.40      
 957 david.dillard 1.105             if (r)
 958                                     return r;
 959 kumpf         1.40          }
 960                         
 961 mike          1.111.2.1     if (*s2)
 962 david.dillard 1.105             return -1;
 963 mike          1.111.2.1     else if (*s1)
 964 david.dillard 1.105             return 1;
 965 kumpf         1.40      
 966                             return 0;
 967                         }
 968                         
 969 mike          1.111.2.1 Boolean String::equalNoCase_aux(const String& s1, const String& s2)
 970 mike          1.27      {
 971 mike          1.111.2.1 #ifdef PEGASUS_STRING_ENABLE_ICU
 972 mike          1.27      
 973 mike          1.111.2.1     return String::compareNoCase(s1, s2) == 0;
 974 kumpf         1.39      
 975 mike          1.111.2.1 #else /* PEGASUS_STRING_ENABLE_ICU */
 976 kumpf         1.39      
 977 mike          1.111.2.1     Uint16* p = (Uint16*)s1._rep->data;
 978                             Uint16* q = (Uint16*)s2._rep->data;
 979                             Uint32 n = s2._rep->size;
 980                         
 981                             while (n >= 8)
 982                             {
 983                         	if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0]))) ||
 984                         	    ((p[1] - q[1]) && (_to_upper(p[1]) - _to_upper(q[1]))) ||
 985                         	    ((p[2] - q[2]) && (_to_upper(p[2]) - _to_upper(q[2]))) ||
 986                         	    ((p[3] - q[3]) && (_to_upper(p[3]) - _to_upper(q[3]))) ||
 987                         	    ((p[4] - q[4]) && (_to_upper(p[4]) - _to_upper(q[4]))) ||
 988                         	    ((p[5] - q[5]) && (_to_upper(p[5]) - _to_upper(q[5]))) ||
 989                         	    ((p[6] - q[6]) && (_to_upper(p[6]) - _to_upper(q[6]))) ||
 990                         	    ((p[7] - q[7]) && (_to_upper(p[7]) - _to_upper(q[7]))))
 991                         	{
 992                         	    return false;
 993                         	}
 994                         
 995                         	n -= 8;
 996                         	p += 8;
 997                         	q += 8;
 998 mike          1.111.2.1     }
 999                         
1000                             while (n >= 4)
1001                             {
1002                         	if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0]))) ||
1003                         	    ((p[1] - q[1]) && (_to_upper(p[1]) - _to_upper(q[1]))) ||
1004                         	    ((p[2] - q[2]) && (_to_upper(p[2]) - _to_upper(q[2]))) ||
1005                         	    ((p[3] - q[3]) && (_to_upper(p[3]) - _to_upper(q[3]))))
1006                         	{
1007                         	    return false;
1008                         	}
1009                         
1010                         	n -= 4;
1011                         	p += 4;
1012                         	q += 4;
1013                             }
1014 mike          1.27      
1015 kumpf         1.39          while (n--)
1016                             {
1017 mike          1.111.2.1 	if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0]))))
1018                         	    return false;
1019                         
1020                         	p++;
1021                         	q++;
1022 kumpf         1.39          }
1023 mike          1.28      
1024 kumpf         1.39          return true;
1025 david         1.69      
1026 mike          1.111.2.1 #endif /* PEGASUS_STRING_ENABLE_ICU */
1027                         }
1028 mike          1.27      
1029 mike          1.111.2.1 Boolean String::equalNoCase(const String& s1, const char* s2)
1030 david         1.69      {
1031 mike          1.111.2.1     _check_null_pointer(s2);
1032 david         1.69      
1033 mike          1.111.2.1 #if defined(PEGASUS_STRING_ENABLE_ICU)
1034 david.dillard 1.105     
1035 mike          1.111.2.1     return String::equalNoCase(s1, String(s2));
1036 david         1.71      
1037 mike          1.111.2.1 #elif defined(PEGASUS_STRING_NO_UTF8)
1038 kumpf         1.42      
1039 mike          1.111.2.1     const Uint16* p1 = (Uint16*)s1._rep->data;
1040                             const char* p2 = s2;
1041                             size_t n = s1._rep->size;
1042 karl          1.36      
1043 mike          1.111.2.1     while (n--)
1044                             {
1045                         	if (!*p2)
1046                         	    return false;
1047 david.dillard 1.105     
1048 mike          1.111.2.1 	if (_to_upper(*p1++) != CharSet::to_upper(int(*p2++)))
1049                         	    return false;
1050                             }
1051                             
1052                             return true;
1053 david.dillard 1.105     
1054 mike          1.111.2.1 #else /* PEGASUS_STRING_ENABLE_ICU */
1055 chuck         1.78      
1056 mike          1.111.2.1     // ATTN: optimize this!
1057                             return String::equalNoCase(s1, String(s2));
1058 david.dillard 1.105     
1059 mike          1.111.2.1 #endif /* PEGASUS_STRING_ENABLE_ICU */
1060                         }
1061 karl          1.36      
1062 mike          1.111.2.1 Boolean String::equal(const String& s1, const String& s2)
1063 karl          1.36      {
1064 mike          1.111.2.1     return s1._rep->size == s2._rep->size && memcmp(s1._rep->data, 
1065                         	s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;
1066 karl          1.36      }
1067                         
1068 mike          1.111.2.1 Boolean String::equal(const String& s1, const char* s2)
1069 karl          1.36      {
1070 mike          1.111.2.1 #ifdef PEGASUS_STRING_NO_UTF8
1071 kumpf         1.39      
1072 mike          1.111.2.1     _check_null_pointer(s2);
1073 kumpf         1.39      
1074 mike          1.111.2.1     const Uint16* p = (Uint16*)s1._rep->data;
1075                             const char* q = s2;
1076 kumpf         1.39      
1077 mike          1.111.2.1     while (*p && *q)
1078                             {
1079                         	if (*p++ != Uint16(*q++))
1080                         	    return false;
1081                             }
1082 kumpf         1.39      
1083 mike          1.111.2.1     return !(*p || *q);
1084 kumpf         1.39      
1085 mike          1.111.2.1 #else /* PEGASUS_STRING_NO_UTF8 */
1086 kumpf         1.39      
1087 mike          1.111.2.1     return String::equal(s1, String(s2));
1088                         
1089                         #endif /* PEGASUS_STRING_NO_UTF8 */
1090 kumpf         1.39      }
1091                         
1092 kumpf         1.47      PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
1093 kumpf         1.39      {
1094 david         1.69      #if defined(PEGASUS_OS_OS400)
1095 mike          1.111.2.1 
1096 david         1.93          CString cstr = str.getCString();
1097 david         1.69          const char* utf8str = cstr;
1098                             os << utf8str;
1099                         
1100 mike          1.111.2.1 #elif defined(PEGASUS_STRING_ENABLE_ICU)
1101                         
1102 yi.zhou       1.108         if (InitializeICU::initICUSuccessful())
1103                             {
1104 david.dillard 1.105             char *buf = NULL;
1105                                 const int size = str.size() * 6;
1106 mike          1.111.2.1         UnicodeString UniStr(
1107                         	    (const UChar *)str.getChar16Data(), (int32_t)str.size());
1108 david.dillard 1.105             Uint32 bufsize = UniStr.extract(0,size,buf);
1109                                 buf = new char[bufsize+1];
1110                                 UniStr.extract(0,bufsize,buf);
1111                                 os << buf;
1112                                 os.flush();
1113                                 delete [] buf;
1114 yi.zhou       1.108         }
1115 mike          1.111.2.1 
1116                         #endif /* PEGASUS_OS_OS400 */
1117                         
1118                             for (Uint32 i = 0, n = str.size(); i < n; i++)
1119 yi.zhou       1.108         {
1120 mike          1.111.2.1 	Uint16 code = str[i];
1121 david.dillard 1.105     
1122 mike          1.111.2.1 	if (code > 0 && !(code & 0xFF00))
1123                         	    os << char(code);
1124                         	else
1125                         	{
1126                         	    // Print in hex format:
1127                         	    char buffer[8];
1128                         	    sprintf(buffer, "\\x%04X", code);
1129                         	    os << buffer;
1130                         	}
1131 yi.zhou       1.108         }
1132 kumpf         1.39      
1133                             return os;
1134                         }
1135                         
1136 mike          1.111.2.1 void String::_append_char_aux()
1137 kumpf         1.39      {
1138 mike          1.111.2.1     StringRep* tmp;
1139 kumpf         1.39      
1140 mike          1.111.2.1     if (_rep->cap)
1141                             {
1142                         	tmp = StringRep::alloc(2 * _rep->cap);
1143                         	tmp->size = _rep->size;
1144                         	_copy(tmp->data, _rep->data, _rep->size);
1145                             }
1146                             else
1147                             {
1148                         	tmp = StringRep::alloc(8);
1149                         	tmp->size = 0;
1150                             }
1151 kumpf         1.39      
1152 mike          1.111.2.1     StringRep::unref(_rep);
1153                             _rep = tmp;
1154 kumpf         1.39      }
1155                         
1156 mike          1.111.2.1 PEGASUS_NAMESPACE_END
1157 kumpf         1.39      
1158 mike          1.111.2.1 /*
1159                         ================================================================================
1160 kumpf         1.39      
1161 mike          1.111.2.1 String optimizations:
1162                         
1163                             1.  Added mechanism allowing certain functions to be inlined only when
1164                         	used by internal Pegasus modules. External modules (i.e., providers)
1165                         	link to a non-inline version, which allows for binary compatibility.
1166                         
1167                             2.  Implemented copy-on-write with atomic increment/decrement. This
 1168                         	yielded a 10% improvement for the 'gc' benchmark and an 11% improvement
1169                         	for the 'ni1000' benchmark.
1170                         
1171                             3.	Employed loop unrolling in several places. For example, see:
1172                         
1173                         	    static Uint16* _find(const Uint16* s, size_t n, Uint16 c);
1174                         
1175                             4.  Used the "empty-rep" optimization (described in whitepaper from the
1176                         	GCC Developers Summit). This reduced default construction to a simple
1177                         	pointer assignment.
1178                         
1179                         	    inline String::String() : _rep(&_empty_rep) { }
1180                         
1181                             5.  Implemented Uint16 versions of toupper() and tolower() using tables.
1182 mike          1.111.2.1 	For example:
1183                         
1184                         	    static const char _upper[] =
1185                         	    {
1186                         		0,1,2,...255
1187                         	    };
1188                         
1189                         	    inline Uint16 _to_upper(Uint16 x)
1190                         	    {
1191                         		return (x & 0xFF00) ? x : _upper[x];
1192                         	    }
1193                         
1194                         	This outperforms the system implementation by avoiding an anding 
1195                         	operation.
1196                         
1197                             6.  Implemented char* version of the following member functions to 
 1198                         	eliminate unnecessary creation of anonymous string objects 
1199                         	(temporaries).
1200                         
1201                         	    String(const String& s1, const char* s2);
1202                         	    String(const char* s1, const String& s2);
1203 mike          1.111.2.1 	    String& String::operator=(const char* str);
1204                         	    Uint32 String::find(const char* s) const;
1205                         	    bool String::equal(const String& s1, const char* s2);
1206                         	    static int String::compare(const String& s1, const char* s2);
1207                         	    String& String::append(const char* str);
1208                         	    String& String::append(const char* str, Uint32 size);
1209                         	    static bool String::equalNoCase(const String& s1, const char* s2);
1210                         	    String& operator=(const char* str)
1211                         	    String& String::assign(const char* str)
1212                         	    String& String::append(const char* str)
1213                         	    Boolean operator==(const String& s1, const char* s2)
1214                         	    Boolean operator==(const char* s1, const String& s2)
1215                         	    Boolean operator!=(const String& s1, const char* s2)
1216                         	    Boolean operator!=(const char* s1, const String& s2)
1217                         	    Boolean operator<(const String& s1, const char* s2)
1218                         	    Boolean operator<(const char* s1, const String& s2)
1219                         	    Boolean operator>(const String& s1, const char* s2)
1220                         	    Boolean operator>(const char* s1, const String& s2)
1221                         	    Boolean operator<=(const String& s1, const char* s2)
1222                         	    Boolean operator<=(const char* s1, const String& s2)
1223                         	    Boolean operator>=(const String& s1, const char* s2)
1224 mike          1.111.2.1 	    Boolean operator>=(const char* s1, const String& s2)
1225                         	    String operator+(const String& s1, const char* s2)
1226                         	    String operator+(const char* s1, const String& s2)
1227                         
1228                             7.  Optimized _next_pow_2(), used in rounding the capacity to the next 
1229                                 power of two (algorithm from the book "Hacker's Delight").
1230                         
1231                         	    static Uint32 _next_pow_2(Uint32 x)
1232                         	    {
1233                         		if (x < 8)
1234                         		    return 8;
1235                         
1236                         		x--;
1237                         		x |= (x >> 1);
1238                         		x |= (x >> 2);
1239                         		x |= (x >> 4);
1240                         		x |= (x >> 8);
1241                         		x |= (x >> 16);
1242                         		x++;
1243                         
1244                         		return x;
1245 mike          1.111.2.1 	    }
1246                         
1247                             8.  Implemented "concatenating constructors" to eliminate temporaries
1248                         	created by operator+(). This scheme employs the "return-value 
1249                         	optimization" described by Stan Lippman.
1250                         
1251                         	    inline String operator+(const String& s1, const String& s2)
1252                         	    {
1253                         		return String(s1, s2, 0);
1254                         	    }
1255                         
1256                             9.  Experimented to find the optimal initial size for a short string.
1257                         	Eight seems to offer the best tradeoff between space and time.
1258                         
1259                             10. Inlined all members of the Char16 class.
1260                         
1261                             11. Used Uint16 internally in the String class. This showed no improvement
1262                         	since Char16 was already fully inlined and was essentially reduced to
1263                         	Uint16 in any case.
1264                         
1265                             12. Implemented conditional logic (#if) allowing error checking logic to
1266 mike          1.111.2.1 	be excluded for better performance. Examples include bounds checking 
1267                         	and null-pointer checking.
1268                         
1269                             13. Used memcpy() and memcmp() where possible. These are implemented using
1270                         	the rep family of instructions under Intel and are much faster.
1271                         
1272 mike          1.111.2.2     14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8 
1273                         	copy routine overhead.
1274                         
1275                             15. Added ASCII7 form of the constructor and assign().
1276                         
1277                         	    String s("hello world", String::ASCII7);
1278                         
1279 mike          1.111.2.4 	    s.assignASCII7("hello world");
1280 mike          1.111.2.2 
1281 mike          1.111.2.6 	This avoids slower UTF8 processing when not needed.
1282 mike          1.111.2.2 
1283 mike          1.111.2.1 ================================================================================
1284                         */

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2