(file) Return to String.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 karl  1.98 //%2005////////////////////////////////////////////////////////////////////////
   2 mike  1.27 //
   3 karl  1.97 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
   4            // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
   5            // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
   6 karl  1.85 // IBM Corp.; EMC Corporation, The Open Group.
   7 karl  1.97 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
   8            // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
   9 karl  1.98 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
  10            // EMC Corporation; VERITAS Software Corporation; The Open Group.
  11 mike  1.27 //
  12            // Permission is hereby granted, free of charge, to any person obtaining a copy
  13 kumpf 1.41 // of this software and associated documentation files (the "Software"), to
  14            // deal in the Software without restriction, including without limitation the
  15            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  16 mike  1.27 // sell copies of the Software, and to permit persons to whom the Software is
  17            // furnished to do so, subject to the following conditions:
  18 david.dillard 1.105 //
  19 kumpf         1.41  // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
  20 mike          1.27  // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
  21                     // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
  22 kumpf         1.41  // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
  23                     // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  24                     // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  25 mike          1.27  // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  26                     // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27                     //
  28                     //==============================================================================
  29                     //
  30                     // Author: Mike Brasher (mbrasher@bmc.com)
  31                     //
  32                     //%/////////////////////////////////////////////////////////////////////////////
  33                     
  34 mike          1.111.2.6 #define PEGASUS_USE_INTERNAL_INLINES
  35 mike          1.27      #include "String.h"
  36 mike          1.111.2.6 #include <cassert>
  37 kumpf         1.48      #include "InternalException.h"
  38 david         1.69      #include "CommonUTF.h"
  39 mike          1.111.2.1 #include "CharSet.h"
  40 david         1.69      
  41 mike          1.111.2.1 #ifdef PEGASUS_STRING_ENABLE_ICU
  42 chuck         1.99      #include <unicode/ustring.h>
  43                         #include <unicode/uchar.h>
  44 david         1.69      #endif
  45                         
  46 mike          1.27      PEGASUS_NAMESPACE_BEGIN
  47                         
  48 mike          1.111.2.1 //==============================================================================
  49 kumpf         1.39      //
  50 mike          1.111.2.6 // Compile-time switches (defined macros).
  51                         //
  52                         //     PEGASUS_STRING_ENABLE_ICU -- enables use of ICU package.
  53                         //
  54                         //     PEGASUS_STRING_NO_THROW -- suppresses throwing of exceptions
  55                         //      
  56                         //     PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
  57                         //
  58                         //     PEGASUS_USE_INTERNAL_INLINES -- enables internal inlining feature.
  59                         //
  60                         //==============================================================================
  61                         
  62                         //==============================================================================
  63                         //
  64 mike          1.111.2.1 // File-scope definitions:
  65 kumpf         1.54      //
  66 mike          1.111.2.1 //==============================================================================
  67 kumpf         1.54      
  68 mike          1.111.2.1 // Converts 16-bit characters to upper case.
  69                         inline Uint16 _to_upper(Uint16 x)
  70 kumpf         1.54      {
  71 mike          1.111.2.1     return (x & 0xFF00) ? x : CharSet::to_upper(x);
  72 kumpf         1.54      }
  73                         
  74 mike          1.111.2.1 // Converts 16-bit characters to lower case.
  75                         inline Uint16 _to_lower(Uint16 x)
  76 kumpf         1.54      {
  77 mike          1.111.2.1     return (x & 0xFF00) ? x : CharSet::to_lower(x);
  78                         }
  79 kumpf         1.82      
  80 mike          1.111.2.1 // Rounds x to the next power of two (or just returns 8 if x < 8).
  81                         static Uint32 _next_pow_2(Uint32 x)
  82                         {
  83                             if (x < 8)
  84                         	return 8;
  85                         
  86                             x--;
  87                             x |= (x >> 1);
  88                             x |= (x >> 2);
  89                             x |= (x >> 4);
  90                             x |= (x >> 8);
  91                             x |= (x >> 16);
  92                             x++;
  93                         
  94                             return x;
  95                         }
  96                         
  // Copies n elements from q to p, converting each element from Q to P by
  // plain assignment. The ranges are assumed not to overlap.
  template<class P, class Q>
  static void _copy(P* p, const Q* q, size_t n)
  {
      for (const Q* const stop = q + n; q != stop; ++q, ++p)
          *p = *q;
  }
 131                         
 132 mike          1.111.2.1 static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
 133 kumpf         1.54      {
 134 mike          1.111.2.1     while (n >= 4)
 135                             {
 136                         	if (s[0] == c)
 137                         	    return (Uint16*)s;
 138                         	if (s[1] == c)
 139                         	    return (Uint16*)&s[1];
 140                         	if (s[2] == c)
 141                         	    return (Uint16*)&s[2];
 142                         	if (s[3] == c)
 143                         	    return (Uint16*)&s[3];
 144                         
 145                         	n -= 4;
 146                         	s += 4;
 147                             }
 148                         
 149                             if (n)
 150                             {
 151                         	if (*s == c)
 152                         	    return (Uint16*)s;
 153                         	s++;
 154                         	n--;
 155 mike          1.111.2.1     }
 156                         
 157                             if (n)
 158                             {
 159                         	if (*s == c)
 160                         	    return (Uint16*)s;
 161                         	s++;
 162                         	n--;
 163                             }
 164                         
 165                             if (n && *s == c)
 166                         	return (Uint16*)s;
 167                         
 168                             // Not found!
 169                             return 0;
 170 kumpf         1.54      }
 171                         
 172 mike          1.111.2.1 static int _compare(const Uint16* s1, const Uint16* s2)
 173 kumpf         1.54      {
 174 mike          1.111.2.1     while (*s1 && *s2)
 175 kumpf         1.82          {
 176 mike          1.111.2.1         int r = *s1++ - *s2++;
 177                         
 178                                 if (r)
 179                                     return r;
 180 kumpf         1.82          }
 181 mike          1.111.2.1 
 182                             if (*s2)
 183                                 return -1;
 184                             else if (*s1)
 185                                 return 1;
 186                         
 187                             return 0;
 188 kumpf         1.54      }
 189                         
 190 mike          1.111.2.1 static int _compare_no_utf8(const Uint16* s1, const char* s2)
 191 kumpf         1.56      {
 192 mike          1.111.2.1     Uint16 c1;
 193                             Uint16 c2;
 194                         
 195                             do
 196 kumpf         1.81          {
 197 mike          1.111.2.1 	c1 = *s1++;
 198                         	c2 = *s2++;
 199                         
 200                         	if (c1 == 0)
 201                         	    return c1 - c2;
 202 kumpf         1.81          }
 203 mike          1.111.2.1     while (c1 == c2);
 204                         
 205                             return c1 - c2;
 206 kumpf         1.56      }
 207                         
 208 mike          1.111.2.1 static int _compare(const Uint16* s1, const Uint16* s2, size_t n)
 209 kumpf         1.54      {
 210 mike          1.111.2.1     // This should only be called when s1 and s2 have the same length.
 211                         
 212                             while (n-- && (*s1++ - *s2++) == 0)
 213                         	;
 214                         
 215                             return s1[-1] - s2[-1];
 216 kumpf         1.54      }
 217                         
 218 mike          1.111.2.1 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)
 219                         {
 220                             memcpy(s1, s2, n * sizeof(Uint16));
 221                         }
 222 kumpf         1.39      
 223 mike          1.111.2.1 void String_throw_out_of_bounds()
 224                         {
 225                             throw IndexOutOfBoundsException();
 226                         }
 227                         
 228                         #ifdef PEGASUS_STRING_NO_THROW
 229                         # define _check_null_pointer(ARG) /* empty */
 230                         #else
 231                         template<class T>
 232                         inline void _check_null_pointer(const T* ptr)
 233                         {
 234                             if (!ptr)
 235                         	throw NullPointer();
 236                         }
 237                         #endif
 238 mike          1.27      
 239 mike          1.111.2.1 static size_t _copy_from_utf8(Uint16* dest, const char* src, size_t n)
 240 mike          1.27      {
 241 mike          1.111.2.1     Uint16* p = dest;
 242                             const Uint8* q = (const Uint8*)src;
 243 mike          1.111.2.2 
 244                             // Process leading 7-bit ASCII characters (to avoid UTF8 overhead below
 245                             // this loop). Use factor-four loop-unrolling.
 246                         
 247                             while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
 248                             {
 249                         	p[0] = q[0];
 250                         	p[1] = q[1];
 251                         	p[2] = q[2];
 252                         	p[3] = q[3];
 253                         	p += 4;
 254                         	q += 4;
 255                         	n -= 4;
 256                             }
 257                         
 258                             switch (n)
 259                             {
 260                         	case 0:
 261                         	    return p - dest;
 262                         	case 1:
 263                         	    if (q[0] < 128)
 264 mike          1.111.2.2 	    {
 265                         		p[0] = q[0];
 266                         		return p + 1 - dest;
 267                         	    }
 268                         	    break;
 269                         	case 2:
 270                         	    if (q[0] < 128 && q[1] < 128)
 271                         	    {
 272                         		p[0] = q[0];
 273                         		p[1] = q[1];
 274                         		return p + 2 - dest;
 275                         	    }
 276                         	    break;
 277                         	case 3:
 278                         	    if (q[0] < 128 && q[1] < 128 && q[2] < 128)
 279                         	    {
 280                         		p[0] = q[0];
 281                         		p[1] = q[1];
 282                         		p[2] = q[2];
 283                         		return p + 3 - dest;
 284                         	    }
 285 mike          1.111.2.2 	    break;
 286                             }
 287                         
 288                             // Process remaining characters.
 289 mike          1.111.2.1 
 290                             while (n)
 291                             {
 292 mike          1.111.2.2 	// Optimize for 7-bit ASCII case.
 293 mike          1.111.2.1 
 294 mike          1.111.2.2 	if (*q < 128)
 295 mike          1.111.2.1 	{
 296                         	    *p++ = *q++;
 297                         	    n--;
 298                         	}
 299 mike          1.111.2.2 	else
 300                         	{
 301                         	    Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1;
 302 mike          1.111.2.1 
 303 mike          1.111.2.2 	    if (c > n || !isValid_U8(q, c) ||
 304                         		UTF8toUTF16(&q, q + c, &p, p + n) != 0)
 305                         	    {
 306                         		throw Exception("Bad UTF8 encoding");
 307                         	    }
 308 mike          1.111.2.1 
 309 mike          1.111.2.2 	    n -= c;
 310 mike          1.111.2.1 	}
 311                             }
 312 mike          1.27      
 313 mike          1.111.2.1     return p - dest;
 314                         }
 315 mike          1.27      
 316 mike          1.111.2.1 // Note: dest must be at least three times src (plus an extra byte for 
 317                         // terminator).
 318 mike          1.111.2.2 static inline size_t _copy_to_utf8(char* dest, const Uint16* src, size_t n)
 319 mike          1.111.2.1 {
 320 mike          1.111.2.2     const Uint16* q = src;
 321                             Uint8* p = (Uint8*)dest;
 322                         
 323                             while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
 324                             {
 325                         	p[0] = q[0];
 326                         	p[1] = q[1];
 327                         	p[2] = q[2];
 328                         	p[3] = q[3];
 329                         	p += 4;
 330                         	q += 4;
 331                         	n -= 4;
 332                             }
 333                         
 334                             switch (n)
 335                             {
 336                         	case 0:
 337                         	    return p - (Uint8*)dest;
 338                         	case 1:
 339                         	    if (q[0] < 128)
 340                         	    {
 341 mike          1.111.2.2 		p[0] = q[0];
 342                         		return p + 1 - (Uint8*)dest;
 343                         	    }
 344                         	    break;
 345                         	case 2:
 346                         	    if (q[0] < 128 && q[1] < 128)
 347                         	    {
 348                         		p[0] = q[0];
 349                         		p[1] = q[1];
 350                         		return p + 2 - (Uint8*)dest;
 351                         	    }
 352                         	    break;
 353                         	case 3:
 354                         	    if (q[0] < 128 && q[1] < 128 && q[2] < 128)
 355                         	    {
 356                         		p[0] = q[0];
 357                         		p[1] = q[1];
 358                         		p[2] = q[2];
 359                         		return p + 3 - (Uint8*)dest;
 360                         	    }
 361                         	    break;
 362 mike          1.111.2.2     }
 363                         
 364                             // If this line was reached, there must be characters greater than 128.
 365                         
 366                             UTF16toUTF8(&q, q + n, &p, p + 3 * n);
 367                         
 368                             return p - (Uint8*)dest;
 369 mike          1.111.2.1 }
 370 mike          1.27      
 371 mike          1.111.2.1 static inline size_t _convert(Uint16* p, const char* q, size_t n)
 372                         {
 373                         #ifdef PEGASUS_STRING_NO_UTF8
 374                             _copy(p, q, n);
 375 mike          1.27          return n;
 376 mike          1.111.2.1 #else
 377                             return _copy_from_utf8(p, q, n);
 378                         #endif
 379 mike          1.27      }
 380                         
 381 mike          1.111.2.1 //==============================================================================
 382 chuck         1.102     //
 383 mike          1.111.2.1 // class CString
 384 chuck         1.102     //
 385 mike          1.111.2.1 //==============================================================================
 386                         
 387                         CString::CString(const CString& cstr) : _rep(0)
 388 chuck         1.102     {
 389 mike          1.111.2.1     if (cstr._rep)
 390 chuck         1.102         {
 391 mike          1.111.2.1 	size_t n = strlen(cstr._rep) + 1;
 392                                 _rep = (char*)operator new(n);
 393                         	memcpy(_rep, cstr._rep, n);
 394                             }
 395                         }
 396                         
 397                         CString& CString::operator=(const CString& cstr)
 398                         {
 399                             if (&cstr != this)
 400                             {
 401                                 if (_rep)
 402 david.dillard 1.105             {
 403 mike          1.111.2.1             operator delete(_rep);
 404                                     _rep = 0;
 405 chuck         1.102             }
 406 mike          1.111.2.1 
 407                                 if (cstr._rep)
 408 chuck         1.102             {
 409 mike          1.111.2.1 	    size_t n = strlen(cstr._rep) + 1;
 410                                     _rep = (char*)operator new(n);
 411                         	    memcpy(_rep, cstr._rep, n);
 412 chuck         1.102             }
 413 mike          1.111.2.1     }
 414 chuck         1.102     
 415 mike          1.111.2.1     return *this;
 416 chuck         1.102     }
 417                         
 418 mike          1.111.2.1 //==============================================================================
 419                         //
 420                         // class StringRep
 421                         //
 422                         //==============================================================================
 423 kumpf         1.43      
 424 mike          1.111.2.1 StringRep StringRep::_empty_rep;
 425 kumpf         1.43      
 426 mike          1.111.2.1 inline StringRep* StringRep::alloc(size_t cap)
 427 mike          1.27      {
 428 mike          1.111.2.1     StringRep* rep = (StringRep*)::operator new(
 429                         	sizeof(StringRep) + cap * sizeof(Uint16));
 430                             rep->cap = cap;
 431                             Atomic_create(&rep->refs, 1);
 432                         
 433                             return rep;
 434 mike          1.27      }
 435                         
 436 mike          1.111.2.1 static inline void _reserve(StringRep*& rep, Uint32 cap)
 437 mike          1.27      {
 438 mike          1.111.2.1     if (cap > rep->cap || Atomic_get(&rep->refs) != 1)
 439                             {
 440                         	size_t n = _next_pow_2(cap);
 441                         	StringRep* new_rep = StringRep::alloc(n);
 442                         	new_rep->size = rep->size;
 443                         	_copy(new_rep->data, rep->data, rep->size + 1);
 444                         	StringRep::unref(rep);
 445                         	rep = new_rep;
 446                             }
 447 kumpf         1.39      }
 448 tony          1.66      
 449 mike          1.111.2.1 StringRep* StringRep::create(const Uint16* data, size_t size)
 450 kumpf         1.39      {
 451 mike          1.111.2.1     StringRep* rep = StringRep::alloc(size);
 452                             rep->size = size;
 453                             _copy(rep->data, data, size);
 454                             rep->data[size] = '\0';
 455                             return rep;
 456 kumpf         1.39      }
 457                         
 458 mike          1.111.2.1 StringRep* StringRep::copy_on_write(StringRep* rep)
 459 kumpf         1.39      {
 460 mike          1.111.2.1     // Return a new copy of rep. Release rep.
 461 david.dillard 1.105     
 462 mike          1.111.2.1     StringRep* new_rep = StringRep::alloc(rep->size);
 463                             new_rep->size = rep->size;
 464                             _copy(new_rep->data, rep->data, rep->size);
 465                             new_rep->data[new_rep->size] = '\0';
 466                             StringRep::unref(rep);
 467                             return new_rep;
 468 mike          1.27      }
 469                         
 470 mike          1.111.2.1 StringRep* StringRep::create(const char* data, size_t size)
 471 kumpf         1.39      {
 472 mike          1.111.2.1     StringRep* rep = StringRep::alloc(size);
 473                             rep->size = _convert((Uint16*)rep->data, data, size);
 474                             rep->data[rep->size] = '\0';
 475 david.dillard 1.105     
 476 mike          1.111.2.1     return rep;
 477 kumpf         1.39      }
 478                         
 479 mike          1.111.2.4 StringRep* StringRep::createASCII7(const char* data, size_t size)
 480 mike          1.111.2.2 {
 481                             StringRep* rep = StringRep::alloc(size);
 482                             _copy((Uint16*)rep->data, data, size);
 483                             rep->data[rep->size = size] = '\0';
 484                             return rep;
 485                         }
 486                         
 487 mike          1.111.2.1 Uint32 StringRep::length(const Uint16* str)
 488 mike          1.27      {
 489 mike          1.111.2.5     // Note: We could unroll this but it is rarely called.
 490 david.dillard 1.105     
 491 mike          1.111.2.1     const Uint16* end = (Uint16*)str;
 492                         
 493                             while (*end++)
 494                         	;
 495                         
 496                             return end - str - 1;
 497 mike          1.27      }
 498                         
 499 mike          1.111.2.1 //==============================================================================
 500                         //
 501                         // class String
 502                         //
 503                         //==============================================================================
 504 david.dillard 1.105     
 505 mike          1.111.2.1 const String String::EMPTY;
 506                         
 507                         String::String(const String& str, Uint32 n)
 508                         {
 509                             _check_bounds(n, str._rep->size);
 510                             _rep = StringRep::create(str._rep->data, n);
 511 kumpf         1.39      }
 512 mike          1.27      
 513 mike          1.111.2.1 String::String(const Char16* str)
 514 kumpf         1.39      {
 515 mike          1.111.2.1     _check_null_pointer(str);
 516                             _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
 517 mike          1.27      }
 518                         
 519 mike          1.111.2.1 String::String(const Char16* str, Uint32 n)
 520 mike          1.27      {
 521 mike          1.111.2.1     _check_null_pointer(str);
 522                             _rep = StringRep::create((Uint16*)str, n);
 523 mike          1.27      }
 524                         
 525 mike          1.111.2.1 String::String(const char* str)
 526 mike          1.27      {
 527 mike          1.111.2.1     _check_null_pointer(str);
 528                             _rep = StringRep::create(str, strlen(str));
 529 mike          1.27      }
 530                         
 531 mike          1.111.2.2 String::String(const char* str, String::ASCII7Tag tag)
 532                         {
 533                             _check_null_pointer(str);
 534 mike          1.111.2.4     _rep = StringRep::createASCII7(str, strlen(str));
 535 mike          1.111.2.2 }
 536                         
 537 mike          1.111.2.1 String::String(const char* str, Uint32 n)
 538 mike          1.27      {
 539 mike          1.111.2.1     _check_null_pointer(str);
 540                             _rep = StringRep::create(str, n);
 541                         }
 542 david.dillard 1.105     
 543 mike          1.111.2.2 String::String(const char* str, size_t n, String::ASCII7Tag tag)
 544                         {
 545                             _check_null_pointer(str);
 546 mike          1.111.2.4     _rep = StringRep::createASCII7(str, n);
 547 mike          1.111.2.2 }
 548                         
 549 mike          1.111.2.1 String::String(const String& s1, const String& s2)
 550                         {
 551                             size_t n1 = s1._rep->size;
 552                             size_t n2 = s2._rep->size;
 553                             size_t n = n1 + n2;
 554                             _rep = StringRep::alloc(n);
 555                             _copy(_rep->data, s1._rep->data, n1);
 556                             _copy(_rep->data + n1, s2._rep->data, n2);
 557                             _rep->size = n;
 558                             _rep->data[n] = '\0';
 559                         }
 560                         
 561                         String::String(const String& s1, const char* s2)
 562                         {
 563                             _check_null_pointer(s2);
 564                             size_t n1 = s1._rep->size;
 565                             size_t n2 = strlen(s2);
 566                             _rep = StringRep::alloc(n1 + n2);
 567                             _copy(_rep->data, s1._rep->data, n1);
 568                             _rep->size = n1 + _convert((Uint16*)_rep->data + n1, s2, n2);
 569                             _rep->data[_rep->size] = '\0';
 570 mike          1.111.2.1 }
 571                         
 572                         String::String(const char* s1, const String& s2)
 573                         {
 574                             _check_null_pointer(s1);
 575                             size_t n1 = strlen(s1);
 576                             size_t n2 = s2._rep->size;
 577                             _rep = StringRep::alloc(n1 + n2);
 578                             _rep->size = n2 + _convert((Uint16*)_rep->data, s1, n1);
 579                             _copy(_rep->data + n1, s2._rep->data, n2);
 580                             _rep->data[_rep->size] = '\0';
 581 mike          1.27      }
 582                         
 583 mike          1.111.2.1 String& String::assign(const String& str)
 584 mike          1.27      {
 585 mike          1.111.2.1     if (_rep != str._rep)
 586 david.dillard 1.105         {
 587 mike          1.111.2.1 	StringRep::unref(_rep);
 588                         	StringRep::ref(_rep = str._rep);
 589 david.dillard 1.105         }
 590                         
 591 mike          1.27          return *this;
 592                         }
 593                         
 594 mike          1.111.2.1 String& String::assign(const Char16* str, Uint32 n)
 595 chuck         1.102     {
 596 mike          1.111.2.1     _check_null_pointer(str);
 597                         
 598                             if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)
 599 david.dillard 1.105         {
 600 mike          1.111.2.1 	StringRep::unref(_rep);
 601                         	_rep = StringRep::alloc(n);
 602 david.dillard 1.105         }
 603                         
 604 mike          1.111.2.1     _rep->size = n;
 605                             _copy(_rep->data, (Uint16*)str, n);
 606                             _rep->data[n] = '\0';
 607                         
 608 chuck         1.102         return *this;
 609                         }
 610                         
 611 kumpf         1.39      String& String::assign(const char* str, Uint32 n)
 612 mike          1.27      {
 613 mike          1.111.2.1     _check_null_pointer(str);
 614                         
 615                             if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)
 616 david.dillard 1.105         {
 617 mike          1.111.2.1 	StringRep::unref(_rep);
 618                         	_rep = StringRep::alloc(n);
 619 david.dillard 1.105         }
 620                         
 621 mike          1.111.2.1     _rep->size = _convert(_rep->data, str, n);
 622                             _rep->data[_rep->size] = 0;
 623                         
 624 mike          1.27          return *this;
 625                         }
 626                         
 627 mike          1.111.2.4 String& String::assignASCII7(const char* str, Uint32 n)
 628 mike          1.111.2.2 {
 629                             _check_null_pointer(str);
 630                         
 631                             if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)
 632                             {
 633                         	StringRep::unref(_rep);
 634                         	_rep = StringRep::alloc(n);
 635                             }
 636                         
 637                             _copy(_rep->data, str, n);
 638                             _rep->data[_rep->size = n] = 0;
 639                         
 640                             return *this;
 641                         }
 642                         
 643 kumpf         1.39      void String::clear()
 644                         {
 645 mike          1.111.2.1     if (_rep->size)
 646                             {
 647                         	if (Atomic_get(&_rep->refs) == 1)
 648                         	    _rep->size = 0;
 649                         	else
 650                         	{
 651                         	    StringRep::unref(_rep);
 652                         	    _rep = &StringRep::_empty_rep;
 653                         	}
 654                             }
 655 kumpf         1.39      }
 656                         
 657 mike          1.111.2.1 void String::reserveCapacity(Uint32 cap)
 658 kumpf         1.39      {
 659 mike          1.111.2.1     _reserve(_rep, cap);
 660 kumpf         1.39      }
 661                         
 662 mike          1.111.2.1 CString String::getCString() const
 663                         {
 664                         #ifdef PEGASUS_STRING_NO_UTF8
 665                             char* str = (char*)operator new(_rep->size + 1);
 666                             _copy(str, _rep->data, _rep->size);
 667                             str[_rep->size] = '\0';
 668                             return CString(str);
 669 gs.keenan     1.110     #else
 670 mike          1.111.2.1     Uint32 n = 3 * _rep->size;
 671                             char* str = (char*)operator new(n + 1);
 672 mike          1.111.2.2     size_t size = _copy_to_utf8(str, _rep->data, _rep->size);
 673 mike          1.111.2.1     str[size] = '\0';
 674                             return CString(str);
 675 gs.keenan     1.110     #endif
 676 kumpf         1.39      }
 677                         
 678 mike          1.111.2.1 String& String::append(const Char16* str, Uint32 n)
 679 kumpf         1.39      {
 680 mike          1.111.2.1     _check_null_pointer(str);
 681 kumpf         1.39      
 682 mike          1.111.2.1     size_t old_size = _rep->size;
 683                             size_t new_size = old_size + n;
 684                             _reserve(_rep, new_size);
 685                             _copy(_rep->data + old_size, (Uint16*)str, n);
 686                             _rep->size = new_size;
 687                             _rep->data[new_size] = '\0';
 688 mike          1.27      
 689 mike          1.111.2.1     return *this;
 690 mike          1.27      }
 691                         
 692 mike          1.111.2.1 String& String::append(const String& str)
 693 mike          1.27      {
 694 mike          1.111.2.1     return append((Char16*)str._rep->data, str._rep->size);
 695 mike          1.27      }
 696                         
 697 mike          1.111.2.1 String& String::append(const char* str, Uint32 size)
 698 kumpf         1.39      {
 699 mike          1.111.2.1     _check_null_pointer(str);
 700                         
 701                             size_t old_size = _rep->size;
 702                             size_t cap = old_size + size;
 703                         
 704                             _reserve(_rep, cap);
 705                             _rep->size += _convert((Uint16*)_rep->data + old_size, str, size);
 706                             _rep->data[_rep->size] = '\0';
 707 kumpf         1.39      
 708 mike          1.27          return *this;
 709                         }
 710                         
 711 mike          1.111.2.1 void String::remove(Uint32 index, Uint32 n)
 712 mike          1.27      {
 713 mike          1.111.2.1     if (n == PEG_NOT_FOUND)
 714                                 n = _rep->size - index;
 715 mike          1.27      
 716 mike          1.111.2.1     _check_bounds(index + n, _rep->size);
 717                         
 718                             if (Atomic_get(&_rep->refs) != 1)
 719                         	_rep = StringRep::copy_on_write(_rep);
 720 mike          1.27      
 721 mike          1.111.2.1     assert(index + n <= _rep->size);
 722 mike          1.27      
 723 mike          1.111.2.1     size_t rem = _rep->size - (index + n);
 724                             Uint16* data = _rep->data;
 725                         
 726                             if (rem)
 727                                 memmove(data + index, data + index + n, rem * sizeof(Uint16));
 728                         
 729                             _rep->size -= n;
 730                             data[_rep->size] = '\0';
 731 mike          1.27      }
 732                         
 733 mike          1.111.2.1 String String::subString(Uint32 index, Uint32 n) const
 734 mike          1.27      {
 735 mike          1.111.2.5     // Note: this implementation is very permissive but used for
 736 mike          1.111.2.1     // backwards compatibility.
 737                         
 738                             if (index < _rep->size)
 739 mike          1.27          {
 740 mike          1.111.2.1 	if (n == PEG_NOT_FOUND || n > _rep->size - index)
 741                         	    n = _rep->size - index;
 742 mike          1.27      
 743 mike          1.111.2.1 	return String((Char16*)_rep->data + index, n);
 744 mike          1.27          }
 745 david.dillard 1.105     
 746                             return String();
 747 mike          1.27      }
 748                         
 749                         Uint32 String::find(Char16 c) const
 750                         {
 751 mike          1.111.2.1     Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
 752 mike          1.27      
 753 mike          1.111.2.1     if (p)
 754                         	return p - _rep->data;
 755 mike          1.27      
 756                             return PEG_NOT_FOUND;
 757                         }
 758                         
 759 kumpf         1.53      Uint32 String::find(Uint32 index, Char16 c) const
 760 mike          1.30      {
 761 mike          1.111.2.1     _check_bounds(index, _rep->size);
 762 mike          1.30      
 763 mike          1.111.2.1     if (index >= _rep->size)
 764                         	return PEG_NOT_FOUND;
 765                         
 766                             Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
 767                         
 768                             if (p)
 769                         	return p - _rep->data;
 770 mike          1.30      
 771                             return PEG_NOT_FOUND;
 772                         }
 773                         
 774 mike          1.111.2.1 Uint32 String::_find_aux(const Char16* s, Uint32 n) const
 775 mike          1.27      {
 776 mike          1.111.2.1     _check_null_pointer(s);
 777 mike          1.27      
 778 mike          1.111.2.1     const Uint16* data = _rep->data;
 779                             size_t rem = _rep->size;
 780 mike          1.30      
 781 mike          1.111.2.1     while (n <= rem)
 782 mike          1.27          {
 783 mike          1.111.2.1 	Uint16* p = (Uint16*)_find(data, rem, s[0]);
 784 david.dillard 1.105     
 785 mike          1.111.2.1 	if (!p)
 786                         	    break;
 787                         
 788                         	if (memcmp(p, s, n * sizeof(Uint16)) == 0)
 789                         	    return p - _rep->data;
 790                         
 791                         	p++;
 792                         	rem -= p - data;
 793                         	data = p;
 794 mike          1.27          }
 795 mike          1.111.2.1 
 796 mike          1.27          return PEG_NOT_FOUND;
 797                         }
 798                         
 799 mike          1.111.2.1 Uint32 String::find(const char* s) const
 800                         {
 801                             _check_null_pointer(s);
 802                         
 803 mike          1.111.2.5     // Note: could optimize away creation of temporary, but this is rarely
 804                             // called.
 805 mike          1.111.2.1     return find(String(s));
 806                         }
 807                         
 808 mike          1.27      Uint32 String::reverseFind(Char16 c) const
 809                         {
 810 mike          1.111.2.1     Uint16 x = c;
 811                             Uint16* p = _rep->data;
 812                             Uint16* q = _rep->data + _rep->size;
 813 mike          1.27      
 814 mike          1.111.2.1     while (q != p)
 815 mike          1.27          {
 816 mike          1.111.2.1 	if (*--q == x)
 817                         	    return q - p;
 818 mike          1.27          }
 819                         
 820                             return PEG_NOT_FOUND;
 821                         }
 822                         
 823                         void String::toLower()
 824                         {
 825 mike          1.111.2.1 #ifdef PEGASUS_STRING_ENABLE_ICU
 826                         
 827 yi.zhou       1.108         if (InitializeICU::initICUSuccessful())
 828 david         1.90          {
 829 mike          1.111.2.1 	//// First calculate size of resulting string. u_strToLower() returns
 830                         	//// only the size when zero is passed as the destination size argument.
 831                         
 832 yi.zhou       1.108             UErrorCode err = U_ZERO_ERROR;
 833                         
 834 mike          1.111.2.1         int32_t new_size = u_strToLower(
 835                         	    NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
 836 chuck         1.99      
 837 mike          1.111.2.1 	//// Reserve enough space for the result.
 838                         
 839                         	if ((Uint32)new_size > _rep->cap)
 840                         	    _reserve(_rep, new_size);
 841                         
 842                         	//// Perform the conversion (overlapping buffers are allowed).
 843                         
 844                                 u_strToLower((UChar*)_rep->data, new_size,
 845                         	    (UChar*)_rep->data, _rep->size, NULL, &err);
 846 yi.zhou       1.108     
 847 mike          1.111.2.1 	_rep->size = new_size;
 848 david         1.90          }
 849 mike          1.111.2.1 
 850                         #endif /* PEGASUS_STRING_ENABLE_ICU */
 851                         
 852                             if (Atomic_get(&_rep->refs) != 1)
 853                         	_rep = StringRep::copy_on_write(_rep);
 854                         
 855                             Uint16* p = _rep->data;
 856                             size_t n = _rep->size;
 857                         
 858                             for (; n--; p++)
 859 david         1.90          {
 860 mike          1.111.2.1 	if (!(*p & 0xFF00))
 861                         	    *p = _to_lower(*p);
 862 mike          1.27          }
 863 kumpf         1.39      }
 864                         
 865 chuck         1.99      void String::toUpper()
 866 david         1.90      {
 867 mike          1.111.2.1 #ifdef PEGASUS_STRING_ENABLE_ICU
 868                         
 869 yi.zhou       1.108         if (InitializeICU::initICUSuccessful())
 870 chuck         1.99          {
 871 mike          1.111.2.1 	//// First calculate size of resulting string. u_strToUpper() returns
 872                         	//// only the size when zero is passed as the destination size argument.
 873                         
 874 yi.zhou       1.108             UErrorCode err = U_ZERO_ERROR;
 875                         
 876 mike          1.111.2.1         int32_t new_size = u_strToUpper(
 877                         	    NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
 878 chuck         1.99      
 879 mike          1.111.2.1 	//// Reserve enough space for the result.
 880 yi.zhou       1.108     
 881 mike          1.111.2.1 	if ((Uint32)new_size > _rep->cap)
 882                         	    _reserve(_rep, new_size);
 883 david         1.90      
 884 mike          1.111.2.1 	//// Perform the conversion (overlapping buffers are allowed).
 885 kumpf         1.39      
 886 mike          1.111.2.1         u_strToUpper((UChar*)_rep->data, new_size,
 887                         	    (UChar*)_rep->data, _rep->size, NULL, &err);
 888 mike          1.27      
 889 mike          1.111.2.1 	_rep->size = new_size;
 890 mike          1.27          }
 891                         
 892 mike          1.111.2.1 #endif /* PEGASUS_STRING_ENABLE_ICU */
 893                         
 894                             if (Atomic_get(&_rep->refs) != 1)
 895                         	_rep = StringRep::copy_on_write(_rep);
 896                         
 897                             Uint16* p = _rep->data;
 898                             size_t n = _rep->size;
 899                         
 900                             for (; n--; p++)
 901                         	*p = _to_upper(*p);
 902 mike          1.27      }
 903                         
 904 mike          1.111.2.1 int String::compare(const String& s1, const String& s2, Uint32 n)
 905 mike          1.30      {
 906 mike          1.111.2.1     assert(n <= s1._rep->size);
 907                             assert(n <= s2._rep->size);
 908 kumpf         1.43      
 909 mike          1.111.2.1     // Ignoring error in which n is greater than s1.size() or s2.size()
 910                             return _compare(s1._rep->data, s2._rep->data, n);
 911                         }
 912 mike          1.30      
 913 mike          1.111.2.1 int String::compare(const String& s1, const String& s2)
 914                         {
 915                             return _compare(s1._rep->data, s2._rep->data);
 916                         }
 917 mike          1.30      
 918 mike          1.111.2.1 int String::compare(const String& s1, const char* s2)
 919                         {
 920                             _check_null_pointer(s2);
 921 mike          1.30      
 922 mike          1.111.2.1 #ifdef PEGASUS_STRING_NO_UTF8
 923                             return _compare_no_utf8(s1._rep->data, s2);
 924                         #else
 925                             // ATTN: optimize this!
 926                             return String::compare(s1, String(s2));
 927                         #endif
 928 mike          1.30      }
 929                         
 930 mike          1.111.2.1 int String::compareNoCase(const String& str1, const String& str2)
 931 kumpf         1.40      {
 932 mike          1.111.2.1 #ifdef PEGASUS_STRING_ENABLE_ICU
 933                         
 934 yi.zhou       1.108         if (InitializeICU::initICUSuccessful())
 935                             {
 936 mike          1.111.2.1         return  u_strcasecmp(
 937                         	    str1._rep->data, str2._rep->data, U_FOLD_CASE_DEFAULT);
 938 yi.zhou       1.108         }
 939 kumpf         1.40      
 940 mike          1.111.2.1 #endif /* PEGASUS_STRING_ENABLE_ICU */
 941 kumpf         1.40      
 942 mike          1.111.2.1     const Uint16* s1 = str1._rep->data;
 943                             const Uint16* s2 = str2._rep->data;
 944                         
 945                             while (*s1 && *s2)
 946                             {
 947                                 int r = _to_lower(*s1++) - _to_lower(*s2++);
 948 kumpf         1.40      
 949 david.dillard 1.105             if (r)
 950                                     return r;
 951 kumpf         1.40          }
 952                         
 953 mike          1.111.2.1     if (*s2)
 954 david.dillard 1.105             return -1;
 955 mike          1.111.2.1     else if (*s1)
 956 david.dillard 1.105             return 1;
 957 kumpf         1.40      
 958                             return 0;
 959                         }
 960                         
 961 mike          1.111.2.1 Boolean String::equalNoCase_aux(const String& s1, const String& s2)
 962 mike          1.27      {
 963 mike          1.111.2.1 #ifdef PEGASUS_STRING_ENABLE_ICU
 964 mike          1.27      
 965 mike          1.111.2.1     return String::compareNoCase(s1, s2) == 0;
 966 kumpf         1.39      
 967 mike          1.111.2.1 #else /* PEGASUS_STRING_ENABLE_ICU */
 968 kumpf         1.39      
 969 mike          1.111.2.1     Uint16* p = (Uint16*)s1._rep->data;
 970                             Uint16* q = (Uint16*)s2._rep->data;
 971                             Uint32 n = s2._rep->size;
 972                         
 973                             while (n >= 8)
 974                             {
 975                         	if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0]))) ||
 976                         	    ((p[1] - q[1]) && (_to_upper(p[1]) - _to_upper(q[1]))) ||
 977                         	    ((p[2] - q[2]) && (_to_upper(p[2]) - _to_upper(q[2]))) ||
 978                         	    ((p[3] - q[3]) && (_to_upper(p[3]) - _to_upper(q[3]))) ||
 979                         	    ((p[4] - q[4]) && (_to_upper(p[4]) - _to_upper(q[4]))) ||
 980                         	    ((p[5] - q[5]) && (_to_upper(p[5]) - _to_upper(q[5]))) ||
 981                         	    ((p[6] - q[6]) && (_to_upper(p[6]) - _to_upper(q[6]))) ||
 982                         	    ((p[7] - q[7]) && (_to_upper(p[7]) - _to_upper(q[7]))))
 983                         	{
 984                         	    return false;
 985                         	}
 986                         
 987                         	n -= 8;
 988                         	p += 8;
 989                         	q += 8;
 990 mike          1.111.2.1     }
 991                         
 992                             while (n >= 4)
 993                             {
 994                         	if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0]))) ||
 995                         	    ((p[1] - q[1]) && (_to_upper(p[1]) - _to_upper(q[1]))) ||
 996                         	    ((p[2] - q[2]) && (_to_upper(p[2]) - _to_upper(q[2]))) ||
 997                         	    ((p[3] - q[3]) && (_to_upper(p[3]) - _to_upper(q[3]))))
 998                         	{
 999                         	    return false;
1000                         	}
1001                         
1002                         	n -= 4;
1003                         	p += 4;
1004                         	q += 4;
1005                             }
1006 mike          1.27      
1007 kumpf         1.39          while (n--)
1008                             {
1009 mike          1.111.2.1 	if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0]))))
1010                         	    return false;
1011                         
1012                         	p++;
1013                         	q++;
1014 kumpf         1.39          }
1015 mike          1.28      
1016 kumpf         1.39          return true;
1017 david         1.69      
1018 mike          1.111.2.1 #endif /* PEGASUS_STRING_ENABLE_ICU */
1019                         }
1020 mike          1.27      
1021 mike          1.111.2.1 Boolean String::equalNoCase(const String& s1, const char* s2)
1022 david         1.69      {
1023 mike          1.111.2.1     _check_null_pointer(s2);
1024 david         1.69      
1025 mike          1.111.2.1 #if defined(PEGASUS_STRING_ENABLE_ICU)
1026 david.dillard 1.105     
1027 mike          1.111.2.1     return String::equalNoCase(s1, String(s2));
1028 david         1.71      
1029 mike          1.111.2.1 #elif defined(PEGASUS_STRING_NO_UTF8)
1030 kumpf         1.42      
1031 mike          1.111.2.1     const Uint16* p1 = (Uint16*)s1._rep->data;
1032                             const char* p2 = s2;
1033                             size_t n = s1._rep->size;
1034 karl          1.36      
1035 mike          1.111.2.1     while (n--)
1036                             {
1037                         	if (!*p2)
1038                         	    return false;
1039 david.dillard 1.105     
1040 mike          1.111.2.1 	if (_to_upper(*p1++) != CharSet::to_upper(int(*p2++)))
1041                         	    return false;
1042                             }
1043                             
1044                             return true;
1045 david.dillard 1.105     
1046 mike          1.111.2.1 #else /* PEGASUS_STRING_ENABLE_ICU */
1047 chuck         1.78      
1048 mike          1.111.2.1     // ATTN: optimize this!
1049                             return String::equalNoCase(s1, String(s2));
1050 david.dillard 1.105     
1051 mike          1.111.2.1 #endif /* PEGASUS_STRING_ENABLE_ICU */
1052                         }
1053 karl          1.36      
1054 mike          1.111.2.1 Boolean String::equal(const String& s1, const String& s2)
1055 karl          1.36      {
1056 mike          1.111.2.1     return s1._rep->size == s2._rep->size && memcmp(s1._rep->data, 
1057                         	s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;
1058 karl          1.36      }
1059                         
1060 mike          1.111.2.1 Boolean String::equal(const String& s1, const char* s2)
1061 karl          1.36      {
1062 mike          1.111.2.1 #ifdef PEGASUS_STRING_NO_UTF8
1063 kumpf         1.39      
1064 mike          1.111.2.1     _check_null_pointer(s2);
1065 kumpf         1.39      
1066 mike          1.111.2.1     const Uint16* p = (Uint16*)s1._rep->data;
1067                             const char* q = s2;
1068 kumpf         1.39      
1069 mike          1.111.2.1     while (*p && *q)
1070                             {
1071                         	if (*p++ != Uint16(*q++))
1072                         	    return false;
1073                             }
1074 kumpf         1.39      
1075 mike          1.111.2.1     return !(*p || *q);
1076 kumpf         1.39      
1077 mike          1.111.2.1 #else /* PEGASUS_STRING_NO_UTF8 */
1078 kumpf         1.39      
1079 mike          1.111.2.1     return String::equal(s1, String(s2));
1080                         
1081                         #endif /* PEGASUS_STRING_NO_UTF8 */
1082 kumpf         1.39      }
1083                         
1084 kumpf         1.47      PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
1085 kumpf         1.39      {
1086 david         1.69      #if defined(PEGASUS_OS_OS400)
1087 mike          1.111.2.1 
1088 david         1.93          CString cstr = str.getCString();
1089 david         1.69          const char* utf8str = cstr;
1090                             os << utf8str;
1091                         
1092 mike          1.111.2.1 #elif defined(PEGASUS_STRING_ENABLE_ICU)
1093                         
1094 yi.zhou       1.108         if (InitializeICU::initICUSuccessful())
1095                             {
1096 david.dillard 1.105             char *buf = NULL;
1097                                 const int size = str.size() * 6;
1098 mike          1.111.2.1         UnicodeString UniStr(
1099                         	    (const UChar *)str.getChar16Data(), (int32_t)str.size());
1100 david.dillard 1.105             Uint32 bufsize = UniStr.extract(0,size,buf);
1101                                 buf = new char[bufsize+1];
1102                                 UniStr.extract(0,bufsize,buf);
1103                                 os << buf;
1104                                 os.flush();
1105                                 delete [] buf;
1106 yi.zhou       1.108         }
1107 mike          1.111.2.1 
1108                         #endif /* PEGASUS_OS_OS400 */
1109                         
1110                             for (Uint32 i = 0, n = str.size(); i < n; i++)
1111 yi.zhou       1.108         {
1112 mike          1.111.2.1 	Uint16 code = str[i];
1113 david.dillard 1.105     
1114 mike          1.111.2.1 	if (code > 0 && !(code & 0xFF00))
1115                         	    os << char(code);
1116                         	else
1117                         	{
1118                         	    // Print in hex format:
1119                         	    char buffer[8];
1120                         	    sprintf(buffer, "\\x%04X", code);
1121                         	    os << buffer;
1122                         	}
1123 yi.zhou       1.108         }
1124 kumpf         1.39      
1125                             return os;
1126                         }
1127                         
1128 mike          1.111.2.1 void String::_append_char_aux()
1129 kumpf         1.39      {
1130 mike          1.111.2.1     StringRep* tmp;
1131 kumpf         1.39      
1132 mike          1.111.2.1     if (_rep->cap)
1133                             {
1134                         	tmp = StringRep::alloc(2 * _rep->cap);
1135                         	tmp->size = _rep->size;
1136                         	_copy(tmp->data, _rep->data, _rep->size);
1137                             }
1138                             else
1139                             {
1140                         	tmp = StringRep::alloc(8);
1141                         	tmp->size = 0;
1142                             }
1143 kumpf         1.39      
1144 mike          1.111.2.1     StringRep::unref(_rep);
1145                             _rep = tmp;
1146 kumpf         1.39      }
1147                         
1148 mike          1.111.2.1 PEGASUS_NAMESPACE_END
1149 kumpf         1.39      
1150 mike          1.111.2.1 /*
1151                         ================================================================================
1152 kumpf         1.39      
1153 mike          1.111.2.1 String optimizations:
1154                         
1155                             1.  Added mechanism allowing certain functions to be inlined only when
1156                         	used by internal Pegasus modules. External modules (i.e., providers)
1157                         	link to a non-inline version, which allows for binary compatibility.
1158                         
1159                             2.  Implemented copy-on-write with atomic increment/decrement. This
1160                         	yielded a 10% improvement for the 'gc' benchmark and an 11% improvement
1161                         	for the 'ni1000' benchmark.
1162                         
1163                             3.	Employed loop unrolling in several places. For example, see:
1164                         
1165                         	    static Uint16* _find(const Uint16* s, size_t n, Uint16 c);
1166                         
1167                             4.  Used the "empty-rep" optimization (described in whitepaper from the
1168                         	GCC Developers Summit). This reduced default construction to a simple
1169                         	pointer assignment.
1170                         
1171                         	    inline String::String() : _rep(&_empty_rep) { }
1172                         
1173                             5.  Implemented Uint16 versions of toupper() and tolower() using tables.
1174 mike          1.111.2.1 	For example:
1175                         
1176                         	    static const char _upper[] =
1177                         	    {
1178                         		0,1,2,...255
1179                         	    };
1180                         
1181                         	    inline Uint16 _to_upper(Uint16 x)
1182                         	    {
1183                         		return (x & 0xFF00) ? x : _upper[x];
1184                         	    }
1185                         
1186                         	This outperforms the system implementation by avoiding an anding 
1187                         	operation.
1188                         
1189                             6.  Implemented char* version of the following member functions to 
1190                         	eliminate unnecessary creation of anonymous string objects 
1191                         	(temporaries).
1192                         
1193                         	    String(const String& s1, const char* s2);
1194                         	    String(const char* s1, const String& s2);
1195 mike          1.111.2.1 	    String& String::operator=(const char* str);
1196                         	    Uint32 String::find(const char* s) const;
1197                         	    bool String::equal(const String& s1, const char* s2);
1198                         	    static int String::compare(const String& s1, const char* s2);
1199                         	    String& String::append(const char* str);
1200                         	    String& String::append(const char* str, Uint32 size);
1201                         	    static bool String::equalNoCase(const String& s1, const char* s2);
1202                         	    String& operator=(const char* str)
1203                         	    String& String::assign(const char* str)
1204                         	    String& String::append(const char* str)
1205                         	    Boolean operator==(const String& s1, const char* s2)
1206                         	    Boolean operator==(const char* s1, const String& s2)
1207                         	    Boolean operator!=(const String& s1, const char* s2)
1208                         	    Boolean operator!=(const char* s1, const String& s2)
1209                         	    Boolean operator<(const String& s1, const char* s2)
1210                         	    Boolean operator<(const char* s1, const String& s2)
1211                         	    Boolean operator>(const String& s1, const char* s2)
1212                         	    Boolean operator>(const char* s1, const String& s2)
1213                         	    Boolean operator<=(const String& s1, const char* s2)
1214                         	    Boolean operator<=(const char* s1, const String& s2)
1215                         	    Boolean operator>=(const String& s1, const char* s2)
1216 mike          1.111.2.1 	    Boolean operator>=(const char* s1, const String& s2)
1217                         	    String operator+(const String& s1, const char* s2)
1218                         	    String operator+(const char* s1, const String& s2)
1219                         
1220                             7.  Optimized _next_pow_2(), used in rounding the capacity to the next 
1221                                 power of two (algorithm from the book "Hacker's Delight").
1222                         
1223                         	    static Uint32 _next_pow_2(Uint32 x)
1224                         	    {
1225                         		if (x < 8)
1226                         		    return 8;
1227                         
1228                         		x--;
1229                         		x |= (x >> 1);
1230                         		x |= (x >> 2);
1231                         		x |= (x >> 4);
1232                         		x |= (x >> 8);
1233                         		x |= (x >> 16);
1234                         		x++;
1235                         
1236                         		return x;
1237 mike          1.111.2.1 	    }
1238                         
1239                             8.  Implemented "concatenating constructors" to eliminate temporaries
1240                         	created by operator+(). This scheme employs the "return-value 
1241                         	optimization" described by Stan Lippman.
1242                         
1243                         	    inline String operator+(const String& s1, const String& s2)
1244                         	    {
1245                         		return String(s1, s2, 0);
1246                         	    }
1247                         
1248                             9.  Experimented to find the optimal initial size for a short string.
1249                         	Eight seems to offer the best tradeoff between space and time.
1250                         
1251                             10. Inlined all members of the Char16 class.
1252                         
1253                             11. Used Uint16 internally in the String class. This showed no improvement
1254                         	since Char16 was already fully inlined and was essentially reduced to
1255                         	Uint16 in any case.
1256                         
1257                             12. Implemented conditional logic (#if) allowing error checking logic to
1258 mike          1.111.2.1 	be excluded to better performance. Examples include bounds checking 
1259                         	and null-pointer checking.
1260                         
1261                             13. Used memcpy() and memcmp() where possible. These are implemented using
1262                         	the rep family of instructions under Intel and are much faster.
1263                         
1264 mike          1.111.2.2     14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8 
1265                         	copy routine overhead.
1266                         
1267                             15. Added ASCII7 form of the constructor and assign().
1268                         
1269                         	    String s("hello world", String::ASCII7);
1270                         
1271 mike          1.111.2.4 	    s.assignASCII7("hello world");
1272 mike          1.111.2.2 
1273 mike          1.111.2.6 	This avoids slower UTF8 processing when not needed.
1274 mike          1.111.2.2 
1275 mike          1.111.2.1 ================================================================================
1276                         */

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2