(file) Return to String.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 martin 1.133 //%LICENSE////////////////////////////////////////////////////////////////
   2 martin 1.134 //
   3 martin 1.133 // Licensed to The Open Group (TOG) under one or more contributor license
   4              // agreements.  Refer to the OpenPegasusNOTICE.txt file distributed with
   5              // this work for additional information regarding copyright ownership.
   6              // Each contributor licenses this file to you under the OpenPegasus Open
   7              // Source License; you may not use this file except in compliance with the
   8              // License.
   9 martin 1.134 //
  10 martin 1.133 // Permission is hereby granted, free of charge, to any person obtaining a
  11              // copy of this software and associated documentation files (the "Software"),
  12              // to deal in the Software without restriction, including without limitation
  13              // the rights to use, copy, modify, merge, publish, distribute, sublicense,
  14              // and/or sell copies of the Software, and to permit persons to whom the
  15              // Software is furnished to do so, subject to the following conditions:
  16 martin 1.134 //
  17 martin 1.133 // The above copyright notice and this permission notice shall be included
  18              // in all copies or substantial portions of the Software.
  19 martin 1.134 //
  20 martin 1.133 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  21 martin 1.134 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  22 martin 1.133 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  23              // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  24              // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  25              // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  26              // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27 martin 1.134 //
  28 martin 1.133 //////////////////////////////////////////////////////////////////////////
  29 mike   1.27  //
  30              //%/////////////////////////////////////////////////////////////////////////////
  31              
  32 jim.wunderlich 1.115 #include <Pegasus/Common/PegasusAssert.h>
  33 mike           1.113 #include <cstring>
  34 kumpf          1.48  #include "InternalException.h"
  35 mike           1.112 #include "MessageLoader.h"
  36                      #include "StringRep.h"
  37 david          1.69  
  38                      #ifdef PEGASUS_HAS_ICU
  39 kumpf          1.132 # include <unicode/ures.h>
  40                      # include <unicode/ustring.h>
  41                      # include <unicode/uchar.h>
  42 david          1.69  #endif
  43                      
  44 mike           1.112 PEGASUS_NAMESPACE_BEGIN
  45 mike           1.28  
  46 mike           1.112 //==============================================================================
  47                      //
  48                      // Compile-time macros (undefined by default).
  49                      //
  50                      //     PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
  51                      //
  52                      //==============================================================================
  53 mike           1.27  
  54 mike           1.112 //==============================================================================
  55 kumpf          1.39  //
  56 mike           1.112 // File-scope definitions:
  57 kumpf          1.54  //
  58 mike           1.112 //==============================================================================
  59                      
  60                      // Note: this table is much faster than the system toupper(). Please do not
  61                      // change.
                          //
                          // Indexed by an 8-bit code. Entries 0x61-0x7A ('a'-'z') hold 0x41-0x5A
                          // ('A'-'Z'); every other entry holds its own index, so codes 0x80-0xFF
                          // are passed through unchanged.
  62 kumpf          1.54  
  63 david.dillard  1.116 const Uint8 _toUpperTable[256] =
  64 kumpf          1.54  {
  65 mike           1.112     0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
  66                          0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
  67                          0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
  68                          0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
  69                          0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
  70                          0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
  71                          0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
  72                          0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
  73                          0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
  74                          0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
  75                          0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
  76                          0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
  77                          0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
  78                          0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
  79                          0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
  80                          0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
  81                          0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
  82                          0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
  83                          0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
  84                          0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
  85                          0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
  86 mike           1.112     0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
  87                          0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
  88                          0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
  89                          0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
  90                          0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
  91                          0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
  92                          0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
  93                          0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
  94                          0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
  95                          0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
  96                          0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
  97                      };
  98                      
  99                      // Note: this table is much faster than the system tolower(). Please do not
 100                      // change.
                          //
                          // Indexed by an 8-bit code. Entries 0x41-0x5A ('A'-'Z') hold 0x61-0x7A
                          // ('a'-'z'); every other entry holds its own index, so codes 0x80-0xFF
                          // are passed through unchanged.
 101                      
 102 david.dillard  1.116 const Uint8 _toLowerTable[256] =
 103 mike           1.112 {
 104                          0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
 105                          0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
 106                          0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
 107                          0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
 108                          0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
 109                          0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
 110                          0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
 111                          0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
 112                          0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
 113                          0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
 114                          0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
 115                          0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
 116                          0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
 117                          0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
 118                          0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
 119                          0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
 120                          0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
 121                          0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
 122                          0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
 123                          0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
 124 mike           1.112     0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
 125                          0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
 126                          0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
 127                          0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
 128                          0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
 129                          0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
 130                          0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
 131                          0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
 132                          0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
 133                          0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
 134                          0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
 135                          0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
 136                      };
 137                      
 138                      // Converts 16-bit characters to upper case. This routine is faster than the
 139                      // system toupper(). Please do not change.
 140                      inline Uint16 _toUpper(Uint16 x)
 141                      {
 142                          return (x & 0xFF00) ? x : _toUpperTable[x];
 143 kumpf          1.54  }
 144                      
 145 mike           1.112 // Converts 16-bit characters to lower case. This routine is faster than the
 146                      // system toupper(). Please do not change.
 147                      inline Uint16 _toLower(Uint16 x)
 148 kumpf          1.54  {
 149 mike           1.112     return (x & 0xFF00) ? x : _toLowerTable[x];
 150                      }
 151                      
 152                      // Rounds x up to the nearest power of two (or just returns 8 if x < 8).
 153                      static Uint32 _roundUpToPow2(Uint32 x)
 154                      {
 155 dave.sudlik    1.120     // Check for potential overflow in x
 156                          PEGASUS_CHECK_CAPACITY_OVERFLOW(x);
 157 mike           1.112 
 158                          if (x < 8)
 159                              return 8;
 160                      
 161                          x--;
 162                          x |= (x >> 1);
 163                          x |= (x >> 2);
 164                          x |= (x >> 4);
 165                          x |= (x >> 8);
 166                          x |= (x >> 16);
 167                          x++;
 168                      
 169                          return x;
 170                      }
 171                      
 172                      static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
 173                      {
 174                          // The following employs loop unrolling for efficiency. Please do not
 175                          // eliminate.
 176                      
 177                          while (n >= 4)
 178 mike           1.112     {
 179                              if (s[0] == c)
 180                                  return (Uint16*)s;
 181                              if (s[1] == c)
 182                                  return (Uint16*)&s[1];
 183                              if (s[2] == c)
 184                                  return (Uint16*)&s[2];
 185                              if (s[3] == c)
 186                                  return (Uint16*)&s[3];
 187 kumpf          1.82  
 188 mike           1.112         n -= 4;
 189                              s += 4;
 190                          }
 191                      
 192                          if (n)
 193                          {
 194                              if (*s == c)
 195                                  return (Uint16*)s;
 196                              s++;
 197                              n--;
 198                          }
 199                      
 200                          if (n)
 201                          {
 202                              if (*s == c)
 203                                  return (Uint16*)s;
 204                              s++;
 205                              n--;
 206                          }
 207                      
 208                          if (n && *s == c)
 209 mike           1.112         return (Uint16*)s;
 210                      
 211                          // Not found!
 212                          return 0;
 213                      }
 214                      
 215                      static int _compare(const Uint16* s1, const Uint16* s2)
 216                      {
 217                          while (*s1 && *s2)
 218                          {
 219                              int r = *s1++ - *s2++;
 220                      
 221                              if (r)
 222                                  return r;
 223                          }
 224                      
 225                          if (*s2)
 226                              return -1;
 227                          else if (*s1)
 228                              return 1;
 229                      
 230 mike           1.112     return 0;
 231                      }
 232                      
 233 kumpf          1.130 #ifdef PEGASUS_STRING_NO_UTF8
 234 mike           1.112 static int _compareNoUTF8(const Uint16* s1, const char* s2)
 235                      {
 236                          Uint16 c1;
 237                          Uint16 c2;
 238                      
 239                          do
 240                          {
 241                              c1 = *s1++;
 242                              c2 = *s2++;
 243                      
 244                              if (c1 == 0)
 245                                  return c1 - c2;
 246                          }
 247                          while (c1 == c2);
 248                      
 249                          return c1 - c2;
 250                      }
 251 kumpf          1.130 #endif
 252 mike           1.112 
 253                      static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)
 254                      {
 255                          memcpy(s1, s2, n * sizeof(Uint16));
 256                      }
 257                      
 258                      void StringThrowOutOfBounds()
 259                      {
                              // Unconditionally throws IndexOutOfBoundsException. The function
                              // has external linkage, so it is presumably called from bounds
                              // checks defined outside this file -- TODO confirm.
 260                          throw IndexOutOfBoundsException();
 261                      }
 262                      
 263                      inline void _checkNullPointer(const void* ptr)
 264                      {
 265                          if (!ptr)
 266                              throw NullPointer();
 267                      }
 268                      
 269                      static void _StringThrowBadUTF8(Uint32 index)
 270                      {
                              // Always throws an Exception built from the localizable message
                              // "Common.String.BAD_UTF8"; index is substituted for $0 and is the
                              // position in the input at which the invalid sequence starts.
 271                          MessageLoaderParms parms(
 272                              "Common.String.BAD_UTF8",
 273 mike           1.112         "The byte sequence starting at index $0 "
 274                              "is not valid UTF-8 encoding.",
 275                              index);
 276                          throw Exception(parms);
 277                      }
 278                      
 279 david.dillard  1.116 // Note: dest must be at least three times src (plus an extra byte for
 280 mike           1.112 // terminator).
 281                      static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n)
 282                      {
 283                          // The following employs loop unrolling for efficiency. Please do not
 284                          // eliminate.
 285                      
 286                          const Uint16* q = src;
 287                          Uint8* p = (Uint8*)dest;
 288                      
 289                          while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
 290 kumpf          1.82      {
 291 mike           1.112         p[0] = q[0];
 292                              p[1] = q[1];
 293                              p[2] = q[2];
 294                              p[3] = q[3];
 295                              p += 4;
 296                              q += 4;
 297                              n -= 4;
 298 kumpf          1.82      }
 299 mike           1.112 
 300                          switch (n)
 301                          {
 302                              case 0:
 303                                  return p - (Uint8*)dest;
 304                              case 1:
 305                                  if (q[0] < 128)
 306                                  {
 307                                      p[0] = q[0];
 308                                      return p + 1 - (Uint8*)dest;
 309                                  }
 310                                  break;
 311                              case 2:
 312                                  if (q[0] < 128 && q[1] < 128)
 313                                  {
 314                                      p[0] = q[0];
 315                                      p[1] = q[1];
 316                                      return p + 2 - (Uint8*)dest;
 317                                  }
 318                                  break;
 319                              case 3:
 320 mike           1.112             if (q[0] < 128 && q[1] < 128 && q[2] < 128)
 321                                  {
 322                                      p[0] = q[0];
 323                                      p[1] = q[1];
 324                                      p[2] = q[2];
 325                                      return p + 3 - (Uint8*)dest;
 326                                  }
 327                                  break;
 328                          }
 329                      
 330                          // If this line was reached, there must be characters greater than 128.
 331                      
 332                          UTF16toUTF8(&q, q + n, &p, p + 3 * n);
 333                      
 334                          return p - (Uint8*)dest;
 335 kumpf          1.54  }
 336                      
 337 mike           1.112 //==============================================================================
 338                      //
 339                      // class CString
 340                      //
 341                      //==============================================================================
 342                      
 343                      CString::CString(const CString& cstr) : _rep(0)
 344 kumpf          1.54  {
 345 mike           1.112     if (cstr._rep)
 346 kumpf          1.82      {
 347 mike           1.112         size_t n = strlen(cstr._rep) + 1;
 348                              _rep = (char*)operator new(n);
 349                              memcpy(_rep, cstr._rep, n);
 350 kumpf          1.82      }
 351 kumpf          1.54  }
 352                      
 353 kumpf          1.56  CString& CString::operator=(const CString& cstr)
 354                      {
 355 kumpf          1.82      if (&cstr != this)
 356 kumpf          1.81      {
 357 kumpf          1.82          if (_rep)
 358                              {
 359 mike           1.112             operator delete(_rep);
 360 kumpf          1.82              _rep = 0;
 361                              }
 362 mike           1.112 
 363 kumpf          1.82          if (cstr._rep)
 364                              {
 365 mike           1.112             size_t n = strlen(cstr._rep) + 1;
 366                                  _rep = (char*)operator new(n);
 367                                  memcpy(_rep, cstr._rep, n);
 368 kumpf          1.82          }
 369 kumpf          1.81      }
 370 mike           1.112 
 371 kumpf          1.56      return *this;
 372                      }
 373                      
 374 mike           1.112 //==============================================================================
 375 kumpf          1.54  //
 376 mike           1.112 // class StringRep
 377 kumpf          1.39  //
 378 mike           1.112 //==============================================================================
 379 kumpf          1.39  
 380 mike           1.112 StringRep StringRep::_emptyRep;
                          // ^ Statically allocated rep. In this file String::clear() and the
                          //   UTF-8 error paths point _rep at it instead of allocating.
 381 mike           1.27  
 382 mike           1.112 inline StringRep* StringRep::alloc(size_t cap)
 383 mike           1.27  {
 384 dave.sudlik    1.120     // Check for potential overflow in cap
 385                          PEGASUS_CHECK_CAPACITY_OVERFLOW(cap);
 386 mike           1.27  
 387 mike           1.112     StringRep* rep = (StringRep*)::operator new(
 388                              sizeof(StringRep) + cap * sizeof(Uint16));
 389                          rep->cap = cap;
 390                          new(&rep->refs) AtomicInt(1);
 391                      
 392                          return rep;
 393 mike           1.27  }
 394                      
 395 mike           1.112 static inline void _reserve(StringRep*& rep, Uint32 cap)
 396 chuck          1.102 {
 397 mike           1.114     if (cap > rep->cap || rep->refs.get() != 1)
 398 chuck          1.102     {
 399 mike           1.112         size_t n = _roundUpToPow2(cap);
 400                              StringRep* newRep = StringRep::alloc(n);
 401                              newRep->size = rep->size;
 402                              _copy(newRep->data, rep->data, rep->size + 1);
 403                              StringRep::unref(rep);
 404                              rep = newRep;
 405                          }
 406                      }
 407 david.dillard  1.105 
 408 mike           1.112 StringRep* StringRep::create(const Uint16* data, size_t size)
 409                      {
 410                          StringRep* rep = StringRep::alloc(size);
 411                          rep->size = size;
 412                          _copy(rep->data, data, size);
 413                          rep->data[size] = '\0';
 414                          return rep;
 415                      }
 416 chuck          1.102 
 417 mike           1.112 StringRep* StringRep::copyOnWrite(StringRep* rep)
 418                      {
 419                          // Return a new copy of rep. Release rep.
 420 chuck          1.102 
 421 mike           1.112     StringRep* newRep = StringRep::alloc(rep->size);
 422                          newRep->size = rep->size;
 423                          _copy(newRep->data, rep->data, rep->size);
 424                          newRep->data[newRep->size] = '\0';
 425                          StringRep::unref(rep);
 426                          return newRep;
 427 chuck          1.102 }
 428                      
 429 mike           1.112 StringRep* StringRep::create(const char* data, size_t size)
 430 kumpf          1.43  {
 431 mike           1.112     StringRep* rep = StringRep::alloc(size);
 432                          size_t utf8_error_index;
 433                          rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index);
 434                      
 435                          if (rep->size == size_t(-1))
 436                          {
 437                              StringRep::free(rep);
 438 a.dunfey       1.125         _StringThrowBadUTF8((Uint32)utf8_error_index);
 439 mike           1.112     }
 440 kumpf          1.43  
 441 mike           1.112     rep->data[rep->size] = '\0';
 442 kumpf          1.43  
 443 mike           1.112     return rep;
 444 mike           1.27  }
 445                      
 446 mike           1.112 Uint32 StringRep::length(const Uint16* str)
 447 mike           1.27  {
 448 mike           1.112     // Note: We could unroll this but it is rarely called.
 449                      
 450                          const Uint16* end = (Uint16*)str;
 451                      
 452                          while (*end++)
 453                              ;
 454                      
 455 a.dunfey       1.125     return (Uint32)(end - str - 1);
 456 kumpf          1.39  }
 457 tony           1.66  
 458 mike           1.112 //==============================================================================
 459                      //
 460                      // class String
 461                      //
 462                      //==============================================================================
 463                      
 464                      const String String::EMPTY;
                          // ^ Default-constructed String constant defined once for the class.
 465 mike           1.27  
 466 kumpf          1.39  String::String(const String& str, Uint32 n)
 467                      {
 468 mike           1.112     _checkBounds(n, str._rep->size);
 469                          _rep = StringRep::create(str._rep->data, n);
 470 kumpf          1.39  }
 471                      
 472                      String::String(const Char16* str)
 473                      {
 474 mike           1.112     _checkNullPointer(str);
 475                          _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
 476 mike           1.27  }
 477                      
 478 kumpf          1.39  String::String(const Char16* str, Uint32 n)
 479                      {
 480 mike           1.112     _checkNullPointer(str);
 481                          _rep = StringRep::create((Uint16*)str, n);
 482 kumpf          1.39  }
 483                      
 484                      String::String(const char* str)
 485 mike           1.27  {
 486 mike           1.112     _checkNullPointer(str);
 487 david.dillard  1.105 
 488 mike           1.112     // Set this just in case create() throws an exception.
 489                          _rep = &StringRep::_emptyRep;
 490                          _rep = StringRep::create(str, strlen(str));
 491 mike           1.27  }
 492                      
 493 kumpf          1.39  String::String(const char* str, Uint32 n)
 494 mike           1.27  {
                              // Constructs a String from the first n bytes of str, which
                              // create() passes through _convert(). Throws NullPointer if str is
                              // null; create() throws if the conversion fails.
 495 mike           1.112     _checkNullPointer(str);
 496 david.dillard  1.105 
 497 mike           1.112     // Set this just in case create() throws an exception.
 498                          _rep = &StringRep::_emptyRep;
 499                          _rep = StringRep::create(str, n);
 500 kumpf          1.39  }
 501 mike           1.27  
 502 mike           1.112 String::String(const String& s1, const String& s2)
 503 kumpf          1.39  {
 504 mike           1.112     size_t n1 = s1._rep->size;
 505                          size_t n2 = s2._rep->size;
 506                          size_t n = n1 + n2;
 507                          _rep = StringRep::alloc(n);
 508                          _copy(_rep->data, s1._rep->data, n1);
 509                          _copy(_rep->data + n1, s2._rep->data, n2);
 510                          _rep->size = n;
 511                          _rep->data[n] = '\0';
 512 mike           1.27  }
 513                      
 514 mike           1.112 String::String(const String& s1, const char* s2)
 515 mike           1.27  {
 516 mike           1.112     _checkNullPointer(s2);
 517                          size_t n1 = s1._rep->size;
 518                          size_t n2 = strlen(s2);
 519                          _rep = StringRep::alloc(n1 + n2);
 520                          _copy(_rep->data, s1._rep->data, n1);
 521                          size_t utf8_error_index;
 522                          size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index);
 523                      
 524                          if (tmp == size_t(-1))
 525 kumpf          1.82      {
 526 mike           1.112         StringRep::free(_rep);
 527                              _rep = &StringRep::_emptyRep;
 528 a.dunfey       1.125         _StringThrowBadUTF8((Uint32)utf8_error_index);
 529 kumpf          1.82      }
 530 mike           1.112 
 531                          _rep->size = n1 + tmp;
 532                          _rep->data[_rep->size] = '\0';
 533 mike           1.27  }
 534                      
 535 mike           1.112 String::String(const char* s1, const String& s2)
 536 mike           1.27  {
 537 mike           1.112     _checkNullPointer(s1);
 538                          size_t n1 = strlen(s1);
 539                          size_t n2 = s2._rep->size;
 540                          _rep = StringRep::alloc(n1 + n2);
 541                          size_t utf8_error_index;
 542                          size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index);
 543                      
 544                          if (tmp ==  size_t(-1))
 545                          {
 546                              StringRep::free(_rep);
 547                              _rep = &StringRep::_emptyRep;
 548 a.dunfey       1.125         _StringThrowBadUTF8((Uint32)utf8_error_index);
 549 mike           1.112     }
 550                      
 551                          _rep->size = n2 + tmp;
 552                          _copy(_rep->data + n1, s2._rep->data, n2);
 553                          _rep->data[_rep->size] = '\0';
 554 mike           1.27  }
 555                      
 556 mike           1.112 String& String::assign(const String& str)
 557 mike           1.27  {
 558 mike           1.112     if (_rep != str._rep)
 559 david.dillard  1.105     {
 560 mike           1.112         StringRep::unref(_rep);
 561                              StringRep::ref(_rep = str._rep);
 562 david.dillard  1.105     }
 563                      
 564 mike           1.27      return *this;
 565                      }
 566                      
 567                      String& String::assign(const Char16* str, Uint32 n)
 568                      {
 569 mike           1.112     _checkNullPointer(str);
 570                      
 571 mike           1.114     if (n > _rep->cap || _rep->refs.get() != 1)
 572 david.dillard  1.105     {
 573 mike           1.112         StringRep::unref(_rep);
 574                              _rep = StringRep::alloc(n);
 575 david.dillard  1.105     }
 576                      
 577 mike           1.112     _rep->size = n;
 578                          _copy(_rep->data, (Uint16*)str, n);
 579                          _rep->data[n] = '\0';
 580                      
 581 mike           1.27      return *this;
 582                      }
 583                      
 584 mike           1.112 String& String::assign(const char* str, Uint32 n)
 585 chuck          1.102 {
 586 mike           1.112     _checkNullPointer(str);
 587                      
 588 mike           1.114     if (n > _rep->cap || _rep->refs.get() != 1)
 589 david.dillard  1.105     {
 590 mike           1.112         StringRep::unref(_rep);
 591                              _rep = StringRep::alloc(n);
 592 david.dillard  1.105     }
 593                      
 594 mike           1.112     size_t utf8_error_index;
 595                          _rep->size = _convert(_rep->data, str, n, utf8_error_index);
 596 chuck          1.102 
 597 mike           1.112     if (_rep->size ==  size_t(-1))
 598 david.dillard  1.105     {
 599 mike           1.112         StringRep::free(_rep);
 600                              _rep = &StringRep::_emptyRep;
 601 a.dunfey       1.125         _StringThrowBadUTF8((Uint32)utf8_error_index);
 602 david.dillard  1.105     }
 603 mike           1.112 
 604                          _rep->data[_rep->size] = 0;
 605 david.dillard  1.105 
 606 mike           1.27      return *this;
 607                      }
 608                      
 609 kumpf          1.39  void String::clear()
 610                      {
 611 mike           1.112     if (_rep->size)
 612                          {
 613 mike           1.114         if (_rep->refs.get() == 1)
 614 mike           1.112         {
 615                                  _rep->size = 0;
 616                                  _rep->data[0] = '\0';
 617                              }
 618                              else
 619                              {
 620                                  StringRep::unref(_rep);
 621                                  _rep = &StringRep::_emptyRep;
 622                              }
 623                          }
 624 kumpf          1.39  }
 625                      
 626 mike           1.112 void String::reserveCapacity(Uint32 cap)
 627 kumpf          1.39  {
 628 mike           1.112     _reserve(_rep, cap);
 629 kumpf          1.39  }
 630                      
 631 mike           1.112 CString String::getCString() const
 632                      {
 633 david.dillard  1.116     // A UTF8 string can have three times as many characters as its UTF16
 634                          // counterpart, so we allocate extra memory for the worst case. In the
 635 mike           1.112     // best case, we may need only one third of the memory allocated. But
 636 david.dillard  1.116     // downsizing the string afterwards is expensive and unecessary since
 637                          // CString objects are usually short-lived (disappearing after only a few
 638 mike           1.112     // instructions). CString objects are typically created on the stack as
 639                          // means to obtain a char* pointer.
 640                      
 641                      #ifdef PEGASUS_STRING_NO_UTF8
 642                          char* str = (char*)operator new(_rep->size + 1);
 643                          _copy(str, _rep->data, _rep->size);
 644                          str[_rep->size] = '\0';
 645                          return CString(str);
 646 gs.keenan      1.110 #else
 647 a.dunfey       1.125     Uint32 n = (Uint32)(3 * _rep->size);
 648 mike           1.112     char* str = (char*)operator new(n + 1);
 649                          size_t size = _copyToUTF8(str, _rep->data, _rep->size);
 650                          str[size] = '\0';
 651                          return CString(str);
 652 gs.keenan      1.110 #endif
 653 kumpf          1.39  }
 654                      
 655 mike           1.112 String& String::append(const Char16* str, Uint32 n)
 656 kumpf          1.39  {
 657 mike           1.112     _checkNullPointer(str);
 658                      
 659                          size_t oldSize = _rep->size;
 660                          size_t newSize = oldSize + n;
 661 a.dunfey       1.125     _reserve(_rep, (Uint32)newSize);
 662 mike           1.112     _copy(_rep->data + oldSize, (Uint16*)str, n);
 663                          _rep->size = newSize;
 664                          _rep->data[newSize] = '\0';
 665                      
 666                          return *this;
 667 kumpf          1.39  }
 668                      
 669 mike           1.112 String& String::append(const String& str)
 670 mike           1.27  {
 671 a.dunfey       1.125     return append((Char16*)(&(str._rep->data[0])), (Uint32)str._rep->size);
 672 mike           1.27  }
 673                      
 674 mike           1.112 String& String::append(const char* str, Uint32 size)
 675 mike           1.27  {
 676 mike           1.112     _checkNullPointer(str);
 677                      
 678                          size_t oldSize = _rep->size;
 679                          size_t cap = oldSize + size;
 680                      
 681 a.dunfey       1.125     _reserve(_rep, (Uint32)cap);
 682 mike           1.112     size_t utf8_error_index;
 683                          size_t tmp = _convert(
 684                              (Uint16*)_rep->data + oldSize, str, size, utf8_error_index);
 685                      
 686                          if (tmp ==  size_t(-1))
 687                          {
 688                              StringRep::free(_rep);
 689                              _rep = &StringRep::_emptyRep;
 690 a.dunfey       1.125         _StringThrowBadUTF8((Uint32)utf8_error_index);
 691 mike           1.112     }
 692 mike           1.27  
 693 mike           1.112     _rep->size += tmp;
 694                          _rep->data[_rep->size] = '\0';
 695 mike           1.27  
 696 kumpf          1.39      return *this;
 697                      }
 698                      
 699 mike           1.112 void String::remove(Uint32 index, Uint32 n)
 700 mike           1.27  {
 701 mike           1.112     if (n == PEG_NOT_FOUND)
 702 a.dunfey       1.125         n = (Uint32)(_rep->size - index);
 703 mike           1.112 
 704                          _checkBounds(index + n, _rep->size);
 705                      
 706 mike           1.114     if (_rep->refs.get() != 1)
 707 mike           1.112         _rep = StringRep::copyOnWrite(_rep);
 708 mike           1.27  
 709 jim.wunderlich 1.115     PEGASUS_ASSERT(index + n <= _rep->size);
 710 mike           1.27  
 711 mike           1.112     size_t rem = _rep->size - (index + n);
 712                          Uint16* data = _rep->data;
 713 mike           1.27  
 714 mike           1.112     if (rem)
 715                              memmove(data + index, data + index + n, rem * sizeof(Uint16));
 716 mike           1.27  
 717 mike           1.112     _rep->size -= n;
 718                          data[_rep->size] = '\0';
 719 mike           1.27  }
 720                      
 721 mike           1.112 String String::subString(Uint32 index, Uint32 n) const
 722 mike           1.27  {
 723 mike           1.112     // Note: this implementation is very permissive but used for
 724                          // backwards compatibility.
 725                      
 726                          if (index < _rep->size)
 727 mike           1.27      {
 728 mike           1.112         if (n == PEG_NOT_FOUND || n > _rep->size - index)
 729 a.dunfey       1.125             n = (Uint32)(_rep->size - index);
 730 mike           1.27  
 731 w.otsuka       1.121         return String((Char16*)(_rep->data + index), n);
 732 mike           1.27      }
 733 david.dillard  1.105 
 734                          return String();
 735 mike           1.27  }
 736                      
 737                      Uint32 String::find(Char16 c) const
 738                      {
 739 mike           1.112     Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
 740 mike           1.27  
 741 mike           1.112     if (p)
 742 david.dillard  1.116         return static_cast<Uint32>(p - _rep->data);
 743 mike           1.27  
 744                          return PEG_NOT_FOUND;
 745                      }
 746                      
 747 kumpf          1.53  Uint32 String::find(Uint32 index, Char16 c) const
 748 mike           1.30  {
 749 mike           1.112     _checkBounds(index, _rep->size);
 750                      
 751                          if (index >= _rep->size)
 752                              return PEG_NOT_FOUND;
 753                      
 754                          Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
 755 mike           1.30  
 756 mike           1.112     if (p)
 757 david.dillard  1.116         return static_cast<Uint32>(p - _rep->data);
 758 mike           1.30  
 759                          return PEG_NOT_FOUND;
 760                      }
 761                      
 762 mike           1.112 Uint32 StringFindAux(
 763                          const StringRep* _rep, const Char16* s, Uint32 n)
 764 mike           1.27  {
 765 mike           1.112     _checkNullPointer(s);
 766 mike           1.27  
 767 mike           1.112     const Uint16* data = _rep->data;
 768                          size_t rem = _rep->size;
 769                      
 770                          while (n <= rem)
 771 mike           1.30      {
 772 mike           1.112         Uint16* p = (Uint16*)_find(data, rem, s[0]);
 773                      
 774                              if (!p)
 775                                  break;
 776 mike           1.30  
 777 mike           1.112         if (memcmp(p, s, n * sizeof(Uint16)) == 0)
 778 david.dillard  1.116             return static_cast<Uint32>(p - _rep->data);
 779 david.dillard  1.105 
 780 mike           1.112         p++;
 781                              rem -= p - data;
 782                              data = p;
 783 mike           1.27      }
 784 mike           1.112 
 785 mike           1.27      return PEG_NOT_FOUND;
 786                      }
 787                      
 788 mike           1.112 Uint32 String::find(const char* s) const
 789                      {
 790                          _checkNullPointer(s);
 791                      
 792                          // Note: could optimize away creation of temporary, but this is rarely
 793                          // called.
 794                          return find(String(s));
 795                      }
 796                      
 797 mike           1.27  Uint32 String::reverseFind(Char16 c) const
 798                      {
 799 mike           1.112     Uint16 x = c;
 800                          Uint16* p = _rep->data;
 801                          Uint16* q = _rep->data + _rep->size;
 802 mike           1.27  
 803 mike           1.112     while (q != p)
 804 mike           1.27      {
 805 mike           1.112         if (*--q == x)
 806 david.dillard  1.116             return static_cast<Uint32>(q - p);
 807 mike           1.27      }
 808                      
 809                          return PEG_NOT_FOUND;
 810                      }
 811                      
 812                      void String::toLower()
 813                      {
 814 david          1.69  #ifdef PEGASUS_HAS_ICU
 815 mike           1.112 
 816 yi.zhou        1.108     if (InitializeICU::initICUSuccessful())
 817 david          1.90      {
 818 mike           1.114         if (_rep->refs.get() != 1)
 819 mike           1.112             _rep = StringRep::copyOnWrite(_rep);
 820                      
 821 yi.zhou        1.108         // This will do a locale-insensitive, but context-sensitive convert.
 822 david.dillard  1.116         // Since context-sensitive casing looks at adjacent chars, this
 823                              // prevents optimizations where the us-ascii is converted before
 824 mike           1.112         // calling ICU.
 825 yi.zhou        1.108         // The string may shrink or expand after the convert.
 826                      
 827 mike           1.112         //// First calculate size of resulting string. u_strToLower() returns
 828                              //// only the size when zero is passed as the destination size argument.
 829                      
 830 yi.zhou        1.108         UErrorCode err = U_ZERO_ERROR;
 831                      
 832 mike           1.112         int32_t newSize = u_strToLower(
 833                                  NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
 834 david.dillard  1.116 
 835 mike           1.112         err = U_ZERO_ERROR;
 836                      
 837                              //// Reserve enough space for the result.
 838                      
 839                              if ((Uint32)newSize > _rep->cap)
 840                                  _reserve(_rep, newSize);
 841                      
 842                              //// Perform the conversion (overlapping buffers are allowed).
 843 chuck          1.99  
 844 mike           1.112         u_strToLower((UChar*)_rep->data, newSize,
 845                                  (UChar*)_rep->data, _rep->size, NULL, &err);
 846 yi.zhou        1.108 
 847 mike           1.112         _rep->size = newSize;
 848                              return;
 849 david          1.90      }
 850 mike           1.112 
 851                      #endif /* PEGASUS_HAS_ICU */
 852                      
 853 mike           1.114     if (_rep->refs.get() != 1)
 854 mike           1.112         _rep = StringRep::copyOnWrite(_rep);
 855                      
 856                          Uint16* p = _rep->data;
 857                          size_t n = _rep->size;
 858                      
 859                          for (; n--; p++)
 860 david          1.90      {
 861 mike           1.112         if (!(*p & 0xFF00))
 862                                  *p = _toLower(*p);
 863 mike           1.27      }
 864 kumpf          1.39  }
 865                      
 866 chuck          1.99  void String::toUpper()
 867 david          1.90  {
 868                      #ifdef PEGASUS_HAS_ICU
 869 mike           1.112 
 870 yi.zhou        1.108     if (InitializeICU::initICUSuccessful())
 871 chuck          1.99      {
 872 mike           1.114         if (_rep->refs.get() != 1)
 873 mike           1.112             _rep = StringRep::copyOnWrite(_rep);
 874                      
 875 yi.zhou        1.108         // This will do a locale-insensitive, but context-sensitive convert.
 876 david.dillard  1.116         // Since context-sensitive casing looks at adjacent chars, this
 877                              // prevents optimizations where the us-ascii is converted before
 878 mike           1.112         // calling ICU.
 879 yi.zhou        1.108         // The string may shrink or expand after the convert.
 880                      
 881 mike           1.112         //// First calculate size of resulting string. u_strToUpper() returns
 882                              //// only the size when zero is passed as the destination size argument.
 883                      
 884 yi.zhou        1.108         UErrorCode err = U_ZERO_ERROR;
 885                      
 886 mike           1.112         int32_t newSize = u_strToUpper(
 887                                  NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
 888                      
 889                              err = U_ZERO_ERROR;
 890                      
 891                              //// Reserve enough space for the result.
 892                      
 893                              if ((Uint32)newSize > _rep->cap)
 894                                  _reserve(_rep, newSize);
 895                      
 896                              //// Perform the conversion (overlapping buffers are allowed).
 897                      
 898                              u_strToUpper((UChar*)_rep->data, newSize,
 899                                  (UChar*)_rep->data, _rep->size, NULL, &err);
 900 chuck          1.99  
 901 mike           1.112         _rep->size = newSize;
 902 yi.zhou        1.108 
 903 mike           1.112         return;
 904 david          1.91      }
 905 mike           1.112 
 906                      #endif /* PEGASUS_HAS_ICU */
 907                      
 908 mike           1.114     if (_rep->refs.get() != 1)
 909 mike           1.112         _rep = StringRep::copyOnWrite(_rep);
 910                      
 911                          Uint16* p = _rep->data;
 912                          size_t n = _rep->size;
 913                      
 914                          for (; n--; p++)
 915                              *p = _toUpper(*p);
 916 david          1.90  }
 917                      
 918 kumpf          1.43  int String::compare(const String& s1, const String& s2, Uint32 n)
 919 kumpf          1.39  {
 920 kumpf          1.118     const Uint16* p1 = s1._rep->data;
 921                          const Uint16* p2 = s2._rep->data;
 922 mike           1.27  
 923 kumpf          1.118     while (n--)
 924                          {
 925                              int r = *p1++ - *p2++;
 926                              if (r)
 927                              {
 928                                  return r;
 929                              }
 930                              else if (!p1[-1])
 931                              {
 932                                  // We must have encountered a null terminator in both s1 and s2
 933                                  return 0;
 934                              }
 935                          }
 936                          return 0;
 937 mike           1.27  }
 938                      
 939 kumpf          1.43  int String::compare(const String& s1, const String& s2)
 940 mike           1.30  {
 941 mike           1.112     return _compare(s1._rep->data, s2._rep->data);
 942                      }
 943 kumpf          1.43  
 944 mike           1.112 int String::compare(const String& s1, const char* s2)
 945                      {
 946                          _checkNullPointer(s2);
 947 mike           1.30  
 948 mike           1.112 #ifdef PEGASUS_STRING_NO_UTF8
 949                          return _compareNoUTF8(s1._rep->data, s2);
 950                      #else
 951                          // ATTN: optimize this!
 952                          return String::compare(s1, String(s2));
 953                      #endif
 954 mike           1.30  }
 955                      
 956 mike           1.112 int String::compareNoCase(const String& str1, const String& str2)
 957 kumpf          1.40  {
 958 david          1.69  #ifdef PEGASUS_HAS_ICU
 959 mike           1.112 
 960 yi.zhou        1.108     if (InitializeICU::initICUSuccessful())
 961                          {
 962 mike           1.112         return  u_strcasecmp(
 963 dave.sudlik    1.124             (const UChar*)str1._rep->data,
 964                                  (const UChar*)str2._rep->data,
 965                                  U_FOLD_CASE_DEFAULT
 966                                  );
 967 yi.zhou        1.108     }
 968 kumpf          1.40  
 969 mike           1.112 #endif /* PEGASUS_HAS_ICU */
 970                      
 971                          const Uint16* s1 = str1._rep->data;
 972                          const Uint16* s2 = str2._rep->data;
 973                      
 974                          while (*s1 && *s2)
 975 kumpf          1.40      {
 976 mike           1.112         int r = _toLower(*s1++) - _toLower(*s2++);
 977 kumpf          1.40  
 978 david.dillard  1.105         if (r)
 979                                  return r;
 980 kumpf          1.40      }
 981                      
 982 mike           1.112     if (*s2)
 983 david.dillard  1.105         return -1;
 984 mike           1.112     else if (*s1)
 985 david.dillard  1.105         return 1;
 986 kumpf          1.40  
 987                          return 0;
 988                      }
 989                      
 990 mike           1.112 Boolean StringEqualNoCase(const String& s1, const String& s2)
 991 mike           1.27  {
 992 mike           1.112 #ifdef PEGASUS_HAS_ICU
 993                      
 994                          return String::compareNoCase(s1, s2) == 0;
 995                      
 996                      #else /* PEGASUS_HAS_ICU */
 997 mike           1.27  
 998 mike           1.112     // The following employs loop unrolling for efficiency. Please do not
 999                          // eliminate.
1000 kumpf          1.39  
1001 mike           1.112     Uint16* p = (Uint16*)s1.getChar16Data();
1002                          Uint16* q = (Uint16*)s2.getChar16Data();
1003                          Uint32 n = s2.size();
1004                      
1005                          while (n >= 8)
1006                          {
1007                              if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1008                                  ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1009                                  ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1010                                  ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) ||
1011                                  ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) ||
1012                                  ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) ||
1013                                  ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) ||
1014                                  ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7]))))
1015                              {
1016                                  return false;
1017                              }
1018 kumpf          1.39  
1019 mike           1.112         n -= 8;
1020                              p += 8;
1021                              q += 8;
1022                          }
1023 mike           1.27  
1024 mike           1.112     while (n >= 4)
1025 kumpf          1.39      {
1026 mike           1.112         if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1027                                  ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1028                                  ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1029                                  ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
1030 david.dillard  1.105         {
1031 mike           1.112             return false;
1032 david.dillard  1.105         }
1033 mike           1.112 
1034                              n -= 4;
1035                              p += 4;
1036                              q += 4;
1037                          }
1038                      
1039                          while (n--)
1040                          {
1041                              if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
1042 david.dillard  1.105             return false;
1043 mike           1.112 
1044                              p++;
1045                              q++;
1046 kumpf          1.39      }
1047 mike           1.28  
1048 kumpf          1.39      return true;
1049 mike           1.112 
1050                      #endif /* PEGASUS_HAS_ICU */
1051 david          1.69  }
1052                      
1053 mike           1.112 Boolean String::equalNoCase(const String& s1, const char* s2)
1054 david          1.69  {
1055 mike           1.112     _checkNullPointer(s2);
1056 david          1.69  
1057 mike           1.112 #if defined(PEGASUS_HAS_ICU)
1058 david          1.69  
1059 mike           1.112     return String::equalNoCase(s1, String(s2));
1060 david          1.69  
1061 mike           1.112 #elif defined(PEGASUS_STRING_NO_UTF8)
1062 david          1.69  
1063 mike           1.112     const Uint16* p1 = (Uint16*)s1._rep->data;
1064                          const char* p2 = s2;
1065                          size_t n = s1._rep->size;
1066 david.dillard  1.105 
1067 mike           1.112     while (n--)
1068                          {
1069                              if (!*p2)
1070                                  return false;
1071 david          1.71  
1072 mike           1.112         if (_toUpper(*p1++) != _toUpperTable[int(*p2++)])
1073                                  return false;
1074                          }
1075 kumpf          1.42  
1076 mike           1.112     if (*p2)
1077                              return false;
1078 david.dillard  1.116 
1079 mike           1.112     return true;
1080 karl           1.36  
1081 mike           1.112 #else /* PEGASUS_HAS_ICU */
1082 david.dillard  1.105 
1083 mike           1.112     // ATTN: optimize this!
1084                          return String::equalNoCase(s1, String(s2));
1085 david.dillard  1.105 
1086 mike           1.112 #endif /* PEGASUS_HAS_ICU */
1087                      }
1088 chuck          1.78  
1089 mike           1.112 Boolean String::equal(const String& s1, const String& s2)
1090 karl           1.36  {
1091 marek          1.131     return (s1._rep == s2._rep) ||
1092 marek          1.137         ((s1._rep->size == s2._rep->size) &&
1093                               memcmp(s1._rep->data,
1094                                      s2._rep->data,
1095                                      s1._rep->size * sizeof(Uint16)) == 0);
1096 karl           1.36  }
1097                      
1098 mike           1.112 Boolean String::equal(const String& s1, const char* s2)
1099                      {
1100                      #ifdef PEGASUS_STRING_NO_UTF8
1101 kumpf          1.35  
1102 mike           1.112     _checkNullPointer(s2);
1103 kumpf          1.39  
1104 mike           1.112     const Uint16* p = (Uint16*)s1._rep->data;
1105                          const char* q = s2;
1106 kumpf          1.39  
1107 mike           1.112     while (*p && *q)
1108                          {
1109                              if (*p++ != Uint16(*q++))
1110                                  return false;
1111                          }
1112 kumpf          1.39  
1113 mike           1.112     return !(*p || *q);
1114 kumpf          1.39  
1115 mike           1.112 #else /* PEGASUS_STRING_NO_UTF8 */
1116 kumpf          1.39  
1117 mike           1.112     return String::equal(s1, String(s2));
1118 kumpf          1.39  
1119 mike           1.112 #endif /* PEGASUS_STRING_NO_UTF8 */
1120 kumpf          1.39  }
1121                      
1122 kumpf          1.47  PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
1123 kumpf          1.39  {
1124 mike           1.112 #if defined(PEGASUS_HAS_ICU)
1125 david          1.69  
1126 yi.zhou        1.108     if (InitializeICU::initICUSuccessful())
1127                          {
1128 david.dillard  1.105         char *buf = NULL;
1129                              const int size = str.size() * 6;
1130 mike           1.112         UnicodeString UniStr(
1131                                  (const UChar *)str.getChar16Data(), (int32_t)str.size());
1132 david.dillard  1.105         Uint32 bufsize = UniStr.extract(0,size,buf);
1133                              buf = new char[bufsize+1];
1134                              UniStr.extract(0,bufsize,buf);
1135                              os << buf;
1136                              os.flush();
1137                              delete [] buf;
1138 david.dillard  1.116         return os;
1139 yi.zhou        1.108     }
1140 mike           1.112 
1141 david.dillard  1.116 #endif  // PEGASUS_HAS_ICU
1142 mike           1.112 
1143                          for (Uint32 i = 0, n = str.size(); i < n; i++)
1144 yi.zhou        1.108     {
1145 mike           1.112         Uint16 code = str[i];
1146 david.dillard  1.105 
1147 mike           1.112         if (code > 0 && !(code & 0xFF00))
1148                                      os << char(code);
1149                              else
1150                                  {
1151                                  // Print in hex format:
1152                                  char buffer[8];
1153                                  sprintf(buffer, "\\x%04X", code);
1154                                  os << buffer;
1155 david.dillard  1.105         }
1156 yi.zhou        1.108     }
1157 kumpf          1.39  
1158                          return os;
1159                      }
1160                      
1161 mike           1.112 void StringAppendCharAux(StringRep*& _rep)
1162 kumpf          1.39  {
1163 mike           1.112     StringRep* tmp;
1164                      
1165                          if (_rep->cap)
1166                          {
1167                              tmp = StringRep::alloc(2 * _rep->cap);
1168                              tmp->size = _rep->size;
1169                              _copy(tmp->data, _rep->data, _rep->size);
1170                          }
1171                          else
1172                          {
1173                              tmp = StringRep::alloc(8);
1174                              tmp->size = 0;
1175                          }
1176                      
1177                          StringRep::unref(_rep);
1178                          _rep = tmp;
1179 kumpf          1.39  }
1180                      
1181 thilo.boehm    1.128 void AssignASCII(String& s, const char* str, Uint32 n)
1182                      {
1183                          class StringLayout
1184                          {
1185                          public:
1186                              StringRep* rep;
1187                          };
1188                      
1189 kumpf          1.130     StringLayout* that = reinterpret_cast<StringLayout*>(&s);
1190 thilo.boehm    1.128 
1191                          _checkNullPointer(str);
1192                      
1193                          if (n > that->rep->cap || that->rep->refs.get() != 1)
1194                          {
1195                              StringRep::unref(that->rep);
1196                              that->rep = StringRep::alloc(n);
1197                          }
1198                      
1199                          _copy(that->rep->data, str, n);
1200                          that->rep->size = n;
1201                          that->rep->data[that->rep->size] = 0;
1202                      }
1203                      
1204 mike           1.112 PEGASUS_NAMESPACE_END
1205                      
1206                      /*
1207                      ================================================================================
1208                      
1209                      String optimizations:
1210                      
1211                          1.  Added mechanism allowing certain functions to be inlined only when
1212                              used by internal Pegasus modules. External modules (i.e., providers)
1213                              link to a non-inline version, which allows for binary compatibility.
1214                      
1215                          2.  Implemented copy-on-write with atomic increment/decrement. This
1216                              yielded a 10% improvement for the 'gc' benchmark and an 11% improvement
1217                              for the 'ni1000' benchmark.
1218                      
1219                          3.  Employed loop unrolling in several places. For example, see:
1220                      
1221                                  static Uint16* _find(const Uint16* s, size_t n, Uint16 c);
1222                      
1223                          4.  Used the "empty-rep" optimization (described in whitepaper from the
1224                              GCC Developers Summit). This reduced default construction to a simple
1225 mike           1.112         pointer assignment.
1226                      
1227                                  inline String::String() : _rep(&_emptyRep) { }
1228                      
1229                          5.  Implemented Uint16 versions of toupper() and tolower() using tables.
1230                              For example:
1231                      
1232                                  static const char _upper[] =
1233                                  {
1234                                      0,1,2,...255
1235                                  };
1236                      
1237                                  inline Uint16 _toUpper(Uint16 x)
1238                                  {
1239                                      return (x & 0xFF00) ? x : _upper[x];
1240                                  }
1241                      
1242 david.dillard  1.116         This outperforms the system implementation by avoiding an anding
1243 mike           1.112         operation.
1244                      
1245 david.dillard  1.116     6.  Implemented char* version of the following member functions to
1246                              eliminate unnecessary creation of anonymous string objects
1247 mike           1.112         (temporaries).
1248                      
1249                                  String(const String& s1, const char* s2);
1250                                  String(const char* s1, const String& s2);
1251                                  String& String::operator=(const char* str);
1252                                  Uint32 String::find(const char* s) const;
1253                                  bool String::equal(const String& s1, const char* s2);
1254                                  static int String::compare(const String& s1, const char* s2);
1255                                  String& String::append(const char* str);
1256                                  String& String::append(const char* str, Uint32 size);
1257                                  static bool String::equalNoCase(const String& s1, const char* s2);
1258                                  String& operator=(const char* str)
1259                                  String& String::assign(const char* str)
1260                                  String& String::append(const char* str)
1261                                  Boolean operator==(const String& s1, const char* s2)
1262                                  Boolean operator==(const char* s1, const String& s2)
1263                                  Boolean operator!=(const String& s1, const char* s2)
1264                                  Boolean operator!=(const char* s1, const String& s2)
1265                                  Boolean operator<(const String& s1, const char* s2)
1266                                  Boolean operator<(const char* s1, const String& s2)
1267                                  Boolean operator>(const String& s1, const char* s2)
1268 mike           1.112             Boolean operator>(const char* s1, const String& s2)
1269                                  Boolean operator<=(const String& s1, const char* s2)
1270                                  Boolean operator<=(const char* s1, const String& s2)
1271                                  Boolean operator>=(const String& s1, const char* s2)
1272                                  Boolean operator>=(const char* s1, const String& s2)
1273                                  String operator+(const String& s1, const char* s2)
1274                                  String operator+(const char* s1, const String& s2)
1275                      
1276 david.dillard  1.116     7.  Optimized _roundUpToPow2(), used in rounding the capacity to the next
1277 mike           1.112         power of two (algorithm from the book "Hacker's Delight").
1278                      
1279                                  static Uint32 _roundUpToPow2(Uint32 x)
1280                                  {
1281                                      if (x < 8)
1282                                          return 8;
1283                      
1284                                      x--;
1285                                      x |= (x >> 1);
1286                                      x |= (x >> 2);
1287                                      x |= (x >> 4);
1288                                      x |= (x >> 8);
1289                                      x |= (x >> 16);
1290                                      x++;
1291                      
1292                                      return x;
1293                                  }
1294                      
1295                          8.  Implemented "concatenating constructors" to eliminate temporaries
1296 david.dillard  1.116         created by operator+(). This scheme employs the "return-value
1297 mike           1.112         optimization" described by Stan Lippman.
1298                      
1299                                  inline String operator+(const String& s1, const String& s2)
1300                                  {
1301                                      return String(s1, s2, 0);
1302                                  }
1303                      
1304                          9.  Experimented to find the optimal initial size for a short string.
1305                              Eight seems to offer the best tradeoff between space and time.
1306                      
1307                          10. Inlined all members of the Char16 class.
1308                      
1309                          11. Used Uint16 internally in the String class. This showed no improvement
1310                              since Char16 was already fully inlined and was essentially reduced to
1311                              Uint16 in any case.
1312                      
1313                          12. Implemented conditional logic (#if) allowing error checking logic to
1314 david.dillard  1.116         be excluded for better performance. Examples include bounds checking
1315 mike           1.112         and null-pointer checking.
1316                      
1317                          13. Used memcpy() and memcmp() where possible. These are implemented using
1318                              the rep family of instructions under Intel and are much faster.
1319                      
1320 david.dillard  1.116     14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
1321 mike           1.112         copy routine overhead.
1322                      
1323                          15. Added ASCII7 form of the constructor and assign().
1324                      
1325                                  String s("hello world", String::ASCII7);
1326                      
1327                                  s.assignASCII7("hello world");
1328                      
1329                              This avoids slower UTF8 processing when not needed.
1330                      
1331                      ================================================================================
1332                      */

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2