(file) Return to String.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 martin 1.133 //%LICENSE////////////////////////////////////////////////////////////////
   2 martin 1.134 //
   3 martin 1.133 // Licensed to The Open Group (TOG) under one or more contributor license
   4              // agreements.  Refer to the OpenPegasusNOTICE.txt file distributed with
   5              // this work for additional information regarding copyright ownership.
   6              // Each contributor licenses this file to you under the OpenPegasus Open
   7              // Source License; you may not use this file except in compliance with the
   8              // License.
   9 martin 1.134 //
  10 martin 1.133 // Permission is hereby granted, free of charge, to any person obtaining a
  11              // copy of this software and associated documentation files (the "Software"),
  12              // to deal in the Software without restriction, including without limitation
  13              // the rights to use, copy, modify, merge, publish, distribute, sublicense,
  14              // and/or sell copies of the Software, and to permit persons to whom the
  15              // Software is furnished to do so, subject to the following conditions:
  16 martin 1.134 //
  17 martin 1.133 // The above copyright notice and this permission notice shall be included
  18              // in all copies or substantial portions of the Software.
  19 martin 1.134 //
  20 martin 1.133 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  21 martin 1.134 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  22 martin 1.133 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  23              // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  24              // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  25              // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  26              // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27 martin 1.134 //
  28 martin 1.133 //////////////////////////////////////////////////////////////////////////
  29 mike   1.27  //
  30              //%/////////////////////////////////////////////////////////////////////////////
  31              
  32 jim.wunderlich 1.115 #include <Pegasus/Common/PegasusAssert.h>
  33 mike           1.113 #include <cstring>
  34 kumpf          1.48  #include "InternalException.h"
  35 mike           1.112 #include "MessageLoader.h"
  36                      #include "StringRep.h"
  37 david          1.69  
  38                      #ifdef PEGASUS_HAS_ICU
  39 kumpf          1.132 # include <unicode/ures.h>
  40                      # include <unicode/ustring.h>
  41                      # include <unicode/uchar.h>
  42 david          1.69  #endif
  43                      
  44 mike           1.112 PEGASUS_NAMESPACE_BEGIN
  45 mike           1.28  
  46 mike           1.112 //==============================================================================
  47                      //
  48                      // Compile-time macros (undefined by default).
  49                      //
  50                      //     PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
  51                      //
  52                      //==============================================================================
  53 mike           1.27  
  54 mike           1.112 //==============================================================================
  55 kumpf          1.39  //
  56 mike           1.112 // File-scope definitions:
  57 kumpf          1.54  //
  58 mike           1.112 //==============================================================================
  59                      
  60                      // Note: this table is much faster than the system toupper(). Please do not
  61                      // change.
  62 kumpf          1.54  
  63 david.dillard  1.116 const Uint8 _toUpperTable[256] =
  64 kumpf          1.54  {
  65 mike           1.112     0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
  66                          0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
  67                          0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
  68                          0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
  69                          0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
  70                          0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
  71                          0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
  72                          0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
  73                          0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
  74                          0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
  75                          0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
  76                          0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
  77                          0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
  78                          0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
  79                          0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
  80                          0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
  81                          0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
  82                          0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
  83                          0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
  84                          0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
  85                          0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
  86 mike           1.112     0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
  87                          0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
  88                          0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
  89                          0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
  90                          0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
  91                          0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
  92                          0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
  93                          0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
  94                          0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
  95                          0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
  96                          0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
  97                      };
  98                      
  99 dev.meetei     1.139 // Note: this table is much faster than the system tolower(). Please do not
 100 mike           1.112 // change.
 101                      
 102 david.dillard  1.116 const Uint8 _toLowerTable[256] =
 103 mike           1.112 {
 104                          0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
 105                          0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
 106                          0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
 107                          0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
 108                          0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
 109                          0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
 110                          0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
 111                          0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
 112                          0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
 113                          0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
 114                          0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
 115                          0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
 116                          0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
 117                          0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
 118                          0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
 119                          0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
 120                          0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
 121                          0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
 122                          0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
 123                          0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
 124 mike           1.112     0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
 125                          0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
 126                          0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
 127                          0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
 128                          0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
 129                          0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
 130                          0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
 131                          0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
 132                          0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
 133                          0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
 134                          0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
 135                          0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
 136                      };
 137                      
 138                      // Converts 16-bit characters to upper case. This routine is faster than the
 139                      // system toupper(). Please do not change.
 140                      inline Uint16 _toUpper(Uint16 x)
 141                      {
 142                          return (x & 0xFF00) ? x : _toUpperTable[x];
 143 kumpf          1.54  }
 144                      
 145 mike           1.112 // Converts 16-bit characters to lower case. This routine is faster than the
 146                      // system toupper(). Please do not change.
 147                      inline Uint16 _toLower(Uint16 x)
 148 kumpf          1.54  {
 149 mike           1.112     return (x & 0xFF00) ? x : _toLowerTable[x];
 150                      }
 151                      
 152                      // Rounds x up to the nearest power of two (or just returns 8 if x < 8).
 153                      static Uint32 _roundUpToPow2(Uint32 x)
 154                      {
 155 dave.sudlik    1.120     // Check for potential overflow in x
 156                          PEGASUS_CHECK_CAPACITY_OVERFLOW(x);
 157 mike           1.112 
 158                          if (x < 8)
 159                              return 8;
 160                      
 161                          x--;
 162                          x |= (x >> 1);
 163                          x |= (x >> 2);
 164                          x |= (x >> 4);
 165                          x |= (x >> 8);
 166                          x |= (x >> 16);
 167                          x++;
 168                      
 169                          return x;
 170                      }
 171                      
 172                      static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
 173                      {
 174                          // The following employs loop unrolling for efficiency. Please do not
 175                          // eliminate.
 176                      
 177                          while (n >= 4)
 178 mike           1.112     {
 179                              if (s[0] == c)
 180                                  return (Uint16*)s;
 181                              if (s[1] == c)
 182                                  return (Uint16*)&s[1];
 183                              if (s[2] == c)
 184                                  return (Uint16*)&s[2];
 185                              if (s[3] == c)
 186                                  return (Uint16*)&s[3];
 187 kumpf          1.82  
 188 mike           1.112         n -= 4;
 189                              s += 4;
 190                          }
 191                      
 192                          if (n)
 193                          {
 194                              if (*s == c)
 195                                  return (Uint16*)s;
 196                              s++;
 197                              n--;
 198                          }
 199                      
 200                          if (n)
 201                          {
 202                              if (*s == c)
 203                                  return (Uint16*)s;
 204                              s++;
 205                              n--;
 206                          }
 207                      
 208                          if (n && *s == c)
 209 mike           1.112         return (Uint16*)s;
 210                      
 211                          // Not found!
 212                          return 0;
 213                      }
 214                      
 215                      static int _compare(const Uint16* s1, const Uint16* s2)
 216                      {
 217                          while (*s1 && *s2)
 218                          {
 219                              int r = *s1++ - *s2++;
 220                      
 221                              if (r)
 222                                  return r;
 223                          }
 224                      
 225                          if (*s2)
 226                              return -1;
 227                          else if (*s1)
 228                              return 1;
 229                      
 230 mike           1.112     return 0;
 231                      }
 232                      
 233 kumpf          1.130 #ifdef PEGASUS_STRING_NO_UTF8
 234 mike           1.112 static int _compareNoUTF8(const Uint16* s1, const char* s2)
 235                      {
 236                          Uint16 c1;
 237                          Uint16 c2;
 238                      
 239                          do
 240                          {
 241                              c1 = *s1++;
 242                              c2 = *s2++;
 243                      
 244                              if (c1 == 0)
 245                                  return c1 - c2;
 246                          }
 247                          while (c1 == c2);
 248                      
 249                          return c1 - c2;
 250                      }
 251 kumpf          1.130 #endif
 252 mike           1.112 
 253                      static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)
 254                      {
 255                          memcpy(s1, s2, n * sizeof(Uint16));
 256                      }
 257                      
 258                      void StringThrowOutOfBounds()
 259                      {
 260                          throw IndexOutOfBoundsException();
 261                      }
 262                      
 263                      inline void _checkNullPointer(const void* ptr)
 264                      {
 265                          if (!ptr)
 266                              throw NullPointer();
 267                      }
 268                      
 269 thilo.boehm    1.138 #define BADUTF8_MAX_CLEAR_CHAR 40
 270                      #define BADUTF8_MAX_CHAR_TO_HEX 10
 271                      
 272                      static void _formatBadUTF8Chars(
 273                          char* buffer,
 274                          Uint32 index,
 275                          const char* q,
 276                          size_t n )
 277 mike           1.112 {
 278 thilo.boehm    1.138 
 279                          char tmp[20];
 280                          const char* start;
 281                      
 282                          size_t clearChar =
 283                              (( index < BADUTF8_MAX_CLEAR_CHAR ) ? index : BADUTF8_MAX_CLEAR_CHAR );
 284                          size_t charToHex =
 285                              ((n-index-1) < BADUTF8_MAX_CHAR_TO_HEX ?
 286                                  (n-index-1) : BADUTF8_MAX_CHAR_TO_HEX );
 287                      
 288                          if (index < BADUTF8_MAX_CLEAR_CHAR)
 289                          {
 290                              start = q;
 291                          } else
 292                          {
 293                              start = &(q[ index - BADUTF8_MAX_CLEAR_CHAR]);
 294                          }
 295                      
 296                          // Intialize the buffer with the first character as '\0' to be able to use
 297                          // strnchat() and strcat()
 298                          buffer[0] = 0;
 299 thilo.boehm    1.138     // Start the buffer with the valid UTF8 chars
 300                          strncat(buffer,start,clearChar);
 301                          for (size_t i = clearChar, j = 0; j <= charToHex; i++,j++ )
 302                          {
 303                              tmp[0] = 0;
 304                              sprintf(&(tmp[0])," 0x%02X",(Uint8)start[i]);
 305                              strncat(buffer,&(tmp[0]),5);
 306                          }
 307                      
 308                      }
 309                      
 310                      static void _StringThrowBadUTF8(Uint32 index, const char* q, size_t n)
 311                      {
 312                          char buffer[1024];
 313                      
 314                          _formatBadUTF8Chars(&(buffer[0]),index,q,n);
 315                      
 316 mike           1.112     MessageLoaderParms parms(
 317 thilo.boehm    1.138         "Common.String.BAD_UTF8_LONG",
 318 mike           1.112         "The byte sequence starting at index $0 "
 319 thilo.boehm    1.138         "is not valid UTF-8 encoding: $1",
 320                              index,buffer);
 321                      
 322 mike           1.112     throw Exception(parms);
 323                      }
 324                      
 325 david.dillard  1.116 // Note: dest must be at least three times src (plus an extra byte for
 326 mike           1.112 // terminator).
 327                      static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n)
 328                      {
 329                          // The following employs loop unrolling for efficiency. Please do not
 330                          // eliminate.
 331                      
 332                          const Uint16* q = src;
 333                          Uint8* p = (Uint8*)dest;
 334                      
 335                          while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
 336 kumpf          1.82      {
 337 mike           1.112         p[0] = q[0];
 338                              p[1] = q[1];
 339                              p[2] = q[2];
 340                              p[3] = q[3];
 341                              p += 4;
 342                              q += 4;
 343                              n -= 4;
 344 kumpf          1.82      }
 345 mike           1.112 
 346                          switch (n)
 347                          {
 348                              case 0:
 349                                  return p - (Uint8*)dest;
 350                              case 1:
 351                                  if (q[0] < 128)
 352                                  {
 353                                      p[0] = q[0];
 354                                      return p + 1 - (Uint8*)dest;
 355                                  }
 356                                  break;
 357                              case 2:
 358                                  if (q[0] < 128 && q[1] < 128)
 359                                  {
 360                                      p[0] = q[0];
 361                                      p[1] = q[1];
 362                                      return p + 2 - (Uint8*)dest;
 363                                  }
 364                                  break;
 365                              case 3:
 366 mike           1.112             if (q[0] < 128 && q[1] < 128 && q[2] < 128)
 367                                  {
 368                                      p[0] = q[0];
 369                                      p[1] = q[1];
 370                                      p[2] = q[2];
 371                                      return p + 3 - (Uint8*)dest;
 372                                  }
 373                                  break;
 374                          }
 375                      
 376                          // If this line was reached, there must be characters greater than 128.
 377                      
 378                          UTF16toUTF8(&q, q + n, &p, p + 3 * n);
 379                      
 380                          return p - (Uint8*)dest;
 381 kumpf          1.54  }
 382                      
 383 mike           1.112 //==============================================================================
 384                      //
 385                      // class CString
 386                      //
 387                      //==============================================================================
 388                      
 389                      CString::CString(const CString& cstr) : _rep(0)
 390 kumpf          1.54  {
 391 mike           1.112     if (cstr._rep)
 392 kumpf          1.82      {
 393 mike           1.112         size_t n = strlen(cstr._rep) + 1;
 394                              _rep = (char*)operator new(n);
 395                              memcpy(_rep, cstr._rep, n);
 396 kumpf          1.82      }
 397 kumpf          1.54  }
 398                      
 399 kumpf          1.56  CString& CString::operator=(const CString& cstr)
 400                      {
 401 kumpf          1.82      if (&cstr != this)
 402 kumpf          1.81      {
 403 kumpf          1.82          if (_rep)
 404                              {
 405 mike           1.112             operator delete(_rep);
 406 kumpf          1.82              _rep = 0;
 407                              }
 408 mike           1.112 
 409 kumpf          1.82          if (cstr._rep)
 410                              {
 411 mike           1.112             size_t n = strlen(cstr._rep) + 1;
 412                                  _rep = (char*)operator new(n);
 413                                  memcpy(_rep, cstr._rep, n);
 414 kumpf          1.82          }
 415 kumpf          1.81      }
 416 mike           1.112 
 417 kumpf          1.56      return *this;
 418                      }
 419                      
 420 mike           1.112 //==============================================================================
 421 kumpf          1.54  //
 422 mike           1.112 // class StringRep
 423 kumpf          1.39  //
 424 mike           1.112 //==============================================================================
 425 kumpf          1.39  
 426 mike           1.112 StringRep StringRep::_emptyRep;
 427 mike           1.27  
 428 mike           1.112 inline StringRep* StringRep::alloc(size_t cap)
 429 mike           1.27  {
 430 dave.sudlik    1.120     // Check for potential overflow in cap
 431                          PEGASUS_CHECK_CAPACITY_OVERFLOW(cap);
 432 mike           1.27  
 433 mike           1.112     StringRep* rep = (StringRep*)::operator new(
 434                              sizeof(StringRep) + cap * sizeof(Uint16));
 435                          rep->cap = cap;
 436                          new(&rep->refs) AtomicInt(1);
 437                      
 438                          return rep;
 439 mike           1.27  }
 440                      
 441 mike           1.112 static inline void _reserve(StringRep*& rep, Uint32 cap)
 442 chuck          1.102 {
 443 mike           1.114     if (cap > rep->cap || rep->refs.get() != 1)
 444 chuck          1.102     {
 445 mike           1.112         size_t n = _roundUpToPow2(cap);
 446                              StringRep* newRep = StringRep::alloc(n);
 447                              newRep->size = rep->size;
 448                              _copy(newRep->data, rep->data, rep->size + 1);
 449                              StringRep::unref(rep);
 450                              rep = newRep;
 451                          }
 452                      }
 453 david.dillard  1.105 
 454 mike           1.112 StringRep* StringRep::create(const Uint16* data, size_t size)
 455                      {
 456                          StringRep* rep = StringRep::alloc(size);
 457                          rep->size = size;
 458                          _copy(rep->data, data, size);
 459                          rep->data[size] = '\0';
 460                          return rep;
 461                      }
 462 chuck          1.102 
 463 mike           1.112 StringRep* StringRep::copyOnWrite(StringRep* rep)
 464                      {
 465                          // Return a new copy of rep. Release rep.
 466 chuck          1.102 
 467 mike           1.112     StringRep* newRep = StringRep::alloc(rep->size);
 468                          newRep->size = rep->size;
 469                          _copy(newRep->data, rep->data, rep->size);
 470                          newRep->data[newRep->size] = '\0';
 471                          StringRep::unref(rep);
 472                          return newRep;
 473 chuck          1.102 }
 474                      
 475 mike           1.112 StringRep* StringRep::create(const char* data, size_t size)
 476 kumpf          1.43  {
 477 mike           1.112     StringRep* rep = StringRep::alloc(size);
 478                          size_t utf8_error_index;
 479                          rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index);
 480                      
 481                          if (rep->size == size_t(-1))
 482                          {
 483                              StringRep::free(rep);
 484 thilo.boehm    1.138         _StringThrowBadUTF8((Uint32)utf8_error_index, data,size);
 485 mike           1.112     }
 486 kumpf          1.43  
 487 mike           1.112     rep->data[rep->size] = '\0';
 488 kumpf          1.43  
 489 mike           1.112     return rep;
 490 mike           1.27  }
 491                      
 492 mike           1.112 Uint32 StringRep::length(const Uint16* str)
 493 mike           1.27  {
 494 mike           1.112     // Note: We could unroll this but it is rarely called.
 495                      
 496                          const Uint16* end = (Uint16*)str;
 497                      
 498                          while (*end++)
 499                              ;
 500                      
 501 a.dunfey       1.125     return (Uint32)(end - str - 1);
 502 kumpf          1.39  }
 503 tony           1.66  
 504 mike           1.112 //==============================================================================
 505                      //
 506                      // class String
 507                      //
 508                      //==============================================================================
 509                      
 510                      const String String::EMPTY;
 511 mike           1.27  
 512 kumpf          1.39  String::String(const String& str, Uint32 n)
 513                      {
 514 mike           1.112     _checkBounds(n, str._rep->size);
 515                          _rep = StringRep::create(str._rep->data, n);
 516 kumpf          1.39  }
 517                      
 518                      String::String(const Char16* str)
 519                      {
 520 mike           1.112     _checkNullPointer(str);
 521                          _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
 522 mike           1.27  }
 523                      
 524 kumpf          1.39  String::String(const Char16* str, Uint32 n)
 525                      {
 526 mike           1.112     _checkNullPointer(str);
 527                          _rep = StringRep::create((Uint16*)str, n);
 528 kumpf          1.39  }
 529                      
 530                      String::String(const char* str)
 531 mike           1.27  {
 532 mike           1.112     _checkNullPointer(str);
 533 david.dillard  1.105 
 534 mike           1.112     // Set this just in case create() throws an exception.
 535                          _rep = &StringRep::_emptyRep;
 536                          _rep = StringRep::create(str, strlen(str));
 537 mike           1.27  }
 538                      
 539 kumpf          1.39  String::String(const char* str, Uint32 n)
 540 mike           1.27  {
 541 mike           1.112     _checkNullPointer(str);
 542 david.dillard  1.105 
 543 mike           1.112     // Set this just in case create() throws an exception.
 544                          _rep = &StringRep::_emptyRep;
 545                          _rep = StringRep::create(str, n);
 546 kumpf          1.39  }
 547 mike           1.27  
 548 mike           1.112 String::String(const String& s1, const String& s2)
 549 kumpf          1.39  {
 550 mike           1.112     size_t n1 = s1._rep->size;
 551                          size_t n2 = s2._rep->size;
 552                          size_t n = n1 + n2;
 553                          _rep = StringRep::alloc(n);
 554                          _copy(_rep->data, s1._rep->data, n1);
 555                          _copy(_rep->data + n1, s2._rep->data, n2);
 556                          _rep->size = n;
 557                          _rep->data[n] = '\0';
 558 mike           1.27  }
 559                      
 560 mike           1.112 String::String(const String& s1, const char* s2)
 561 mike           1.27  {
 562 mike           1.112     _checkNullPointer(s2);
 563                          size_t n1 = s1._rep->size;
 564                          size_t n2 = strlen(s2);
 565                          _rep = StringRep::alloc(n1 + n2);
 566                          _copy(_rep->data, s1._rep->data, n1);
 567                          size_t utf8_error_index;
 568                          size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index);
 569                      
 570                          if (tmp == size_t(-1))
 571 kumpf          1.82      {
 572 mike           1.112         StringRep::free(_rep);
 573                              _rep = &StringRep::_emptyRep;
 574 thilo.boehm    1.138         _StringThrowBadUTF8((Uint32)utf8_error_index,s2,n2);
 575 kumpf          1.82      }
 576 mike           1.112 
 577                          _rep->size = n1 + tmp;
 578                          _rep->data[_rep->size] = '\0';
 579 mike           1.27  }
 580                      
 581 mike           1.112 String::String(const char* s1, const String& s2)
 582 mike           1.27  {
 583 mike           1.112     _checkNullPointer(s1);
 584                          size_t n1 = strlen(s1);
 585                          size_t n2 = s2._rep->size;
 586                          _rep = StringRep::alloc(n1 + n2);
 587                          size_t utf8_error_index;
 588                          size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index);
 589                      
 590                          if (tmp ==  size_t(-1))
 591                          {
 592                              StringRep::free(_rep);
 593                              _rep = &StringRep::_emptyRep;
 594 thilo.boehm    1.138         _StringThrowBadUTF8((Uint32)utf8_error_index,s1,n1);
 595 mike           1.112     }
 596                      
 597                          _rep->size = n2 + tmp;
 598                          _copy(_rep->data + n1, s2._rep->data, n2);
 599                          _rep->data[_rep->size] = '\0';
 600 mike           1.27  }
 601                      
 602 mike           1.112 String& String::assign(const String& str)
 603 mike           1.27  {
 604 mike           1.112     if (_rep != str._rep)
 605 david.dillard  1.105     {
 606 mike           1.112         StringRep::unref(_rep);
 607                              StringRep::ref(_rep = str._rep);
 608 david.dillard  1.105     }
 609                      
 610 mike           1.27      return *this;
 611                      }
 612                      
 613                      String& String::assign(const Char16* str, Uint32 n)
 614                      {
 615 mike           1.112     _checkNullPointer(str);
 616                      
 617 mike           1.114     if (n > _rep->cap || _rep->refs.get() != 1)
 618 david.dillard  1.105     {
 619 mike           1.112         StringRep::unref(_rep);
 620                              _rep = StringRep::alloc(n);
 621 david.dillard  1.105     }
 622                      
 623 mike           1.112     _rep->size = n;
 624                          _copy(_rep->data, (Uint16*)str, n);
 625                          _rep->data[n] = '\0';
 626                      
 627 mike           1.27      return *this;
 628                      }
 629                      
 630 mike           1.112 String& String::assign(const char* str, Uint32 n)
 631 chuck          1.102 {
 632 mike           1.112     _checkNullPointer(str);
 633                      
 634 mike           1.114     if (n > _rep->cap || _rep->refs.get() != 1)
 635 david.dillard  1.105     {
 636 mike           1.112         StringRep::unref(_rep);
 637                              _rep = StringRep::alloc(n);
 638 david.dillard  1.105     }
 639                      
 640 mike           1.112     size_t utf8_error_index;
 641                          _rep->size = _convert(_rep->data, str, n, utf8_error_index);
 642 chuck          1.102 
 643 mike           1.112     if (_rep->size ==  size_t(-1))
 644 david.dillard  1.105     {
 645 mike           1.112         StringRep::free(_rep);
 646                              _rep = &StringRep::_emptyRep;
 647 thilo.boehm    1.138         _StringThrowBadUTF8((Uint32)utf8_error_index,str,n);
 648 david.dillard  1.105     }
 649 mike           1.112 
 650                          _rep->data[_rep->size] = 0;
 651 david.dillard  1.105 
 652 mike           1.27      return *this;
 653                      }
 654                      
 655 kumpf          1.39  void String::clear()
 656                      {
 657 mike           1.112     if (_rep->size)
 658                          {
 659 mike           1.114         if (_rep->refs.get() == 1)
 660 mike           1.112         {
 661                                  _rep->size = 0;
 662                                  _rep->data[0] = '\0';
 663                              }
 664                              else
 665                              {
 666                                  StringRep::unref(_rep);
 667                                  _rep = &StringRep::_emptyRep;
 668                              }
 669                          }
 670 kumpf          1.39  }
 671                      
 672 mike           1.112 void String::reserveCapacity(Uint32 cap)
 673 kumpf          1.39  {
 674 mike           1.112     _reserve(_rep, cap);
 675 kumpf          1.39  }
 676                      
 677 mike           1.112 CString String::getCString() const
 678                      {
 679 david.dillard  1.116     // A UTF8 string can have three times as many characters as its UTF16
 680                          // counterpart, so we allocate extra memory for the worst case. In the
 681 mike           1.112     // best case, we may need only one third of the memory allocated. But
 682 david.dillard  1.116     // downsizing the string afterwards is expensive and unecessary since
 683                          // CString objects are usually short-lived (disappearing after only a few
 684 mike           1.112     // instructions). CString objects are typically created on the stack as
 685                          // means to obtain a char* pointer.
 686                      
 687                      #ifdef PEGASUS_STRING_NO_UTF8
 688                          char* str = (char*)operator new(_rep->size + 1);
 689                          _copy(str, _rep->data, _rep->size);
 690                          str[_rep->size] = '\0';
 691                          return CString(str);
 692 gs.keenan      1.110 #else
 693 a.dunfey       1.125     Uint32 n = (Uint32)(3 * _rep->size);
 694 mike           1.112     char* str = (char*)operator new(n + 1);
 695                          size_t size = _copyToUTF8(str, _rep->data, _rep->size);
 696                          str[size] = '\0';
 697                          return CString(str);
 698 gs.keenan      1.110 #endif
 699 kumpf          1.39  }
 700                      
 701 mike           1.112 String& String::append(const Char16* str, Uint32 n)
 702 kumpf          1.39  {
 703 mike           1.112     _checkNullPointer(str);
 704                      
 705                          size_t oldSize = _rep->size;
 706                          size_t newSize = oldSize + n;
 707 a.dunfey       1.125     _reserve(_rep, (Uint32)newSize);
 708 mike           1.112     _copy(_rep->data + oldSize, (Uint16*)str, n);
 709                          _rep->size = newSize;
 710                          _rep->data[newSize] = '\0';
 711                      
 712                          return *this;
 713 kumpf          1.39  }
 714                      
 715 mike           1.112 String& String::append(const String& str)
 716 mike           1.27  {
 717 a.dunfey       1.125     return append((Char16*)(&(str._rep->data[0])), (Uint32)str._rep->size);
 718 mike           1.27  }
 719                      
 720 mike           1.112 String& String::append(const char* str, Uint32 size)
 721 mike           1.27  {
 722 mike           1.112     _checkNullPointer(str);
 723                      
 724                          size_t oldSize = _rep->size;
 725                          size_t cap = oldSize + size;
 726                      
 727 a.dunfey       1.125     _reserve(_rep, (Uint32)cap);
 728 mike           1.112     size_t utf8_error_index;
 729                          size_t tmp = _convert(
 730                              (Uint16*)_rep->data + oldSize, str, size, utf8_error_index);
 731                      
 732                          if (tmp ==  size_t(-1))
 733                          {
 734                              StringRep::free(_rep);
 735                              _rep = &StringRep::_emptyRep;
 736 thilo.boehm    1.138         _StringThrowBadUTF8((Uint32)utf8_error_index,str,size);
 737 mike           1.112     }
 738 mike           1.27  
 739 mike           1.112     _rep->size += tmp;
 740                          _rep->data[_rep->size] = '\0';
 741 mike           1.27  
 742 kumpf          1.39      return *this;
 743                      }
 744                      
 745 mike           1.112 void String::remove(Uint32 index, Uint32 n)
 746 mike           1.27  {
 747 mike           1.112     if (n == PEG_NOT_FOUND)
 748 a.dunfey       1.125         n = (Uint32)(_rep->size - index);
 749 mike           1.112 
 750                          _checkBounds(index + n, _rep->size);
 751                      
 752 mike           1.114     if (_rep->refs.get() != 1)
 753 mike           1.112         _rep = StringRep::copyOnWrite(_rep);
 754 mike           1.27  
 755 jim.wunderlich 1.115     PEGASUS_ASSERT(index + n <= _rep->size);
 756 mike           1.27  
 757 mike           1.112     size_t rem = _rep->size - (index + n);
 758                          Uint16* data = _rep->data;
 759 mike           1.27  
 760 mike           1.112     if (rem)
 761                              memmove(data + index, data + index + n, rem * sizeof(Uint16));
 762 mike           1.27  
 763 mike           1.112     _rep->size -= n;
 764                          data[_rep->size] = '\0';
 765 mike           1.27  }
 766                      
 767 mike           1.112 String String::subString(Uint32 index, Uint32 n) const
 768 mike           1.27  {
 769 mike           1.112     // Note: this implementation is very permissive but used for
 770                          // backwards compatibility.
 771                      
 772                          if (index < _rep->size)
 773 mike           1.27      {
 774 mike           1.112         if (n == PEG_NOT_FOUND || n > _rep->size - index)
 775 a.dunfey       1.125             n = (Uint32)(_rep->size - index);
 776 mike           1.27  
 777 w.otsuka       1.121         return String((Char16*)(_rep->data + index), n);
 778 mike           1.27      }
 779 david.dillard  1.105 
 780                          return String();
 781 mike           1.27  }
 782                      
 783                      Uint32 String::find(Char16 c) const
 784                      {
 785 mike           1.112     Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
 786 mike           1.27  
 787 mike           1.112     if (p)
 788 david.dillard  1.116         return static_cast<Uint32>(p - _rep->data);
 789 mike           1.27  
 790                          return PEG_NOT_FOUND;
 791                      }
 792                      
 793 kumpf          1.53  Uint32 String::find(Uint32 index, Char16 c) const
 794 mike           1.30  {
 795 mike           1.112     _checkBounds(index, _rep->size);
 796                      
 797                          if (index >= _rep->size)
 798                              return PEG_NOT_FOUND;
 799                      
 800                          Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
 801 mike           1.30  
 802 mike           1.112     if (p)
 803 david.dillard  1.116         return static_cast<Uint32>(p - _rep->data);
 804 mike           1.30  
 805                          return PEG_NOT_FOUND;
 806                      }
 807                      
 808 mike           1.112 Uint32 StringFindAux(
 809                          const StringRep* _rep, const Char16* s, Uint32 n)
 810 mike           1.27  {
 811 mike           1.112     _checkNullPointer(s);
 812 mike           1.27  
 813 mike           1.112     const Uint16* data = _rep->data;
 814                          size_t rem = _rep->size;
 815                      
 816                          while (n <= rem)
 817 mike           1.30      {
 818 mike           1.112         Uint16* p = (Uint16*)_find(data, rem, s[0]);
 819                      
 820                              if (!p)
 821                                  break;
 822 mike           1.30  
 823 mike           1.112         if (memcmp(p, s, n * sizeof(Uint16)) == 0)
 824 david.dillard  1.116             return static_cast<Uint32>(p - _rep->data);
 825 david.dillard  1.105 
 826 mike           1.112         p++;
 827                              rem -= p - data;
 828                              data = p;
 829 mike           1.27      }
 830 mike           1.112 
 831 mike           1.27      return PEG_NOT_FOUND;
 832                      }
 833                      
 834 mike           1.112 Uint32 String::find(const char* s) const
 835                      {
 836                          _checkNullPointer(s);
 837                      
 838                          // Note: could optimize away creation of temporary, but this is rarely
 839                          // called.
 840                          return find(String(s));
 841                      }
 842                      
 843 mike           1.27  Uint32 String::reverseFind(Char16 c) const
 844                      {
 845 mike           1.112     Uint16 x = c;
 846                          Uint16* p = _rep->data;
 847                          Uint16* q = _rep->data + _rep->size;
 848 mike           1.27  
 849 mike           1.112     while (q != p)
 850 mike           1.27      {
 851 mike           1.112         if (*--q == x)
 852 david.dillard  1.116             return static_cast<Uint32>(q - p);
 853 mike           1.27      }
 854                      
 855                          return PEG_NOT_FOUND;
 856                      }
 857                      
 858                      void String::toLower()
 859                      {
 860 david          1.69  #ifdef PEGASUS_HAS_ICU
 861 mike           1.112 
 862 yi.zhou        1.108     if (InitializeICU::initICUSuccessful())
 863 david          1.90      {
 864 mike           1.114         if (_rep->refs.get() != 1)
 865 mike           1.112             _rep = StringRep::copyOnWrite(_rep);
 866                      
 867 yi.zhou        1.108         // This will do a locale-insensitive, but context-sensitive convert.
 868 david.dillard  1.116         // Since context-sensitive casing looks at adjacent chars, this
 869                              // prevents optimizations where the us-ascii is converted before
 870 mike           1.112         // calling ICU.
 871 yi.zhou        1.108         // The string may shrink or expand after the convert.
 872                      
 873 mike           1.112         //// First calculate size of resulting string. u_strToLower() returns
 874                              //// only the size when zero is passed as the destination size argument.
 875                      
 876 yi.zhou        1.108         UErrorCode err = U_ZERO_ERROR;
 877                      
 878 mike           1.112         int32_t newSize = u_strToLower(
 879                                  NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
 880 david.dillard  1.116 
 881 mike           1.112         err = U_ZERO_ERROR;
 882                      
 883                              //// Reserve enough space for the result.
 884                      
 885                              if ((Uint32)newSize > _rep->cap)
 886                                  _reserve(_rep, newSize);
 887                      
 888                              //// Perform the conversion (overlapping buffers are allowed).
 889 chuck          1.99  
 890 mike           1.112         u_strToLower((UChar*)_rep->data, newSize,
 891                                  (UChar*)_rep->data, _rep->size, NULL, &err);
 892 yi.zhou        1.108 
 893 mike           1.112         _rep->size = newSize;
 894                              return;
 895 david          1.90      }
 896 mike           1.112 
 897                      #endif /* PEGASUS_HAS_ICU */
 898                      
 899 mike           1.114     if (_rep->refs.get() != 1)
 900 mike           1.112         _rep = StringRep::copyOnWrite(_rep);
 901                      
 902                          Uint16* p = _rep->data;
 903                          size_t n = _rep->size;
 904                      
 905                          for (; n--; p++)
 906 david          1.90      {
 907 mike           1.112         if (!(*p & 0xFF00))
 908                                  *p = _toLower(*p);
 909 mike           1.27      }
 910 kumpf          1.39  }
 911                      
 912 chuck          1.99  void String::toUpper()
 913 david          1.90  {
 914                      #ifdef PEGASUS_HAS_ICU
 915 mike           1.112 
 916 yi.zhou        1.108     if (InitializeICU::initICUSuccessful())
 917 chuck          1.99      {
 918 mike           1.114         if (_rep->refs.get() != 1)
 919 mike           1.112             _rep = StringRep::copyOnWrite(_rep);
 920                      
 921 yi.zhou        1.108         // This will do a locale-insensitive, but context-sensitive convert.
 922 david.dillard  1.116         // Since context-sensitive casing looks at adjacent chars, this
 923                              // prevents optimizations where the us-ascii is converted before
 924 mike           1.112         // calling ICU.
 925 yi.zhou        1.108         // The string may shrink or expand after the convert.
 926                      
 927 mike           1.112         //// First calculate size of resulting string. u_strToUpper() returns
 928                              //// only the size when zero is passed as the destination size argument.
 929                      
 930 yi.zhou        1.108         UErrorCode err = U_ZERO_ERROR;
 931                      
 932 mike           1.112         int32_t newSize = u_strToUpper(
 933                                  NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
 934                      
 935                              err = U_ZERO_ERROR;
 936                      
 937                              //// Reserve enough space for the result.
 938                      
 939                              if ((Uint32)newSize > _rep->cap)
 940                                  _reserve(_rep, newSize);
 941                      
 942                              //// Perform the conversion (overlapping buffers are allowed).
 943                      
 944                              u_strToUpper((UChar*)_rep->data, newSize,
 945                                  (UChar*)_rep->data, _rep->size, NULL, &err);
 946 chuck          1.99  
 947 mike           1.112         _rep->size = newSize;
 948 yi.zhou        1.108 
 949 mike           1.112         return;
 950 david          1.91      }
 951 mike           1.112 
 952                      #endif /* PEGASUS_HAS_ICU */
 953                      
 954 mike           1.114     if (_rep->refs.get() != 1)
 955 mike           1.112         _rep = StringRep::copyOnWrite(_rep);
 956                      
 957                          Uint16* p = _rep->data;
 958                          size_t n = _rep->size;
 959                      
 960                          for (; n--; p++)
 961                              *p = _toUpper(*p);
 962 david          1.90  }
 963                      
 964 kumpf          1.43  int String::compare(const String& s1, const String& s2, Uint32 n)
 965 kumpf          1.39  {
 966 kumpf          1.118     const Uint16* p1 = s1._rep->data;
 967                          const Uint16* p2 = s2._rep->data;
 968 mike           1.27  
 969 kumpf          1.118     while (n--)
 970                          {
 971                              int r = *p1++ - *p2++;
 972                              if (r)
 973                              {
 974                                  return r;
 975                              }
 976                              else if (!p1[-1])
 977                              {
 978                                  // We must have encountered a null terminator in both s1 and s2
 979                                  return 0;
 980                              }
 981                          }
 982                          return 0;
 983 mike           1.27  }
 984                      
 985 kumpf          1.43  int String::compare(const String& s1, const String& s2)
 986 mike           1.30  {
 987 mike           1.112     return _compare(s1._rep->data, s2._rep->data);
 988                      }
 989 kumpf          1.43  
 990 mike           1.112 int String::compare(const String& s1, const char* s2)
 991                      {
 992                          _checkNullPointer(s2);
 993 mike           1.30  
 994 mike           1.112 #ifdef PEGASUS_STRING_NO_UTF8
 995                          return _compareNoUTF8(s1._rep->data, s2);
 996                      #else
 997                          // ATTN: optimize this!
 998                          return String::compare(s1, String(s2));
 999                      #endif
1000 mike           1.30  }
1001                      
1002 mike           1.112 int String::compareNoCase(const String& str1, const String& str2)
1003 kumpf          1.40  {
1004 david          1.69  #ifdef PEGASUS_HAS_ICU
1005 mike           1.112 
1006 yi.zhou        1.108     if (InitializeICU::initICUSuccessful())
1007                          {
1008 mike           1.112         return  u_strcasecmp(
1009 dave.sudlik    1.124             (const UChar*)str1._rep->data,
1010                                  (const UChar*)str2._rep->data,
1011                                  U_FOLD_CASE_DEFAULT
1012                                  );
1013 yi.zhou        1.108     }
1014 kumpf          1.40  
1015 mike           1.112 #endif /* PEGASUS_HAS_ICU */
1016                      
1017                          const Uint16* s1 = str1._rep->data;
1018                          const Uint16* s2 = str2._rep->data;
1019                      
1020                          while (*s1 && *s2)
1021 kumpf          1.40      {
1022 mike           1.112         int r = _toLower(*s1++) - _toLower(*s2++);
1023 kumpf          1.40  
1024 david.dillard  1.105         if (r)
1025                                  return r;
1026 kumpf          1.40      }
1027                      
1028 mike           1.112     if (*s2)
1029 david.dillard  1.105         return -1;
1030 mike           1.112     else if (*s1)
1031 david.dillard  1.105         return 1;
1032 kumpf          1.40  
1033                          return 0;
1034                      }
1035                      
1036 mike           1.112 Boolean StringEqualNoCase(const String& s1, const String& s2)
1037 mike           1.27  {
1038 mike           1.112 #ifdef PEGASUS_HAS_ICU
1039                      
1040                          return String::compareNoCase(s1, s2) == 0;
1041                      
1042                      #else /* PEGASUS_HAS_ICU */
1043 mike           1.27  
1044 mike           1.112     // The following employs loop unrolling for efficiency. Please do not
1045                          // eliminate.
1046 kumpf          1.39  
1047 mike           1.112     Uint16* p = (Uint16*)s1.getChar16Data();
1048                          Uint16* q = (Uint16*)s2.getChar16Data();
1049                          Uint32 n = s2.size();
1050                      
1051                          while (n >= 8)
1052                          {
1053                              if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1054                                  ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1055                                  ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1056                                  ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) ||
1057                                  ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) ||
1058                                  ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) ||
1059                                  ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) ||
1060                                  ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7]))))
1061                              {
1062                                  return false;
1063                              }
1064 kumpf          1.39  
1065 mike           1.112         n -= 8;
1066                              p += 8;
1067                              q += 8;
1068                          }
1069 mike           1.27  
1070 mike           1.112     while (n >= 4)
1071 kumpf          1.39      {
1072 mike           1.112         if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1073                                  ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1074                                  ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1075                                  ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
1076 david.dillard  1.105         {
1077 mike           1.112             return false;
1078 david.dillard  1.105         }
1079 mike           1.112 
1080                              n -= 4;
1081                              p += 4;
1082                              q += 4;
1083                          }
1084                      
1085                          while (n--)
1086                          {
1087                              if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
1088 david.dillard  1.105             return false;
1089 mike           1.112 
1090                              p++;
1091                              q++;
1092 kumpf          1.39      }
1093 mike           1.28  
1094 kumpf          1.39      return true;
1095 mike           1.112 
1096                      #endif /* PEGASUS_HAS_ICU */
1097 david          1.69  }
1098                      
1099 mike           1.112 Boolean String::equalNoCase(const String& s1, const char* s2)
1100 david          1.69  {
1101 mike           1.112     _checkNullPointer(s2);
1102 david          1.69  
1103 mike           1.112 #if defined(PEGASUS_HAS_ICU)
1104 david          1.69  
1105 mike           1.112     return String::equalNoCase(s1, String(s2));
1106 david          1.69  
1107 mike           1.112 #elif defined(PEGASUS_STRING_NO_UTF8)
1108 david          1.69  
1109 mike           1.112     const Uint16* p1 = (Uint16*)s1._rep->data;
1110                          const char* p2 = s2;
1111                          size_t n = s1._rep->size;
1112 david.dillard  1.105 
1113 mike           1.112     while (n--)
1114                          {
1115                              if (!*p2)
1116                                  return false;
1117 david          1.71  
1118 mike           1.112         if (_toUpper(*p1++) != _toUpperTable[int(*p2++)])
1119                                  return false;
1120                          }
1121 kumpf          1.42  
1122 mike           1.112     if (*p2)
1123                              return false;
1124 david.dillard  1.116 
1125 mike           1.112     return true;
1126 karl           1.36  
1127 mike           1.112 #else /* PEGASUS_HAS_ICU */
1128 david.dillard  1.105 
1129 mike           1.112     // ATTN: optimize this!
1130                          return String::equalNoCase(s1, String(s2));
1131 david.dillard  1.105 
1132 mike           1.112 #endif /* PEGASUS_HAS_ICU */
1133                      }
1134 chuck          1.78  
1135 mike           1.112 Boolean String::equal(const String& s1, const String& s2)
1136 karl           1.36  {
1137 marek          1.131     return (s1._rep == s2._rep) ||
1138 marek          1.137         ((s1._rep->size == s2._rep->size) &&
1139                               memcmp(s1._rep->data,
1140                                      s2._rep->data,
1141                                      s1._rep->size * sizeof(Uint16)) == 0);
1142 karl           1.36  }
1143                      
1144 mike           1.112 Boolean String::equal(const String& s1, const char* s2)
1145                      {
1146                      #ifdef PEGASUS_STRING_NO_UTF8
1147 kumpf          1.35  
1148 mike           1.112     _checkNullPointer(s2);
1149 kumpf          1.39  
1150 mike           1.112     const Uint16* p = (Uint16*)s1._rep->data;
1151                          const char* q = s2;
1152 kumpf          1.39  
1153 mike           1.112     while (*p && *q)
1154                          {
1155                              if (*p++ != Uint16(*q++))
1156                                  return false;
1157                          }
1158 kumpf          1.39  
1159 mike           1.112     return !(*p || *q);
1160 kumpf          1.39  
1161 mike           1.112 #else /* PEGASUS_STRING_NO_UTF8 */
1162 kumpf          1.39  
1163 mike           1.112     return String::equal(s1, String(s2));
1164 kumpf          1.39  
1165 mike           1.112 #endif /* PEGASUS_STRING_NO_UTF8 */
1166 kumpf          1.39  }
1167                      
1168 kumpf          1.47  PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
1169 kumpf          1.39  {
1170 mike           1.112 #if defined(PEGASUS_HAS_ICU)
1171 david          1.69  
1172 yi.zhou        1.108     if (InitializeICU::initICUSuccessful())
1173                          {
1174 david.dillard  1.105         char *buf = NULL;
1175                              const int size = str.size() * 6;
1176 mike           1.112         UnicodeString UniStr(
1177                                  (const UChar *)str.getChar16Data(), (int32_t)str.size());
1178 david.dillard  1.105         Uint32 bufsize = UniStr.extract(0,size,buf);
1179                              buf = new char[bufsize+1];
1180                              UniStr.extract(0,bufsize,buf);
1181                              os << buf;
1182                              os.flush();
1183                              delete [] buf;
1184 david.dillard  1.116         return os;
1185 yi.zhou        1.108     }
1186 mike           1.112 
1187 david.dillard  1.116 #endif  // PEGASUS_HAS_ICU
1188 mike           1.112 
1189                          for (Uint32 i = 0, n = str.size(); i < n; i++)
1190 yi.zhou        1.108     {
1191 mike           1.112         Uint16 code = str[i];
1192 david.dillard  1.105 
1193 mike           1.112         if (code > 0 && !(code & 0xFF00))
1194                                      os << char(code);
1195                              else
1196                                  {
1197                                  // Print in hex format:
1198                                  char buffer[8];
1199                                  sprintf(buffer, "\\x%04X", code);
1200                                  os << buffer;
1201 david.dillard  1.105         }
1202 yi.zhou        1.108     }
1203 kumpf          1.39  
1204                          return os;
1205                      }
1206                      
1207 mike           1.112 void StringAppendCharAux(StringRep*& _rep)
1208 kumpf          1.39  {
1209 mike           1.112     StringRep* tmp;
1210                      
1211                          if (_rep->cap)
1212                          {
1213                              tmp = StringRep::alloc(2 * _rep->cap);
1214                              tmp->size = _rep->size;
1215                              _copy(tmp->data, _rep->data, _rep->size);
1216                          }
1217                          else
1218                          {
1219                              tmp = StringRep::alloc(8);
1220                              tmp->size = 0;
1221                          }
1222                      
1223                          StringRep::unref(_rep);
1224                          _rep = tmp;
1225 kumpf          1.39  }
1226                      
1227 thilo.boehm    1.128 void AssignASCII(String& s, const char* str, Uint32 n)
1228                      {
1229                          class StringLayout
1230                          {
1231                          public:
1232                              StringRep* rep;
1233                          };
1234                      
1235 kumpf          1.130     StringLayout* that = reinterpret_cast<StringLayout*>(&s);
1236 thilo.boehm    1.128 
1237                          _checkNullPointer(str);
1238                      
1239                          if (n > that->rep->cap || that->rep->refs.get() != 1)
1240                          {
1241                              StringRep::unref(that->rep);
1242                              that->rep = StringRep::alloc(n);
1243                          }
1244                      
1245                          _copy(that->rep->data, str, n);
1246                          that->rep->size = n;
1247                          that->rep->data[that->rep->size] = 0;
1248                      }
1249                      
1250 mike           1.112 PEGASUS_NAMESPACE_END
1251                      
1252                      /*
1253                      ================================================================================
1254                      
1255                      String optimizations:
1256                      
1257                          1.  Added mechanism allowing certain functions to be inlined only when
1258                              used by internal Pegasus modules. External modules (i.e., providers)
1259                              link to a non-inline version, which allows for binary compatibility.
1260                      
1261                          2.  Implemented copy-on-write with atomic increment/decrement. This
1262                              yielded a 10% improvement for the 'gc' benchmark and an 11% improvement
1263                              for the 'ni1000' benchmark.
1264                      
1265                          3.  Employed loop unrolling in several places. For example, see:
1266                      
1267                                  static Uint16* _find(const Uint16* s, size_t n, Uint16 c);
1268                      
1269                          4.  Used the "empty-rep" optimization (described in whitepaper from the
1270                              GCC Developers Summit). This reduced default construction to a simple
1271 mike           1.112         pointer assignment.
1272                      
1273                                  inline String::String() : _rep(&_emptyRep) { }
1274                      
1275                          5.  Implemented Uint16 versions of toupper() and tolower() using tables.
1276                              For example:
1277                      
1278                                  static const char _upper[] =
1279                                  {
1280                                      0,1,2,...255
1281                                  };
1282                      
1283                                  inline Uint16 _toUpper(Uint16 x)
1284                                  {
1285                                      return (x & 0xFF00) ? x : _upper[x];
1286                                  }
1287                      
1288 david.dillard  1.116         This outperforms the system implementation by avoiding an anding
1289 mike           1.112         operation.
1290                      
1291 david.dillard  1.116     6.  Implemented char* version of the following member functions to
1292                              eliminate unnecessary creation of anonymous string objects
1293 mike           1.112         (temporaries).
1294                      
1295                                  String(const String& s1, const char* s2);
1296                                  String(const char* s1, const String& s2);
1297                                  String& String::operator=(const char* str);
1298                                  Uint32 String::find(const char* s) const;
1299                                  bool String::equal(const String& s1, const char* s2);
1300                                  static int String::compare(const String& s1, const char* s2);
1301                                  String& String::append(const char* str);
1302                                  String& String::append(const char* str, Uint32 size);
1303                                  static bool String::equalNoCase(const String& s1, const char* s2);
1304                                  String& operator=(const char* str)
1305                                  String& String::assign(const char* str)
1306                                  String& String::append(const char* str)
1307                                  Boolean operator==(const String& s1, const char* s2)
1308                                  Boolean operator==(const char* s1, const String& s2)
1309                                  Boolean operator!=(const String& s1, const char* s2)
1310                                  Boolean operator!=(const char* s1, const String& s2)
1311                                  Boolean operator<(const String& s1, const char* s2)
1312                                  Boolean operator<(const char* s1, const String& s2)
1313                                  Boolean operator>(const String& s1, const char* s2)
1314 mike           1.112             Boolean operator>(const char* s1, const String& s2)
1315                                  Boolean operator<=(const String& s1, const char* s2)
1316                                  Boolean operator<=(const char* s1, const String& s2)
1317                                  Boolean operator>=(const String& s1, const char* s2)
1318                                  Boolean operator>=(const char* s1, const String& s2)
1319                                  String operator+(const String& s1, const char* s2)
1320                                  String operator+(const char* s1, const String& s2)
1321                      
1322 david.dillard  1.116     7.  Optimized _roundUpToPow2(), used in rounding the capacity to the next
1323 mike           1.112         power of two (algorithm from the book "Hacker's Delight").
1324                      
1325                                  static Uint32 _roundUpToPow2(Uint32 x)
1326                                  {
1327                                      if (x < 8)
1328                                          return 8;
1329                      
1330                                      x--;
1331                                      x |= (x >> 1);
1332                                      x |= (x >> 2);
1333                                      x |= (x >> 4);
1334                                      x |= (x >> 8);
1335                                      x |= (x >> 16);
1336                                      x++;
1337                      
1338                                      return x;
1339                                  }
1340                      
1341                          8.  Implemented "concatenating constructors" to eliminate temporaries
1342 david.dillard  1.116         created by operator+(). This scheme employs the "return-value
1343 mike           1.112         optimization" described by Stan Lippman.
1344                      
1345                                  inline String operator+(const String& s1, const String& s2)
1346                                  {
1347                                      return String(s1, s2, 0);
1348                                  }
1349                      
1350                          9.  Experimented to find the optimal initial size for a short string.
1351                              Eight seems to offer the best tradeoff between space and time.
1352                      
1353                          10. Inlined all members of the Char16 class.
1354                      
1355                          11. Used Uint16 internally in the String class. This showed no improvement
1356                              since Char16 was already fully inlined and was essentially reduced to
1357                              Uint16 in any case.
1358                      
1359                          12. Implemented conditional logic (#if) allowing error checking logic to
1360 david.dillard  1.116         be excluded for better performance. Examples include bounds checking
1361 mike           1.112         and null-pointer checking.
1362                      
1363                          13. Used memcpy() and memcmp() where possible. These are implemented using
1364                              the rep family of instructions under Intel and are much faster.
1365                      
1366 david.dillard  1.116     14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
1367 mike           1.112         copy routine overhead.
1368                      
1369                          15. Added ASCII7 form of the constructor and assign().
1370                      
1371                                  String s("hello world", String::ASCII7);
1372                      
1373                                  s.assignASCII7("hello world");
1374                      
1375                              This avoids slower UTF8 processing when not needed.
1376                      
1377                      ================================================================================
1378                      */

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2