(file) Return to String.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 martin 1.133 //%LICENSE////////////////////////////////////////////////////////////////
   2 martin 1.134 //
   3 martin 1.133 // Licensed to The Open Group (TOG) under one or more contributor license
   4              // agreements.  Refer to the OpenPegasusNOTICE.txt file distributed with
   5              // this work for additional information regarding copyright ownership.
   6              // Each contributor licenses this file to you under the OpenPegasus Open
   7              // Source License; you may not use this file except in compliance with the
   8              // License.
   9 martin 1.134 //
  10 martin 1.133 // Permission is hereby granted, free of charge, to any person obtaining a
  11              // copy of this software and associated documentation files (the "Software"),
  12              // to deal in the Software without restriction, including without limitation
  13              // the rights to use, copy, modify, merge, publish, distribute, sublicense,
  14              // and/or sell copies of the Software, and to permit persons to whom the
  15              // Software is furnished to do so, subject to the following conditions:
  16 martin 1.134 //
  17 martin 1.133 // The above copyright notice and this permission notice shall be included
  18              // in all copies or substantial portions of the Software.
  19 martin 1.134 //
  20 martin 1.133 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  21 martin 1.134 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  22 martin 1.133 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  23              // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  24              // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  25              // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  26              // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27 martin 1.134 //
  28 martin 1.133 //////////////////////////////////////////////////////////////////////////
  29 mike   1.27  //
  30              //%/////////////////////////////////////////////////////////////////////////////
  31              
  32 jim.wunderlich 1.115 #include <Pegasus/Common/PegasusAssert.h>
  33 mike           1.113 #include <cstring>
  34 kumpf          1.48  #include "InternalException.h"
  35 mike           1.112 #include "MessageLoader.h"
  36                      #include "StringRep.h"
  37 karl           1.140 #include <Pegasus/Common/Pegasus_inl.h>
  38                      #include <cstdarg>
  39 david          1.69  
  40                      #ifdef PEGASUS_HAS_ICU
  41 kumpf          1.132 # include <unicode/ures.h>
  42                      # include <unicode/ustring.h>
  43                      # include <unicode/uchar.h>
  44 david          1.69  #endif
  45                      
  46 mike           1.112 PEGASUS_NAMESPACE_BEGIN
  47 mike           1.28  
  48 mike           1.112 //==============================================================================
  49                      //
  50                      // Compile-time macros (undefined by default).
  51                      //
  52                      //     PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
  53                      //
  54                      //==============================================================================
  55 mike           1.27  
  56 mike           1.112 //==============================================================================
  57 kumpf          1.39  //
  58 mike           1.112 // File-scope definitions:
  59 kumpf          1.54  //
  60 mike           1.112 //==============================================================================
  61                      
// ASCII upper-casing lookup table, indexed by an 8-bit character value.
// Entries 0x61..0x7A ('a'..'z') hold 0x41..0x5A ('A'..'Z'); every other
// entry holds its own index, so non-letters and bytes >= 0x80 are unchanged.
//
// Note: this table is much faster than the system toupper(). Please do not
// change.

const Uint8 _toUpperTable[256] =
{
    0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
    0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
    0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
    0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
    0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
    0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
    0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
    0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
    0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
    0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
    0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
    0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
    0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
    0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
    0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
    0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
    0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
    0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
    0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
    0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
    0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
    0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
    0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
    0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
    0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
    0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
    0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
    0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
    0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
    0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
    0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
    0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
};
 100                      
// ASCII lower-casing lookup table, indexed by an 8-bit character value.
// Entries 0x41..0x5A ('A'..'Z') hold 0x61..0x7A ('a'..'z'); every other
// entry holds its own index, so non-letters and bytes >= 0x80 are unchanged.
//
// Note: this table is much faster than the system tolower(). Please do not
// change.

const Uint8 _toLowerTable[256] =
{
    0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
    0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
    0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
    0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
    0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
    0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
    0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
    0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
    0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
    0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
    0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
    0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
    0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
    0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
    0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
    0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
    0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
    0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
    0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
    0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
    0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
    0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
    0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
    0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
    0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
    0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
    0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
    0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
    0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
    0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
    0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
    0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
};
 139                      
 140                      // Converts 16-bit characters to upper case. This routine is faster than the
 141                      // system toupper(). Please do not change.
 142                      inline Uint16 _toUpper(Uint16 x)
 143                      {
 144                          return (x & 0xFF00) ? x : _toUpperTable[x];
 145 kumpf          1.54  }
 146                      
 147 mike           1.112 // Converts 16-bit characters to lower case. This routine is faster than the
 148                      // system toupper(). Please do not change.
 149                      inline Uint16 _toLower(Uint16 x)
 150 kumpf          1.54  {
 151 mike           1.112     return (x & 0xFF00) ? x : _toLowerTable[x];
 152                      }
 153                      
 154                      // Rounds x up to the nearest power of two (or just returns 8 if x < 8).
 155                      static Uint32 _roundUpToPow2(Uint32 x)
 156                      {
 157 dave.sudlik    1.120     // Check for potential overflow in x
 158                          PEGASUS_CHECK_CAPACITY_OVERFLOW(x);
 159 mike           1.112 
 160                          if (x < 8)
 161                              return 8;
 162                      
 163                          x--;
 164                          x |= (x >> 1);
 165                          x |= (x >> 2);
 166                          x |= (x >> 4);
 167                          x |= (x >> 8);
 168                          x |= (x >> 16);
 169                          x++;
 170                      
 171                          return x;
 172                      }
 173                      
 174                      static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
 175                      {
 176                          // The following employs loop unrolling for efficiency. Please do not
 177                          // eliminate.
 178                      
 179                          while (n >= 4)
 180 mike           1.112     {
 181                              if (s[0] == c)
 182                                  return (Uint16*)s;
 183                              if (s[1] == c)
 184                                  return (Uint16*)&s[1];
 185                              if (s[2] == c)
 186                                  return (Uint16*)&s[2];
 187                              if (s[3] == c)
 188                                  return (Uint16*)&s[3];
 189 kumpf          1.82  
 190 mike           1.112         n -= 4;
 191                              s += 4;
 192                          }
 193                      
 194                          if (n)
 195                          {
 196                              if (*s == c)
 197                                  return (Uint16*)s;
 198                              s++;
 199                              n--;
 200                          }
 201                      
 202                          if (n)
 203                          {
 204                              if (*s == c)
 205                                  return (Uint16*)s;
 206                              s++;
 207                              n--;
 208                          }
 209                      
 210                          if (n && *s == c)
 211 mike           1.112         return (Uint16*)s;
 212                      
 213                          // Not found!
 214                          return 0;
 215                      }
 216                      
 217                      static int _compare(const Uint16* s1, const Uint16* s2)
 218                      {
 219                          while (*s1 && *s2)
 220                          {
 221                              int r = *s1++ - *s2++;
 222                      
 223                              if (r)
 224                                  return r;
 225                          }
 226                      
 227                          if (*s2)
 228                              return -1;
 229                          else if (*s1)
 230                              return 1;
 231                      
 232 mike           1.112     return 0;
 233                      }
 234                      
 235 kumpf          1.130 #ifdef PEGASUS_STRING_NO_UTF8
 236 mike           1.112 static int _compareNoUTF8(const Uint16* s1, const char* s2)
 237                      {
 238                          Uint16 c1;
 239                          Uint16 c2;
 240                      
 241                          do
 242                          {
 243                              c1 = *s1++;
 244                              c2 = *s2++;
 245                      
 246                              if (c1 == 0)
 247                                  return c1 - c2;
 248                          }
 249                          while (c1 == c2);
 250                      
 251                          return c1 - c2;
 252                      }
 253 kumpf          1.130 #endif
 254 mike           1.112 
 255                      static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)
 256                      {
 257                          memcpy(s1, s2, n * sizeof(Uint16));
 258                      }
 259                      
// Throws IndexOutOfBoundsException. Kept as an out-of-line function so
// callers only need a plain function call on their error path.
void StringThrowOutOfBounds()
{
    throw IndexOutOfBoundsException();
}
 264                      
 265                      inline void _checkNullPointer(const void* ptr)
 266                      {
 267                          if (!ptr)
 268                              throw NullPointer();
 269                      }
 270                      
 271 thilo.boehm    1.138 #define BADUTF8_MAX_CLEAR_CHAR 40
 272                      #define BADUTF8_MAX_CHAR_TO_HEX 10
 273                      
 274                      static void _formatBadUTF8Chars(
 275                          char* buffer,
 276                          Uint32 index,
 277                          const char* q,
 278                          size_t n )
 279 mike           1.112 {
 280 thilo.boehm    1.138 
 281                          char tmp[20];
 282                          const char* start;
 283                      
 284                          size_t clearChar =
 285                              (( index < BADUTF8_MAX_CLEAR_CHAR ) ? index : BADUTF8_MAX_CLEAR_CHAR );
 286                          size_t charToHex =
 287                              ((n-index-1) < BADUTF8_MAX_CHAR_TO_HEX ?
 288                                  (n-index-1) : BADUTF8_MAX_CHAR_TO_HEX );
 289                      
 290                          if (index < BADUTF8_MAX_CLEAR_CHAR)
 291                          {
 292                              start = q;
 293                          } else
 294                          {
 295                              start = &(q[ index - BADUTF8_MAX_CLEAR_CHAR]);
 296                          }
 297                      
 298                          // Intialize the buffer with the first character as '\0' to be able to use
 299                          // strnchat() and strcat()
 300                          buffer[0] = 0;
 301 thilo.boehm    1.138     // Start the buffer with the valid UTF8 chars
 302                          strncat(buffer,start,clearChar);
 303                          for (size_t i = clearChar, j = 0; j <= charToHex; i++,j++ )
 304                          {
 305                              tmp[0] = 0;
 306                              sprintf(&(tmp[0])," 0x%02X",(Uint8)start[i]);
 307                              strncat(buffer,&(tmp[0]),5);
 308                          }
 309                      
 310                      }
 311                      
 312                      static void _StringThrowBadUTF8(Uint32 index, const char* q, size_t n)
 313                      {
 314                          char buffer[1024];
 315                      
 316                          _formatBadUTF8Chars(&(buffer[0]),index,q,n);
 317                      
 318 mike           1.112     MessageLoaderParms parms(
 319 thilo.boehm    1.138         "Common.String.BAD_UTF8_LONG",
 320 mike           1.112         "The byte sequence starting at index $0 "
 321 thilo.boehm    1.138         "is not valid UTF-8 encoding: $1",
 322                              index,buffer);
 323                      
 324 mike           1.112     throw Exception(parms);
 325                      }
 326                      
 327 david.dillard  1.116 // Note: dest must be at least three times src (plus an extra byte for
 328 mike           1.112 // terminator).
 329                      static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n)
 330                      {
 331                          // The following employs loop unrolling for efficiency. Please do not
 332                          // eliminate.
 333                      
 334                          const Uint16* q = src;
 335                          Uint8* p = (Uint8*)dest;
 336                      
 337                          while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
 338 kumpf          1.82      {
 339 mike           1.112         p[0] = q[0];
 340                              p[1] = q[1];
 341                              p[2] = q[2];
 342                              p[3] = q[3];
 343                              p += 4;
 344                              q += 4;
 345                              n -= 4;
 346 kumpf          1.82      }
 347 mike           1.112 
 348                          switch (n)
 349                          {
 350                              case 0:
 351                                  return p - (Uint8*)dest;
 352                              case 1:
 353                                  if (q[0] < 128)
 354                                  {
 355                                      p[0] = q[0];
 356                                      return p + 1 - (Uint8*)dest;
 357                                  }
 358                                  break;
 359                              case 2:
 360                                  if (q[0] < 128 && q[1] < 128)
 361                                  {
 362                                      p[0] = q[0];
 363                                      p[1] = q[1];
 364                                      return p + 2 - (Uint8*)dest;
 365                                  }
 366                                  break;
 367                              case 3:
 368 mike           1.112             if (q[0] < 128 && q[1] < 128 && q[2] < 128)
 369                                  {
 370                                      p[0] = q[0];
 371                                      p[1] = q[1];
 372                                      p[2] = q[2];
 373                                      return p + 3 - (Uint8*)dest;
 374                                  }
 375                                  break;
 376                          }
 377                      
 378                          // If this line was reached, there must be characters greater than 128.
 379                      
 380                          UTF16toUTF8(&q, q + n, &p, p + 3 * n);
 381                      
 382                          return p - (Uint8*)dest;
 383 kumpf          1.54  }
 384                      
 385 mike           1.112 //==============================================================================
 386                      //
 387                      // class CString
 388                      //
 389                      //==============================================================================
 390                      
 391                      CString::CString(const CString& cstr) : _rep(0)
 392 kumpf          1.54  {
 393 mike           1.112     if (cstr._rep)
 394 kumpf          1.82      {
 395 mike           1.112         size_t n = strlen(cstr._rep) + 1;
 396                              _rep = (char*)operator new(n);
 397                              memcpy(_rep, cstr._rep, n);
 398 kumpf          1.82      }
 399 kumpf          1.54  }
 400                      
 401 kumpf          1.56  CString& CString::operator=(const CString& cstr)
 402                      {
 403 kumpf          1.82      if (&cstr != this)
 404 kumpf          1.81      {
 405 kumpf          1.82          if (_rep)
 406                              {
 407 mike           1.112             operator delete(_rep);
 408 kumpf          1.82              _rep = 0;
 409                              }
 410 mike           1.112 
 411 kumpf          1.82          if (cstr._rep)
 412                              {
 413 mike           1.112             size_t n = strlen(cstr._rep) + 1;
 414                                  _rep = (char*)operator new(n);
 415                                  memcpy(_rep, cstr._rep, n);
 416 kumpf          1.82          }
 417 kumpf          1.81      }
 418 mike           1.112 
 419 kumpf          1.56      return *this;
 420                      }
 421                      
 422 mike           1.112 //==============================================================================
 423 kumpf          1.54  //
 424 mike           1.112 // class StringRep
 425 kumpf          1.39  //
 426 mike           1.112 //==============================================================================
 427 kumpf          1.39  
// Definition of the static StringRep instance that String objects point at
// as a safe placeholder (see the uses of &StringRep::_emptyRep below).
StringRep StringRep::_emptyRep;
 429 mike           1.27  
 430 mike           1.112 inline StringRep* StringRep::alloc(size_t cap)
 431 mike           1.27  {
 432 dave.sudlik    1.120     // Check for potential overflow in cap
 433                          PEGASUS_CHECK_CAPACITY_OVERFLOW(cap);
 434 mike           1.27  
 435 mike           1.112     StringRep* rep = (StringRep*)::operator new(
 436                              sizeof(StringRep) + cap * sizeof(Uint16));
 437                          rep->cap = cap;
 438                          new(&rep->refs) AtomicInt(1);
 439                      
 440                          return rep;
 441 mike           1.27  }
 442                      
 443 mike           1.112 static inline void _reserve(StringRep*& rep, Uint32 cap)
 444 chuck          1.102 {
 445 mike           1.114     if (cap > rep->cap || rep->refs.get() != 1)
 446 chuck          1.102     {
 447 mike           1.112         size_t n = _roundUpToPow2(cap);
 448                              StringRep* newRep = StringRep::alloc(n);
 449                              newRep->size = rep->size;
 450                              _copy(newRep->data, rep->data, rep->size + 1);
 451                              StringRep::unref(rep);
 452                              rep = newRep;
 453                          }
 454                      }
 455 david.dillard  1.105 
 456 mike           1.112 StringRep* StringRep::create(const Uint16* data, size_t size)
 457                      {
 458                          StringRep* rep = StringRep::alloc(size);
 459                          rep->size = size;
 460                          _copy(rep->data, data, size);
 461                          rep->data[size] = '\0';
 462                          return rep;
 463                      }
 464 chuck          1.102 
 465 mike           1.112 StringRep* StringRep::copyOnWrite(StringRep* rep)
 466                      {
 467                          // Return a new copy of rep. Release rep.
 468 chuck          1.102 
 469 mike           1.112     StringRep* newRep = StringRep::alloc(rep->size);
 470                          newRep->size = rep->size;
 471                          _copy(newRep->data, rep->data, rep->size);
 472                          newRep->data[newRep->size] = '\0';
 473                          StringRep::unref(rep);
 474                          return newRep;
 475 chuck          1.102 }
 476                      
 477 mike           1.112 StringRep* StringRep::create(const char* data, size_t size)
 478 kumpf          1.43  {
 479 mike           1.112     StringRep* rep = StringRep::alloc(size);
 480                          size_t utf8_error_index;
 481                          rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index);
 482                      
 483                          if (rep->size == size_t(-1))
 484                          {
 485                              StringRep::free(rep);
 486 thilo.boehm    1.138         _StringThrowBadUTF8((Uint32)utf8_error_index, data,size);
 487 mike           1.112     }
 488 kumpf          1.43  
 489 mike           1.112     rep->data[rep->size] = '\0';
 490 kumpf          1.43  
 491 mike           1.112     return rep;
 492 mike           1.27  }
 493                      
 494 mike           1.112 Uint32 StringRep::length(const Uint16* str)
 495 mike           1.27  {
 496 mike           1.112     // Note: We could unroll this but it is rarely called.
 497                      
 498                          const Uint16* end = (Uint16*)str;
 499                      
 500                          while (*end++)
 501                              ;
 502                      
 503 a.dunfey       1.125     return (Uint32)(end - str - 1);
 504 kumpf          1.39  }
 505 tony           1.66  
 506 mike           1.112 //==============================================================================
 507                      //
 508                      // class String
 509                      //
 510                      //==============================================================================
 511                      
// Definition of the static String::EMPTY constant (default-constructed).
const String String::EMPTY;
 513 mike           1.27  
 514 kumpf          1.39  String::String(const String& str, Uint32 n)
 515                      {
 516 mike           1.112     _checkBounds(n, str._rep->size);
 517                          _rep = StringRep::create(str._rep->data, n);
 518 kumpf          1.39  }
 519                      
 520                      String::String(const Char16* str)
 521                      {
 522 mike           1.112     _checkNullPointer(str);
 523                          _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
 524 mike           1.27  }
 525                      
 526 kumpf          1.39  String::String(const Char16* str, Uint32 n)
 527                      {
 528 mike           1.112     _checkNullPointer(str);
 529                          _rep = StringRep::create((Uint16*)str, n);
 530 kumpf          1.39  }
 531                      
 532                      String::String(const char* str)
 533 mike           1.27  {
 534 mike           1.112     _checkNullPointer(str);
 535 david.dillard  1.105 
 536 mike           1.112     // Set this just in case create() throws an exception.
 537                          _rep = &StringRep::_emptyRep;
 538                          _rep = StringRep::create(str, strlen(str));
 539 mike           1.27  }
 540                      
 541 kumpf          1.39  String::String(const char* str, Uint32 n)
 542 mike           1.27  {
 543 mike           1.112     _checkNullPointer(str);
 544 david.dillard  1.105 
 545 mike           1.112     // Set this just in case create() throws an exception.
 546                          _rep = &StringRep::_emptyRep;
 547                          _rep = StringRep::create(str, n);
 548 kumpf          1.39  }
 549 mike           1.27  
 550 mike           1.112 String::String(const String& s1, const String& s2)
 551 kumpf          1.39  {
 552 mike           1.112     size_t n1 = s1._rep->size;
 553                          size_t n2 = s2._rep->size;
 554                          size_t n = n1 + n2;
 555                          _rep = StringRep::alloc(n);
 556                          _copy(_rep->data, s1._rep->data, n1);
 557                          _copy(_rep->data + n1, s2._rep->data, n2);
 558                          _rep->size = n;
 559                          _rep->data[n] = '\0';
 560 mike           1.27  }
 561                      
 562 mike           1.112 String::String(const String& s1, const char* s2)
 563 mike           1.27  {
 564 mike           1.112     _checkNullPointer(s2);
 565                          size_t n1 = s1._rep->size;
 566                          size_t n2 = strlen(s2);
 567                          _rep = StringRep::alloc(n1 + n2);
 568                          _copy(_rep->data, s1._rep->data, n1);
 569                          size_t utf8_error_index;
 570                          size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index);
 571                      
 572                          if (tmp == size_t(-1))
 573 kumpf          1.82      {
 574 mike           1.112         StringRep::free(_rep);
 575                              _rep = &StringRep::_emptyRep;
 576 thilo.boehm    1.138         _StringThrowBadUTF8((Uint32)utf8_error_index,s2,n2);
 577 kumpf          1.82      }
 578 mike           1.112 
 579                          _rep->size = n1 + tmp;
 580                          _rep->data[_rep->size] = '\0';
 581 mike           1.27  }
 582                      
 583 mike           1.112 String::String(const char* s1, const String& s2)
 584 mike           1.27  {
 585 mike           1.112     _checkNullPointer(s1);
 586                          size_t n1 = strlen(s1);
 587                          size_t n2 = s2._rep->size;
 588                          _rep = StringRep::alloc(n1 + n2);
 589                          size_t utf8_error_index;
 590                          size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index);
 591                      
 592                          if (tmp ==  size_t(-1))
 593                          {
 594                              StringRep::free(_rep);
 595                              _rep = &StringRep::_emptyRep;
 596 thilo.boehm    1.138         _StringThrowBadUTF8((Uint32)utf8_error_index,s1,n1);
 597 mike           1.112     }
 598                      
 599                          _rep->size = n2 + tmp;
 600                          _copy(_rep->data + n1, s2._rep->data, n2);
 601                          _rep->data[_rep->size] = '\0';
 602 mike           1.27  }
 603                      
 604 mike           1.112 String& String::assign(const String& str)
 605 mike           1.27  {
 606 mike           1.112     if (_rep != str._rep)
 607 david.dillard  1.105     {
 608 mike           1.112         StringRep::unref(_rep);
 609                              StringRep::ref(_rep = str._rep);
 610 david.dillard  1.105     }
 611                      
 612 mike           1.27      return *this;
 613                      }
 614                      
 615                      String& String::assign(const Char16* str, Uint32 n)
 616                      {
 617 mike           1.112     _checkNullPointer(str);
 618                      
 619 mike           1.114     if (n > _rep->cap || _rep->refs.get() != 1)
 620 david.dillard  1.105     {
 621 mike           1.112         StringRep::unref(_rep);
 622                              _rep = StringRep::alloc(n);
 623 david.dillard  1.105     }
 624                      
 625 mike           1.112     _rep->size = n;
 626                          _copy(_rep->data, (Uint16*)str, n);
 627                          _rep->data[n] = '\0';
 628                      
 629 mike           1.27      return *this;
 630                      }
 631                      
 632 mike           1.112 String& String::assign(const char* str, Uint32 n)
 633 chuck          1.102 {
 634 mike           1.112     _checkNullPointer(str);
 635                      
 636 mike           1.114     if (n > _rep->cap || _rep->refs.get() != 1)
 637 david.dillard  1.105     {
 638 mike           1.112         StringRep::unref(_rep);
 639                              _rep = StringRep::alloc(n);
 640 david.dillard  1.105     }
 641                      
 642 mike           1.112     size_t utf8_error_index;
 643                          _rep->size = _convert(_rep->data, str, n, utf8_error_index);
 644 chuck          1.102 
 645 mike           1.112     if (_rep->size ==  size_t(-1))
 646 david.dillard  1.105     {
 647 mike           1.112         StringRep::free(_rep);
 648                              _rep = &StringRep::_emptyRep;
 649 thilo.boehm    1.138         _StringThrowBadUTF8((Uint32)utf8_error_index,str,n);
 650 david.dillard  1.105     }
 651 mike           1.112 
 652                          _rep->data[_rep->size] = 0;
 653 david.dillard  1.105 
 654 mike           1.27      return *this;
 655                      }
 656                      
 657 kumpf          1.39  void String::clear()
 658                      {
 659 mike           1.112     if (_rep->size)
 660                          {
 661 mike           1.114         if (_rep->refs.get() == 1)
 662 mike           1.112         {
 663                                  _rep->size = 0;
 664                                  _rep->data[0] = '\0';
 665                              }
 666                              else
 667                              {
 668                                  StringRep::unref(_rep);
 669                                  _rep = &StringRep::_emptyRep;
 670                              }
 671                          }
 672 kumpf          1.39  }
 673                      
 674 mike           1.112 void String::reserveCapacity(Uint32 cap)
 675 kumpf          1.39  {
 676 mike           1.112     _reserve(_rep, cap);
 677 kumpf          1.39  }
 678                      
 679 mike           1.112 CString String::getCString() const
 680                      {
 681 david.dillard  1.116     // A UTF8 string can have three times as many characters as its UTF16
 682                          // counterpart, so we allocate extra memory for the worst case. In the
 683 mike           1.112     // best case, we may need only one third of the memory allocated. But
 684 david.dillard  1.116     // downsizing the string afterwards is expensive and unecessary since
 685                          // CString objects are usually short-lived (disappearing after only a few
 686 mike           1.112     // instructions). CString objects are typically created on the stack as
 687                          // means to obtain a char* pointer.
 688                      
 689                      #ifdef PEGASUS_STRING_NO_UTF8
 690                          char* str = (char*)operator new(_rep->size + 1);
 691                          _copy(str, _rep->data, _rep->size);
 692                          str[_rep->size] = '\0';
 693                          return CString(str);
 694 gs.keenan      1.110 #else
 695 a.dunfey       1.125     Uint32 n = (Uint32)(3 * _rep->size);
 696 mike           1.112     char* str = (char*)operator new(n + 1);
 697                          size_t size = _copyToUTF8(str, _rep->data, _rep->size);
 698                          str[size] = '\0';
 699                          return CString(str);
 700 gs.keenan      1.110 #endif
 701 kumpf          1.39  }
 702                      
 703 mike           1.112 String& String::append(const Char16* str, Uint32 n)
 704 kumpf          1.39  {
 705 mike           1.112     _checkNullPointer(str);
 706                      
 707                          size_t oldSize = _rep->size;
 708                          size_t newSize = oldSize + n;
 709 a.dunfey       1.125     _reserve(_rep, (Uint32)newSize);
 710 mike           1.112     _copy(_rep->data + oldSize, (Uint16*)str, n);
 711                          _rep->size = newSize;
 712                          _rep->data[newSize] = '\0';
 713                      
 714                          return *this;
 715 kumpf          1.39  }
 716                      
 717 mike           1.112 String& String::append(const String& str)
 718 mike           1.27  {
 719 a.dunfey       1.125     return append((Char16*)(&(str._rep->data[0])), (Uint32)str._rep->size);
 720 mike           1.27  }
 721                      
 722 mike           1.112 String& String::append(const char* str, Uint32 size)
 723 mike           1.27  {
 724 mike           1.112     _checkNullPointer(str);
 725                      
 726                          size_t oldSize = _rep->size;
 727                          size_t cap = oldSize + size;
 728                      
 729 a.dunfey       1.125     _reserve(_rep, (Uint32)cap);
 730 mike           1.112     size_t utf8_error_index;
 731                          size_t tmp = _convert(
 732                              (Uint16*)_rep->data + oldSize, str, size, utf8_error_index);
 733                      
 734                          if (tmp ==  size_t(-1))
 735                          {
 736                              StringRep::free(_rep);
 737                              _rep = &StringRep::_emptyRep;
 738 thilo.boehm    1.138         _StringThrowBadUTF8((Uint32)utf8_error_index,str,size);
 739 mike           1.112     }
 740 mike           1.27  
 741 mike           1.112     _rep->size += tmp;
 742                          _rep->data[_rep->size] = '\0';
 743 mike           1.27  
 744 kumpf          1.39      return *this;
 745                      }
 746                      
 747 mike           1.112 void String::remove(Uint32 index, Uint32 n)
 748 mike           1.27  {
 749 mike           1.112     if (n == PEG_NOT_FOUND)
 750 a.dunfey       1.125         n = (Uint32)(_rep->size - index);
 751 mike           1.112 
 752                          _checkBounds(index + n, _rep->size);
 753                      
 754 mike           1.114     if (_rep->refs.get() != 1)
 755 mike           1.112         _rep = StringRep::copyOnWrite(_rep);
 756 mike           1.27  
 757 jim.wunderlich 1.115     PEGASUS_ASSERT(index + n <= _rep->size);
 758 mike           1.27  
 759 mike           1.112     size_t rem = _rep->size - (index + n);
 760                          Uint16* data = _rep->data;
 761 mike           1.27  
 762 mike           1.112     if (rem)
 763                              memmove(data + index, data + index + n, rem * sizeof(Uint16));
 764 mike           1.27  
 765 mike           1.112     _rep->size -= n;
 766                          data[_rep->size] = '\0';
 767 mike           1.27  }
 768                      
 769 mike           1.112 String String::subString(Uint32 index, Uint32 n) const
 770 mike           1.27  {
 771 mike           1.112     // Note: this implementation is very permissive but used for
 772                          // backwards compatibility.
 773                      
 774                          if (index < _rep->size)
 775 mike           1.27      {
 776 mike           1.112         if (n == PEG_NOT_FOUND || n > _rep->size - index)
 777 a.dunfey       1.125             n = (Uint32)(_rep->size - index);
 778 mike           1.27  
 779 w.otsuka       1.121         return String((Char16*)(_rep->data + index), n);
 780 mike           1.27      }
 781 david.dillard  1.105 
 782                          return String();
 783 mike           1.27  }
 784                      
 785                      Uint32 String::find(Char16 c) const
 786                      {
 787 mike           1.112     Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
 788 mike           1.27  
 789 mike           1.112     if (p)
 790 david.dillard  1.116         return static_cast<Uint32>(p - _rep->data);
 791 mike           1.27  
 792                          return PEG_NOT_FOUND;
 793                      }
 794                      
 795 kumpf          1.53  Uint32 String::find(Uint32 index, Char16 c) const
 796 mike           1.30  {
 797 mike           1.112     _checkBounds(index, _rep->size);
 798                      
 799                          if (index >= _rep->size)
 800                              return PEG_NOT_FOUND;
 801                      
 802                          Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
 803 mike           1.30  
 804 mike           1.112     if (p)
 805 david.dillard  1.116         return static_cast<Uint32>(p - _rep->data);
 806 mike           1.30  
 807                          return PEG_NOT_FOUND;
 808                      }
 809                      
 810 mike           1.112 Uint32 StringFindAux(
 811                          const StringRep* _rep, const Char16* s, Uint32 n)
 812 mike           1.27  {
 813 mike           1.112     _checkNullPointer(s);
 814 mike           1.27  
 815 mike           1.112     const Uint16* data = _rep->data;
 816                          size_t rem = _rep->size;
 817                      
 818                          while (n <= rem)
 819 mike           1.30      {
 820 mike           1.112         Uint16* p = (Uint16*)_find(data, rem, s[0]);
 821                      
 822                              if (!p)
 823                                  break;
 824 mike           1.30  
 825 mike           1.112         if (memcmp(p, s, n * sizeof(Uint16)) == 0)
 826 david.dillard  1.116             return static_cast<Uint32>(p - _rep->data);
 827 david.dillard  1.105 
 828 mike           1.112         p++;
 829                              rem -= p - data;
 830                              data = p;
 831 mike           1.27      }
 832 mike           1.112 
 833 mike           1.27      return PEG_NOT_FOUND;
 834                      }
 835                      
 836 mike           1.112 Uint32 String::find(const char* s) const
 837                      {
 838                          _checkNullPointer(s);
 839                      
 840                          // Note: could optimize away creation of temporary, but this is rarely
 841                          // called.
 842                          return find(String(s));
 843                      }
 844                      
 845 mike           1.27  Uint32 String::reverseFind(Char16 c) const
 846                      {
 847 mike           1.112     Uint16 x = c;
 848                          Uint16* p = _rep->data;
 849                          Uint16* q = _rep->data + _rep->size;
 850 mike           1.27  
 851 mike           1.112     while (q != p)
 852 mike           1.27      {
 853 mike           1.112         if (*--q == x)
 854 david.dillard  1.116             return static_cast<Uint32>(q - p);
 855 mike           1.27      }
 856                      
 857                          return PEG_NOT_FOUND;
 858                      }
 859                      
 860                      void String::toLower()
 861                      {
 862 david          1.69  #ifdef PEGASUS_HAS_ICU
 863 mike           1.112 
 864 yi.zhou        1.108     if (InitializeICU::initICUSuccessful())
 865 david          1.90      {
 866 mike           1.114         if (_rep->refs.get() != 1)
 867 mike           1.112             _rep = StringRep::copyOnWrite(_rep);
 868                      
 869 yi.zhou        1.108         // This will do a locale-insensitive, but context-sensitive convert.
 870 david.dillard  1.116         // Since context-sensitive casing looks at adjacent chars, this
 871                              // prevents optimizations where the us-ascii is converted before
 872 mike           1.112         // calling ICU.
 873 yi.zhou        1.108         // The string may shrink or expand after the convert.
 874                      
 875 mike           1.112         //// First calculate size of resulting string. u_strToLower() returns
 876                              //// only the size when zero is passed as the destination size argument.
 877                      
 878 yi.zhou        1.108         UErrorCode err = U_ZERO_ERROR;
 879                      
 880 mike           1.112         int32_t newSize = u_strToLower(
 881                                  NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
 882 david.dillard  1.116 
 883 mike           1.112         err = U_ZERO_ERROR;
 884                      
 885                              //// Reserve enough space for the result.
 886                      
 887                              if ((Uint32)newSize > _rep->cap)
 888                                  _reserve(_rep, newSize);
 889                      
 890                              //// Perform the conversion (overlapping buffers are allowed).
 891 chuck          1.99  
 892 mike           1.112         u_strToLower((UChar*)_rep->data, newSize,
 893                                  (UChar*)_rep->data, _rep->size, NULL, &err);
 894 yi.zhou        1.108 
 895 mike           1.112         _rep->size = newSize;
 896                              return;
 897 david          1.90      }
 898 mike           1.112 
 899                      #endif /* PEGASUS_HAS_ICU */
 900                      
 901 mike           1.114     if (_rep->refs.get() != 1)
 902 mike           1.112         _rep = StringRep::copyOnWrite(_rep);
 903                      
 904                          Uint16* p = _rep->data;
 905                          size_t n = _rep->size;
 906                      
 907                          for (; n--; p++)
 908 david          1.90      {
 909 mike           1.112         if (!(*p & 0xFF00))
 910                                  *p = _toLower(*p);
 911 mike           1.27      }
 912 kumpf          1.39  }
 913                      
 914 chuck          1.99  void String::toUpper()
 915 david          1.90  {
 916                      #ifdef PEGASUS_HAS_ICU
 917 mike           1.112 
 918 yi.zhou        1.108     if (InitializeICU::initICUSuccessful())
 919 chuck          1.99      {
 920 mike           1.114         if (_rep->refs.get() != 1)
 921 mike           1.112             _rep = StringRep::copyOnWrite(_rep);
 922                      
 923 yi.zhou        1.108         // This will do a locale-insensitive, but context-sensitive convert.
 924 david.dillard  1.116         // Since context-sensitive casing looks at adjacent chars, this
 925                              // prevents optimizations where the us-ascii is converted before
 926 mike           1.112         // calling ICU.
 927 yi.zhou        1.108         // The string may shrink or expand after the convert.
 928                      
 929 mike           1.112         //// First calculate size of resulting string. u_strToUpper() returns
 930                              //// only the size when zero is passed as the destination size argument.
 931                      
 932 yi.zhou        1.108         UErrorCode err = U_ZERO_ERROR;
 933                      
 934 mike           1.112         int32_t newSize = u_strToUpper(
 935                                  NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
 936                      
 937                              err = U_ZERO_ERROR;
 938                      
 939                              //// Reserve enough space for the result.
 940                      
 941                              if ((Uint32)newSize > _rep->cap)
 942                                  _reserve(_rep, newSize);
 943                      
 944                              //// Perform the conversion (overlapping buffers are allowed).
 945                      
 946                              u_strToUpper((UChar*)_rep->data, newSize,
 947                                  (UChar*)_rep->data, _rep->size, NULL, &err);
 948 chuck          1.99  
 949 mike           1.112         _rep->size = newSize;
 950 yi.zhou        1.108 
 951 mike           1.112         return;
 952 david          1.91      }
 953 mike           1.112 
 954                      #endif /* PEGASUS_HAS_ICU */
 955                      
 956 mike           1.114     if (_rep->refs.get() != 1)
 957 mike           1.112         _rep = StringRep::copyOnWrite(_rep);
 958                      
 959                          Uint16* p = _rep->data;
 960                          size_t n = _rep->size;
 961                      
 962                          for (; n--; p++)
 963                              *p = _toUpper(*p);
 964 david          1.90  }
 965                      
 966 kumpf          1.43  int String::compare(const String& s1, const String& s2, Uint32 n)
 967 kumpf          1.39  {
 968 kumpf          1.118     const Uint16* p1 = s1._rep->data;
 969                          const Uint16* p2 = s2._rep->data;
 970 mike           1.27  
 971 kumpf          1.118     while (n--)
 972                          {
 973                              int r = *p1++ - *p2++;
 974                              if (r)
 975                              {
 976                                  return r;
 977                              }
 978                              else if (!p1[-1])
 979                              {
 980                                  // We must have encountered a null terminator in both s1 and s2
 981                                  return 0;
 982                              }
 983                          }
 984                          return 0;
 985 mike           1.27  }
 986                      
 987 kumpf          1.43  int String::compare(const String& s1, const String& s2)
 988 mike           1.30  {
 989 mike           1.112     return _compare(s1._rep->data, s2._rep->data);
 990                      }
 991 kumpf          1.43  
 992 mike           1.112 int String::compare(const String& s1, const char* s2)
 993                      {
 994                          _checkNullPointer(s2);
 995 mike           1.30  
 996 mike           1.112 #ifdef PEGASUS_STRING_NO_UTF8
 997                          return _compareNoUTF8(s1._rep->data, s2);
 998                      #else
 999                          // ATTN: optimize this!
1000                          return String::compare(s1, String(s2));
1001                      #endif
1002 mike           1.30  }
1003                      
1004 mike           1.112 int String::compareNoCase(const String& str1, const String& str2)
1005 kumpf          1.40  {
1006 david          1.69  #ifdef PEGASUS_HAS_ICU
1007 mike           1.112 
1008 yi.zhou        1.108     if (InitializeICU::initICUSuccessful())
1009                          {
1010 mike           1.112         return  u_strcasecmp(
1011 dave.sudlik    1.124             (const UChar*)str1._rep->data,
1012                                  (const UChar*)str2._rep->data,
1013                                  U_FOLD_CASE_DEFAULT
1014                                  );
1015 yi.zhou        1.108     }
1016 kumpf          1.40  
1017 mike           1.112 #endif /* PEGASUS_HAS_ICU */
1018                      
1019                          const Uint16* s1 = str1._rep->data;
1020                          const Uint16* s2 = str2._rep->data;
1021                      
1022                          while (*s1 && *s2)
1023 kumpf          1.40      {
1024 mike           1.112         int r = _toLower(*s1++) - _toLower(*s2++);
1025 kumpf          1.40  
1026 david.dillard  1.105         if (r)
1027                                  return r;
1028 kumpf          1.40      }
1029                      
1030 mike           1.112     if (*s2)
1031 david.dillard  1.105         return -1;
1032 mike           1.112     else if (*s1)
1033 david.dillard  1.105         return 1;
1034 kumpf          1.40  
1035                          return 0;
1036                      }
1037                      
1038 mike           1.112 Boolean StringEqualNoCase(const String& s1, const String& s2)
1039 mike           1.27  {
1040 mike           1.112 #ifdef PEGASUS_HAS_ICU
1041                      
1042                          return String::compareNoCase(s1, s2) == 0;
1043                      
1044                      #else /* PEGASUS_HAS_ICU */
1045 mike           1.27  
1046 mike           1.112     // The following employs loop unrolling for efficiency. Please do not
1047                          // eliminate.
1048 kumpf          1.39  
1049 mike           1.112     Uint16* p = (Uint16*)s1.getChar16Data();
1050                          Uint16* q = (Uint16*)s2.getChar16Data();
1051                          Uint32 n = s2.size();
1052                      
1053                          while (n >= 8)
1054                          {
1055                              if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1056                                  ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1057                                  ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1058                                  ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) ||
1059                                  ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) ||
1060                                  ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) ||
1061                                  ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) ||
1062                                  ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7]))))
1063                              {
1064                                  return false;
1065                              }
1066 kumpf          1.39  
1067 mike           1.112         n -= 8;
1068                              p += 8;
1069                              q += 8;
1070                          }
1071 mike           1.27  
1072 mike           1.112     while (n >= 4)
1073 kumpf          1.39      {
1074 mike           1.112         if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1075                                  ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1076                                  ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1077                                  ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
1078 david.dillard  1.105         {
1079 mike           1.112             return false;
1080 david.dillard  1.105         }
1081 mike           1.112 
1082                              n -= 4;
1083                              p += 4;
1084                              q += 4;
1085                          }
1086                      
1087                          while (n--)
1088                          {
1089                              if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
1090 david.dillard  1.105             return false;
1091 mike           1.112 
1092                              p++;
1093                              q++;
1094 kumpf          1.39      }
1095 mike           1.28  
1096 kumpf          1.39      return true;
1097 mike           1.112 
1098                      #endif /* PEGASUS_HAS_ICU */
1099 david          1.69  }
1100                      
1101 mike           1.112 Boolean String::equalNoCase(const String& s1, const char* s2)
1102 david          1.69  {
1103 mike           1.112     _checkNullPointer(s2);
1104 david          1.69  
1105 mike           1.112 #if defined(PEGASUS_HAS_ICU)
1106 david          1.69  
1107 mike           1.112     return String::equalNoCase(s1, String(s2));
1108 david          1.69  
1109 mike           1.112 #elif defined(PEGASUS_STRING_NO_UTF8)
1110 david          1.69  
1111 mike           1.112     const Uint16* p1 = (Uint16*)s1._rep->data;
1112                          const char* p2 = s2;
1113                          size_t n = s1._rep->size;
1114 david.dillard  1.105 
1115 mike           1.112     while (n--)
1116                          {
1117                              if (!*p2)
1118                                  return false;
1119 david          1.71  
1120 mike           1.112         if (_toUpper(*p1++) != _toUpperTable[int(*p2++)])
1121                                  return false;
1122                          }
1123 kumpf          1.42  
1124 mike           1.112     if (*p2)
1125                              return false;
1126 david.dillard  1.116 
1127 mike           1.112     return true;
1128 karl           1.36  
1129 mike           1.112 #else /* PEGASUS_HAS_ICU */
1130 david.dillard  1.105 
1131 mike           1.112     // ATTN: optimize this!
1132                          return String::equalNoCase(s1, String(s2));
1133 david.dillard  1.105 
1134 mike           1.112 #endif /* PEGASUS_HAS_ICU */
1135                      }
1136 chuck          1.78  
1137 mike           1.112 Boolean String::equal(const String& s1, const String& s2)
1138 karl           1.36  {
1139 marek          1.131     return (s1._rep == s2._rep) ||
1140 marek          1.137         ((s1._rep->size == s2._rep->size) &&
1141                               memcmp(s1._rep->data,
1142                                      s2._rep->data,
1143                                      s1._rep->size * sizeof(Uint16)) == 0);
1144 karl           1.36  }
1145                      
1146 karl           1.140 void String::appendPrintf(const char* format, ...)
1147                      {
1148                          va_list ap;
1149                          va_start(ap, format);
1150                      
1151                          // Format into allocated memory
1152                          ////char* rtnCharPtr = _charVPrintf(format, ap);
1153                      
1154                          // Iniitial allocation size.  This is a guess assuming that
1155                          // most printfs are one or two lines long
1156                          int allocSize = 256;
1157                          int rtnSize;
1158                          char *p;
1159                      
1160                          // initial allocate for output
1161                          if ((p = (char*)malloc(allocSize)) == NULL)
1162                          {
1163                              return;
1164                          }
1165                      
1166                          // repeat formatting  with increased realloc until it works.
1167 karl           1.140     do
1168                          {
1169                              rtnSize = vsnprintf(p, allocSize, format, ap);
1170                      
1171                              // return if successful; i.e. if not negative and
1172                              // returns less than allocated size.
1173                              if (rtnSize > -1 && rtnSize < allocSize)
1174                              {
1175                                  break;
1176                              }
1177                      
1178                              // increment alloc size. Positive return is
1179                              // expected size and negative is error.
1180                              allocSize = (rtnSize > -1)? (rtnSize + 1) : allocSize * 2;
1181                      
1182                          } while((p = (char*)peg_inln_realloc(p, allocSize)) != NULL);
1183                      
1184                          // get here only with error in malloc.
1185                      
1186                          va_end(ap);
1187                      
1188 karl           1.140     // Free allocated memory append printf output to current string
1189                          append(p, rtnSize);
1190                          free(p);
1191                      }
1192                      
1193 mike           1.112 Boolean String::equal(const String& s1, const char* s2)
1194                      {
1195                      #ifdef PEGASUS_STRING_NO_UTF8
1196 kumpf          1.35  
1197 mike           1.112     _checkNullPointer(s2);
1198 kumpf          1.39  
1199 mike           1.112     const Uint16* p = (Uint16*)s1._rep->data;
1200                          const char* q = s2;
1201 kumpf          1.39  
1202 mike           1.112     while (*p && *q)
1203                          {
1204                              if (*p++ != Uint16(*q++))
1205                                  return false;
1206                          }
1207 kumpf          1.39  
1208 mike           1.112     return !(*p || *q);
1209 kumpf          1.39  
1210 mike           1.112 #else /* PEGASUS_STRING_NO_UTF8 */
1211 kumpf          1.39  
1212 mike           1.112     return String::equal(s1, String(s2));
1213 kumpf          1.39  
1214 mike           1.112 #endif /* PEGASUS_STRING_NO_UTF8 */
1215 kumpf          1.39  }
1216                      
1217 kumpf          1.47  PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
1218 kumpf          1.39  {
1219 mike           1.112 #if defined(PEGASUS_HAS_ICU)
1220 david          1.69  
1221 yi.zhou        1.108     if (InitializeICU::initICUSuccessful())
1222                          {
1223 david.dillard  1.105         char *buf = NULL;
1224                              const int size = str.size() * 6;
1225 mike           1.112         UnicodeString UniStr(
1226                                  (const UChar *)str.getChar16Data(), (int32_t)str.size());
1227 david.dillard  1.105         Uint32 bufsize = UniStr.extract(0,size,buf);
1228                              buf = new char[bufsize+1];
1229                              UniStr.extract(0,bufsize,buf);
1230                              os << buf;
1231                              os.flush();
1232                              delete [] buf;
1233 david.dillard  1.116         return os;
1234 yi.zhou        1.108     }
1235 mike           1.112 
1236 david.dillard  1.116 #endif  // PEGASUS_HAS_ICU
1237 mike           1.112 
1238                          for (Uint32 i = 0, n = str.size(); i < n; i++)
1239 yi.zhou        1.108     {
1240 mike           1.112         Uint16 code = str[i];
1241 david.dillard  1.105 
1242 mike           1.112         if (code > 0 && !(code & 0xFF00))
1243                                      os << char(code);
1244                              else
1245                                  {
1246                                  // Print in hex format:
1247                                  char buffer[8];
1248                                  sprintf(buffer, "\\x%04X", code);
1249                                  os << buffer;
1250 david.dillard  1.105         }
1251 yi.zhou        1.108     }
1252 kumpf          1.39  
1253                          return os;
1254                      }
1255                      
1256 mike           1.112 void StringAppendCharAux(StringRep*& _rep)
1257 kumpf          1.39  {
1258 mike           1.112     StringRep* tmp;
1259                      
1260                          if (_rep->cap)
1261                          {
1262                              tmp = StringRep::alloc(2 * _rep->cap);
1263                              tmp->size = _rep->size;
1264                              _copy(tmp->data, _rep->data, _rep->size);
1265                          }
1266                          else
1267                          {
1268                              tmp = StringRep::alloc(8);
1269                              tmp->size = 0;
1270                          }
1271                      
1272                          StringRep::unref(_rep);
1273                          _rep = tmp;
1274 kumpf          1.39  }
1275                      
1276 thilo.boehm    1.128 void AssignASCII(String& s, const char* str, Uint32 n)
1277                      {
1278                          class StringLayout
1279                          {
1280                          public:
1281                              StringRep* rep;
1282                          };
1283                      
1284 kumpf          1.130     StringLayout* that = reinterpret_cast<StringLayout*>(&s);
1285 thilo.boehm    1.128 
1286                          _checkNullPointer(str);
1287                      
1288                          if (n > that->rep->cap || that->rep->refs.get() != 1)
1289                          {
1290                              StringRep::unref(that->rep);
1291                              that->rep = StringRep::alloc(n);
1292                          }
1293                      
1294                          _copy(that->rep->data, str, n);
1295                          that->rep->size = n;
1296                          that->rep->data[that->rep->size] = 0;
1297                      }
1298                      
1299 mike           1.112 PEGASUS_NAMESPACE_END
1300                      
1301                      /*
1302                      ================================================================================
1303                      
1304                      String optimizations:
1305                      
1306                          1.  Added mechanism allowing certain functions to be inlined only when
1307                              used by internal Pegasus modules. External modules (i.e., providers)
1308                              link to a non-inline version, which allows for binary compatibility.
1309                      
1310                          2.  Implemented copy-on-write with atomic increment/decrement. This
1311                              yielded a 10% improvement for the 'gc' benchmark and an 11% improvement
1312                              for the 'ni1000' benchmark.
1313                      
1314                          3.  Employed loop unrolling in several places. For example, see:
1315                      
1316                                  static Uint16* _find(const Uint16* s, size_t n, Uint16 c);
1317                      
1318                          4.  Used the "empty-rep" optimization (described in whitepaper from the
1319                              GCC Developers Summit). This reduced default construction to a simple
1320 mike           1.112         pointer assignment.
1321                      
1322                                  inline String::String() : _rep(&_emptyRep) { }
1323                      
1324                          5.  Implemented Uint16 versions of toupper() and tolower() using tables.
1325                              For example:
1326                      
1327                                  static const char _upper[] =
1328                                  {
1329                                      0,1,2,...255
1330                                  };
1331                      
1332                                  inline Uint16 _toUpper(Uint16 x)
1333                                  {
1334                                      return (x & 0xFF00) ? x : _upper[x];
1335                                  }
1336                      
1337 david.dillard  1.116         This outperforms the system implementation by avoiding an anding
1338 mike           1.112         operation.
1339                      
1340 david.dillard  1.116     6.  Implemented char* version of the following member functions to
1341                              eliminate unnecessary creation of anonymous string objects
1342 mike           1.112         (temporaries).
1343                      
1344                                  String(const String& s1, const char* s2);
1345                                  String(const char* s1, const String& s2);
1346                                  String& String::operator=(const char* str);
1347                                  Uint32 String::find(const char* s) const;
1348                                  bool String::equal(const String& s1, const char* s2);
1349                                  static int String::compare(const String& s1, const char* s2);
1350                                  String& String::append(const char* str);
1351                                  String& String::append(const char* str, Uint32 size);
1352                                  static bool String::equalNoCase(const String& s1, const char* s2);
1353                                  String& operator=(const char* str)
1354                                  String& String::assign(const char* str)
1355                                  String& String::append(const char* str)
1356                                  Boolean operator==(const String& s1, const char* s2)
1357                                  Boolean operator==(const char* s1, const String& s2)
1358                                  Boolean operator!=(const String& s1, const char* s2)
1359                                  Boolean operator!=(const char* s1, const String& s2)
1360                                  Boolean operator<(const String& s1, const char* s2)
1361                                  Boolean operator<(const char* s1, const String& s2)
1362                                  Boolean operator>(const String& s1, const char* s2)
1363 mike           1.112             Boolean operator>(const char* s1, const String& s2)
1364                                  Boolean operator<=(const String& s1, const char* s2)
1365                                  Boolean operator<=(const char* s1, const String& s2)
1366                                  Boolean operator>=(const String& s1, const char* s2)
1367                                  Boolean operator>=(const char* s1, const String& s2)
1368                                  String operator+(const String& s1, const char* s2)
1369                                  String operator+(const char* s1, const String& s2)
1370                      
1371 david.dillard  1.116     7.  Optimized _roundUpToPow2(), used in rounding the capacity to the next
1372 mike           1.112         power of two (algorithm from the book "Hacker's Delight").
1373                      
1374                                  static Uint32 _roundUpToPow2(Uint32 x)
1375                                  {
1376                                      if (x < 8)
1377                                          return 8;
1378                      
1379                                      x--;
1380                                      x |= (x >> 1);
1381                                      x |= (x >> 2);
1382                                      x |= (x >> 4);
1383                                      x |= (x >> 8);
1384                                      x |= (x >> 16);
1385                                      x++;
1386                      
1387                                      return x;
1388                                  }
1389                      
1390                          8.  Implemented "concatenating constructors" to eliminate temporaries
1391 david.dillard  1.116         created by operator+(). This scheme employs the "return-value
1392 mike           1.112         optimization" described by Stan Lippman.
1393                      
1394                                  inline String operator+(const String& s1, const String& s2)
1395                                  {
1396                                      return String(s1, s2, 0);
1397                                  }
1398                      
1399                          9.  Experimented to find the optimal initial size for a short string.
1400                              Eight seems to offer the best tradeoff between space and time.
1401                      
1402                          10. Inlined all members of the Char16 class.
1403                      
1404                          11. Used Uint16 internally in the String class. This showed no improvement
1405                              since Char16 was already fully inlined and was essentially reduced to
1406                              Uint16 in any case.
1407                      
1408                          12. Implemented conditional logic (#if) allowing error checking logic to
1409 david.dillard  1.116         be excluded for better performance. Examples include bounds checking
1410 mike           1.112         and null-pointer checking.
1411                      
1412                          13. Used memcpy() and memcmp() where possible. These are implemented using
1413                              the rep family of instructions under Intel and are much faster.
1414                      
1415 david.dillard  1.116     14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
1416 mike           1.112         copy routine overhead.
1417                      
1418                          15. Added ASCII7 form of the constructor and assign().
1419                      
1420                                  String s("hello world", String::ASCII7);
1421                      
1422                                  s.assignASCII7("hello world");
1423                      
1424                              This avoids slower UTF8 processing when not needed.
1425                      
1426                      ================================================================================
1427                      */

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2