(file) Return to String.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 martin 1.133 //%LICENSE////////////////////////////////////////////////////////////////
   2 martin 1.134 //
   3 martin 1.133 // Licensed to The Open Group (TOG) under one or more contributor license
   4              // agreements.  Refer to the OpenPegasusNOTICE.txt file distributed with
   5              // this work for additional information regarding copyright ownership.
   6              // Each contributor licenses this file to you under the OpenPegasus Open
   7              // Source License; you may not use this file except in compliance with the
   8              // License.
   9 martin 1.134 //
  10 martin 1.133 // Permission is hereby granted, free of charge, to any person obtaining a
  11              // copy of this software and associated documentation files (the "Software"),
  12              // to deal in the Software without restriction, including without limitation
  13              // the rights to use, copy, modify, merge, publish, distribute, sublicense,
  14              // and/or sell copies of the Software, and to permit persons to whom the
  15              // Software is furnished to do so, subject to the following conditions:
  16 martin 1.134 //
  17 martin 1.133 // The above copyright notice and this permission notice shall be included
  18              // in all copies or substantial portions of the Software.
  19 martin 1.134 //
  20 martin 1.133 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  21 martin 1.134 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  22 martin 1.133 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  23              // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  24              // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  25              // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  26              // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27 martin 1.134 //
  28 martin 1.133 //////////////////////////////////////////////////////////////////////////
  29 mike   1.27  //
  30              //%/////////////////////////////////////////////////////////////////////////////
  31              
  32 jim.wunderlich 1.115 #include <Pegasus/Common/PegasusAssert.h>
  33 mike           1.113 #include <cstring>
  34 kumpf          1.48  #include "InternalException.h"
  35 mike           1.112 #include "MessageLoader.h"
  36                      #include "StringRep.h"
  37 karl           1.138.2.2 #include <Pegasus/Common/Pegasus_inl.h>
  38                          #include <cstdarg>
  39 david          1.69      
  40                          #ifdef PEGASUS_HAS_ICU
  41 kumpf          1.132     # include <unicode/ures.h>
  42                          # include <unicode/ustring.h>
  43                          # include <unicode/uchar.h>
  44 david          1.69      #endif
  45                          
  46 mike           1.112     PEGASUS_NAMESPACE_BEGIN
  47 mike           1.28      
  48 mike           1.112     //==============================================================================
  49                          //
  50                          // Compile-time macros (undefined by default).
  51                          //
  52                          //     PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
  53                          //
  54                          //==============================================================================
  55 mike           1.27      
  56 mike           1.112     //==============================================================================
  57 kumpf          1.39      //
  58 mike           1.112     // File-scope definitions:
  59 kumpf          1.54      //
  60 mike           1.112     //==============================================================================
  61                          
  62                          // Note: this table is much faster than the system toupper(). Please do not
  63                          // change.
  64 kumpf          1.54      
   // Byte-indexed upper-casing table: every entry equals its own index except
   // 0x61..0x7A ('a'..'z'), which map to 0x41..0x5A ('A'..'Z'). Entries
   // 0x80..0xFF are left unchanged.
   65 david.dillard  1.116     const Uint8 _toUpperTable[256] =
   66 kumpf          1.54      {
   67 mike           1.112         0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
   68                              0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
   69                              0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
   70                              0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
   71                              0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
   72                              0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
   73                              0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
   74                              0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
   75                              0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
   76                              0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
   77                              0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
   78                              0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
   79                              0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
   80                              0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
   81                              0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
   82                              0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
   83                              0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
   84                              0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
   85                              0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
   86                              0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
   87                              0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
   88 mike           1.112         0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
   89                              0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
   90                              0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
   91                              0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
   92                              0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
   93                              0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
   94                              0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
   95                              0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
   96                              0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
   97                              0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
   98                              0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
   99                          };
 100                          
 101 karl           1.138.2.1 // Note: this table is much faster than the system tolower(). Please do not
 102 mike           1.112     // change.
 103                          
  // Byte-indexed lower-casing table: every entry equals its own index except
  // 0x41..0x5A ('A'..'Z'), which map to 0x61..0x7A ('a'..'z'). Entries
  // 0x80..0xFF are left unchanged.
  104 david.dillard  1.116     const Uint8 _toLowerTable[256] =
  105 mike           1.112     {
  106                              0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
  107                              0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
  108                              0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
  109                              0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
  110                              0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
  111                              0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
  112                              0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
  113                              0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
  114                              0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
  115                              0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
  116                              0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
  117                              0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
  118                              0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
  119                              0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
  120                              0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
  121                              0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
  122                              0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
  123                              0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
  124                              0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
  125                              0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
  126 mike           1.112         0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
  127                              0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
  128                              0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
  129                              0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
  130                              0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
  131                              0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
  132                              0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
  133                              0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
  134                              0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
  135                              0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
  136                              0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
  137                              0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
  138                          };
 139                          
 140                          // Converts 16-bit characters to upper case. This routine is faster than the
 141                          // system toupper(). Please do not change.
 142                          inline Uint16 _toUpper(Uint16 x)
 143                          {
 144                              return (x & 0xFF00) ? x : _toUpperTable[x];
 145 kumpf          1.54      }
 146                          
 147 mike           1.112     // Converts 16-bit characters to lower case. This routine is faster than the
 148                          // system toupper(). Please do not change.
 149                          inline Uint16 _toLower(Uint16 x)
 150 kumpf          1.54      {
 151 mike           1.112         return (x & 0xFF00) ? x : _toLowerTable[x];
 152                          }
 153                          
 154                          // Rounds x up to the nearest power of two (or just returns 8 if x < 8).
 155                          static Uint32 _roundUpToPow2(Uint32 x)
 156                          {
 157 dave.sudlik    1.120         // Check for potential overflow in x
 158                              PEGASUS_CHECK_CAPACITY_OVERFLOW(x);
 159 mike           1.112     
 160                              if (x < 8)
 161                                  return 8;
 162                          
 163                              x--;
 164                              x |= (x >> 1);
 165                              x |= (x >> 2);
 166                              x |= (x >> 4);
 167                              x |= (x >> 8);
 168                              x |= (x >> 16);
 169                              x++;
 170                          
 171                              return x;
 172                          }
 173                          
 174                          static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
 175                          {
 176                              // The following employs loop unrolling for efficiency. Please do not
 177                              // eliminate.
 178                          
 179                              while (n >= 4)
 180 mike           1.112         {
 181                                  if (s[0] == c)
 182                                      return (Uint16*)s;
 183                                  if (s[1] == c)
 184                                      return (Uint16*)&s[1];
 185                                  if (s[2] == c)
 186                                      return (Uint16*)&s[2];
 187                                  if (s[3] == c)
 188                                      return (Uint16*)&s[3];
 189 kumpf          1.82      
 190 mike           1.112             n -= 4;
 191                                  s += 4;
 192                              }
 193                          
 194                              if (n)
 195                              {
 196                                  if (*s == c)
 197                                      return (Uint16*)s;
 198                                  s++;
 199                                  n--;
 200                              }
 201                          
 202                              if (n)
 203                              {
 204                                  if (*s == c)
 205                                      return (Uint16*)s;
 206                                  s++;
 207                                  n--;
 208                              }
 209                          
 210                              if (n && *s == c)
 211 mike           1.112             return (Uint16*)s;
 212                          
 213                              // Not found!
 214                              return 0;
 215                          }
 216                          
 217                          static int _compare(const Uint16* s1, const Uint16* s2)
 218                          {
 219                              while (*s1 && *s2)
 220                              {
 221                                  int r = *s1++ - *s2++;
 222                          
 223                                  if (r)
 224                                      return r;
 225                              }
 226                          
 227                              if (*s2)
 228                                  return -1;
 229                              else if (*s1)
 230                                  return 1;
 231                          
 232 mike           1.112         return 0;
 233                          }
 234                          
 235 kumpf          1.130     #ifdef PEGASUS_STRING_NO_UTF8
 236 mike           1.112     static int _compareNoUTF8(const Uint16* s1, const char* s2)
 237                          {
 238                              Uint16 c1;
 239                              Uint16 c2;
 240                          
 241                              do
 242                              {
 243                                  c1 = *s1++;
 244                                  c2 = *s2++;
 245                          
 246                                  if (c1 == 0)
 247                                      return c1 - c2;
 248                              }
 249                              while (c1 == c2);
 250                          
 251                              return c1 - c2;
 252                          }
 253 kumpf          1.130     #endif
 254 mike           1.112     
 255                          static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)
 256                          {
 257                              memcpy(s1, s2, n * sizeof(Uint16));
 258                          }
 259                          
 260                          void StringThrowOutOfBounds()
 261                          {
 262                              throw IndexOutOfBoundsException();
 263                          }
 264                          
 265                          inline void _checkNullPointer(const void* ptr)
 266                          {
 267                              if (!ptr)
 268                                  throw NullPointer();
 269                          }
 270                          
 271 thilo.boehm    1.138     #define BADUTF8_MAX_CLEAR_CHAR 40
 272                          #define BADUTF8_MAX_CHAR_TO_HEX 10
 273                          
 274                          static void _formatBadUTF8Chars(
 275                              char* buffer,
 276                              Uint32 index,
 277                              const char* q,
 278                              size_t n )
 279 mike           1.112     {
 280 thilo.boehm    1.138     
 281                              char tmp[20];
 282                              const char* start;
 283                          
 284                              size_t clearChar =
 285                                  (( index < BADUTF8_MAX_CLEAR_CHAR ) ? index : BADUTF8_MAX_CLEAR_CHAR );
 286                              size_t charToHex =
 287                                  ((n-index-1) < BADUTF8_MAX_CHAR_TO_HEX ?
 288                                      (n-index-1) : BADUTF8_MAX_CHAR_TO_HEX );
 289                          
 290                              if (index < BADUTF8_MAX_CLEAR_CHAR)
 291                              {
 292                                  start = q;
 293                              } else
 294                              {
 295                                  start = &(q[ index - BADUTF8_MAX_CLEAR_CHAR]);
 296                              }
 297                          
 298                              // Intialize the buffer with the first character as '\0' to be able to use
 299                              // strnchat() and strcat()
 300                              buffer[0] = 0;
 301 thilo.boehm    1.138         // Start the buffer with the valid UTF8 chars
 302                              strncat(buffer,start,clearChar);
 303                              for (size_t i = clearChar, j = 0; j <= charToHex; i++,j++ )
 304                              {
 305                                  tmp[0] = 0;
 306                                  sprintf(&(tmp[0])," 0x%02X",(Uint8)start[i]);
 307                                  strncat(buffer,&(tmp[0]),5);
 308                              }
 309                          
 310                          }
 311                          
 312                          static void _StringThrowBadUTF8(Uint32 index, const char* q, size_t n)
 313                          {
 314                              char buffer[1024];
 315                          
 316                              _formatBadUTF8Chars(&(buffer[0]),index,q,n);
 317                          
 318 mike           1.112         MessageLoaderParms parms(
 319 thilo.boehm    1.138             "Common.String.BAD_UTF8_LONG",
 320 mike           1.112             "The byte sequence starting at index $0 "
 321 thilo.boehm    1.138             "is not valid UTF-8 encoding: $1",
 322                                  index,buffer);
 323                          
 324 mike           1.112         throw Exception(parms);
 325                          }
 326                          
 327 david.dillard  1.116     // Note: dest must be at least three times src (plus an extra byte for
 328 mike           1.112     // terminator).
 329                          static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n)
 330                          {
 331                              // The following employs loop unrolling for efficiency. Please do not
 332                              // eliminate.
 333                          
 334                              const Uint16* q = src;
 335                              Uint8* p = (Uint8*)dest;
 336                          
 337                              while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
 338 kumpf          1.82          {
 339 mike           1.112             p[0] = q[0];
 340                                  p[1] = q[1];
 341                                  p[2] = q[2];
 342                                  p[3] = q[3];
 343                                  p += 4;
 344                                  q += 4;
 345                                  n -= 4;
 346 kumpf          1.82          }
 347 mike           1.112     
 348                              switch (n)
 349                              {
 350                                  case 0:
 351                                      return p - (Uint8*)dest;
 352                                  case 1:
 353                                      if (q[0] < 128)
 354                                      {
 355                                          p[0] = q[0];
 356                                          return p + 1 - (Uint8*)dest;
 357                                      }
 358                                      break;
 359                                  case 2:
 360                                      if (q[0] < 128 && q[1] < 128)
 361                                      {
 362                                          p[0] = q[0];
 363                                          p[1] = q[1];
 364                                          return p + 2 - (Uint8*)dest;
 365                                      }
 366                                      break;
 367                                  case 3:
 368 mike           1.112                 if (q[0] < 128 && q[1] < 128 && q[2] < 128)
 369                                      {
 370                                          p[0] = q[0];
 371                                          p[1] = q[1];
 372                                          p[2] = q[2];
 373                                          return p + 3 - (Uint8*)dest;
 374                                      }
 375                                      break;
 376                              }
 377                          
 378                              // If this line was reached, there must be characters greater than 128.
 379                          
 380                              UTF16toUTF8(&q, q + n, &p, p + 3 * n);
 381                          
 382                              return p - (Uint8*)dest;
 383 kumpf          1.54      }
 384                          
 385 mike           1.112     //==============================================================================
 386                          //
 387                          // class CString
 388                          //
 389                          //==============================================================================
 390                          
 391                          CString::CString(const CString& cstr) : _rep(0)
 392 kumpf          1.54      {
 393 mike           1.112         if (cstr._rep)
 394 kumpf          1.82          {
 395 mike           1.112             size_t n = strlen(cstr._rep) + 1;
 396                                  _rep = (char*)operator new(n);
 397                                  memcpy(_rep, cstr._rep, n);
 398 kumpf          1.82          }
 399 kumpf          1.54      }
 400                          
 401 kumpf          1.56      CString& CString::operator=(const CString& cstr)
 402                          {
 403 kumpf          1.82          if (&cstr != this)
 404 kumpf          1.81          {
 405 kumpf          1.82              if (_rep)
 406                                  {
 407 mike           1.112                 operator delete(_rep);
 408 kumpf          1.82                  _rep = 0;
 409                                  }
 410 mike           1.112     
 411 kumpf          1.82              if (cstr._rep)
 412                                  {
 413 mike           1.112                 size_t n = strlen(cstr._rep) + 1;
 414                                      _rep = (char*)operator new(n);
 415                                      memcpy(_rep, cstr._rep, n);
 416 kumpf          1.82              }
 417 kumpf          1.81          }
 418 mike           1.112     
 419 kumpf          1.56          return *this;
 420                          }
 421                          
 422 mike           1.112     //==============================================================================
 423 kumpf          1.54      //
 424 mike           1.112     // class StringRep
 425 kumpf          1.39      //
 426 mike           1.112     //==============================================================================
 427 kumpf          1.39      
  // Static StringRep instance. String constructors in this file point _rep at
  // it as a placeholder, e.g. before calling code that may throw.
  428 mike           1.112     StringRep StringRep::_emptyRep;
 429 mike           1.27      
 430 mike           1.112     inline StringRep* StringRep::alloc(size_t cap)
 431 mike           1.27      {
 432 dave.sudlik    1.120         // Check for potential overflow in cap
 433                              PEGASUS_CHECK_CAPACITY_OVERFLOW(cap);
 434 mike           1.27      
 435 mike           1.112         StringRep* rep = (StringRep*)::operator new(
 436                                  sizeof(StringRep) + cap * sizeof(Uint16));
 437                              rep->cap = cap;
 438                              new(&rep->refs) AtomicInt(1);
 439                          
 440                              return rep;
 441 mike           1.27      }
 442                          
 443 mike           1.112     static inline void _reserve(StringRep*& rep, Uint32 cap)
 444 chuck          1.102     {
 445 mike           1.114         if (cap > rep->cap || rep->refs.get() != 1)
 446 chuck          1.102         {
 447 mike           1.112             size_t n = _roundUpToPow2(cap);
 448                                  StringRep* newRep = StringRep::alloc(n);
 449                                  newRep->size = rep->size;
 450                                  _copy(newRep->data, rep->data, rep->size + 1);
 451                                  StringRep::unref(rep);
 452                                  rep = newRep;
 453                              }
 454                          }
 455 david.dillard  1.105     
 456 mike           1.112     StringRep* StringRep::create(const Uint16* data, size_t size)
 457                          {
 458                              StringRep* rep = StringRep::alloc(size);
 459                              rep->size = size;
 460                              _copy(rep->data, data, size);
 461                              rep->data[size] = '\0';
 462                              return rep;
 463                          }
 464 chuck          1.102     
 465 mike           1.112     StringRep* StringRep::copyOnWrite(StringRep* rep)
 466                          {
 467                              // Return a new copy of rep. Release rep.
 468 chuck          1.102     
 469 mike           1.112         StringRep* newRep = StringRep::alloc(rep->size);
 470                              newRep->size = rep->size;
 471                              _copy(newRep->data, rep->data, rep->size);
 472                              newRep->data[newRep->size] = '\0';
 473                              StringRep::unref(rep);
 474                              return newRep;
 475 chuck          1.102     }
 476                          
 477 mike           1.112     StringRep* StringRep::create(const char* data, size_t size)
 478 kumpf          1.43      {
 479 mike           1.112         StringRep* rep = StringRep::alloc(size);
 480                              size_t utf8_error_index;
 481                              rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index);
 482                          
 483                              if (rep->size == size_t(-1))
 484                              {
 485                                  StringRep::free(rep);
 486 thilo.boehm    1.138             _StringThrowBadUTF8((Uint32)utf8_error_index, data,size);
 487 mike           1.112         }
 488 kumpf          1.43      
 489 mike           1.112         rep->data[rep->size] = '\0';
 490 kumpf          1.43      
 491 mike           1.112         return rep;
 492 mike           1.27      }
 493                          
 494 mike           1.112     Uint32 StringRep::length(const Uint16* str)
 495 mike           1.27      {
 496 mike           1.112         // Note: We could unroll this but it is rarely called.
 497                          
 498                              const Uint16* end = (Uint16*)str;
 499                          
 500                              while (*end++)
 501                                  ;
 502                          
 503 a.dunfey       1.125         return (Uint32)(end - str - 1);
 504 kumpf          1.39      }
 505 tony           1.66      
 506 mike           1.112     //==============================================================================
 507                          //
 508                          // class String
 509                          //
 510                          //==============================================================================
 511                          
  // Definition of the static constant String::EMPTY (default-constructed).
  512                          const String String::EMPTY;
 513 mike           1.27      
 514 kumpf          1.39      String::String(const String& str, Uint32 n)
 515                          {
 516 mike           1.112         _checkBounds(n, str._rep->size);
 517                              _rep = StringRep::create(str._rep->data, n);
 518 kumpf          1.39      }
 519                          
 520                          String::String(const Char16* str)
 521                          {
 522 mike           1.112         _checkNullPointer(str);
 523                              _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
 524 mike           1.27      }
 525                          
 526 kumpf          1.39      String::String(const Char16* str, Uint32 n)
 527                          {
 528 mike           1.112         _checkNullPointer(str);
 529                              _rep = StringRep::create((Uint16*)str, n);
 530 kumpf          1.39      }
 531                          
 532                          String::String(const char* str)
 533 mike           1.27      {
 534 mike           1.112         _checkNullPointer(str);
 535 david.dillard  1.105     
 536 mike           1.112         // Set this just in case create() throws an exception.
 537                              _rep = &StringRep::_emptyRep;
 538                              _rep = StringRep::create(str, strlen(str));
 539 mike           1.27      }
 540                          
 541 kumpf          1.39      String::String(const char* str, Uint32 n)
 542 mike           1.27      {
 543 mike           1.112         _checkNullPointer(str);
 544 david.dillard  1.105     
 545 mike           1.112         // Set this just in case create() throws an exception.
 546                              _rep = &StringRep::_emptyRep;
 547                              _rep = StringRep::create(str, n);
 548 kumpf          1.39      }
 549 mike           1.27      
 550 mike           1.112     String::String(const String& s1, const String& s2)
 551 kumpf          1.39      {
 552 mike           1.112         size_t n1 = s1._rep->size;
 553                              size_t n2 = s2._rep->size;
 554                              size_t n = n1 + n2;
 555                              _rep = StringRep::alloc(n);
 556                              _copy(_rep->data, s1._rep->data, n1);
 557                              _copy(_rep->data + n1, s2._rep->data, n2);
 558                              _rep->size = n;
 559                              _rep->data[n] = '\0';
 560 mike           1.27      }
 561                          
 562 mike           1.112     String::String(const String& s1, const char* s2)
 563 mike           1.27      {
 564 mike           1.112         _checkNullPointer(s2);
 565                              size_t n1 = s1._rep->size;
 566                              size_t n2 = strlen(s2);
 567                              _rep = StringRep::alloc(n1 + n2);
 568                              _copy(_rep->data, s1._rep->data, n1);
 569                              size_t utf8_error_index;
 570                              size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index);
 571                          
 572                              if (tmp == size_t(-1))
 573 kumpf          1.82          {
 574 mike           1.112             StringRep::free(_rep);
 575                                  _rep = &StringRep::_emptyRep;
 576 thilo.boehm    1.138             _StringThrowBadUTF8((Uint32)utf8_error_index,s2,n2);
 577 kumpf          1.82          }
 578 mike           1.112     
 579                              _rep->size = n1 + tmp;
 580                              _rep->data[_rep->size] = '\0';
 581 mike           1.27      }
 582                          
 583 mike           1.112     String::String(const char* s1, const String& s2)
 584 mike           1.27      {
 585 mike           1.112         _checkNullPointer(s1);
 586                              size_t n1 = strlen(s1);
 587                              size_t n2 = s2._rep->size;
 588                              _rep = StringRep::alloc(n1 + n2);
 589                              size_t utf8_error_index;
 590                              size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index);
 591                          
 592                              if (tmp ==  size_t(-1))
 593                              {
 594                                  StringRep::free(_rep);
 595                                  _rep = &StringRep::_emptyRep;
 596 thilo.boehm    1.138             _StringThrowBadUTF8((Uint32)utf8_error_index,s1,n1);
 597 mike           1.112         }
 598                          
 599                              _rep->size = n2 + tmp;
 600                              _copy(_rep->data + n1, s2._rep->data, n2);
 601                              _rep->data[_rep->size] = '\0';
 602 mike           1.27      }
 603                          
 604 mike           1.112     String& String::assign(const String& str)
 605 mike           1.27      {
 606 mike           1.112         if (_rep != str._rep)
 607 david.dillard  1.105         {
 608 mike           1.112             StringRep::unref(_rep);
 609                                  StringRep::ref(_rep = str._rep);
 610 david.dillard  1.105         }
 611                          
 612 mike           1.27          return *this;
 613                          }
 614                          
 615                          String& String::assign(const Char16* str, Uint32 n)
 616                          {
 617 mike           1.112         _checkNullPointer(str);
 618                          
 619 mike           1.114         if (n > _rep->cap || _rep->refs.get() != 1)
 620 david.dillard  1.105         {
 621 mike           1.112             StringRep::unref(_rep);
 622                                  _rep = StringRep::alloc(n);
 623 david.dillard  1.105         }
 624                          
 625 mike           1.112         _rep->size = n;
 626                              _copy(_rep->data, (Uint16*)str, n);
 627                              _rep->data[n] = '\0';
 628                          
 629 mike           1.27          return *this;
 630                          }
 631                          
 632 mike           1.112     String& String::assign(const char* str, Uint32 n)
 633 chuck          1.102     {
 634 mike           1.112         _checkNullPointer(str);
 635                          
 636 mike           1.114         if (n > _rep->cap || _rep->refs.get() != 1)
 637 david.dillard  1.105         {
 638 mike           1.112             StringRep::unref(_rep);
 639                                  _rep = StringRep::alloc(n);
 640 david.dillard  1.105         }
 641                          
 642 mike           1.112         size_t utf8_error_index;
 643                              _rep->size = _convert(_rep->data, str, n, utf8_error_index);
 644 chuck          1.102     
 645 mike           1.112         if (_rep->size ==  size_t(-1))
 646 david.dillard  1.105         {
 647 mike           1.112             StringRep::free(_rep);
 648                                  _rep = &StringRep::_emptyRep;
 649 thilo.boehm    1.138             _StringThrowBadUTF8((Uint32)utf8_error_index,str,n);
 650 david.dillard  1.105         }
 651 mike           1.112     
 652                              _rep->data[_rep->size] = 0;
 653 david.dillard  1.105     
 654 mike           1.27          return *this;
 655                          }
 656                          
 657 kumpf          1.39      void String::clear()
 658                          {
 659 mike           1.112         if (_rep->size)
 660                              {
 661 mike           1.114             if (_rep->refs.get() == 1)
 662 mike           1.112             {
 663                                      _rep->size = 0;
 664                                      _rep->data[0] = '\0';
 665                                  }
 666                                  else
 667                                  {
 668                                      StringRep::unref(_rep);
 669                                      _rep = &StringRep::_emptyRep;
 670                                  }
 671                              }
 672 kumpf          1.39      }
 673                          
 674 mike           1.112     void String::reserveCapacity(Uint32 cap)
 675 kumpf          1.39      {
 676 mike           1.112         _reserve(_rep, cap);
 677 kumpf          1.39      }
 678                          
 679 mike           1.112     CString String::getCString() const
 680                          {
 681 david.dillard  1.116         // A UTF8 string can have three times as many characters as its UTF16
 682                              // counterpart, so we allocate extra memory for the worst case. In the
 683 mike           1.112         // best case, we may need only one third of the memory allocated. But
 684 david.dillard  1.116         // downsizing the string afterwards is expensive and unecessary since
 685                              // CString objects are usually short-lived (disappearing after only a few
 686 mike           1.112         // instructions). CString objects are typically created on the stack as
 687                              // means to obtain a char* pointer.
 688                          
 689                          #ifdef PEGASUS_STRING_NO_UTF8
 690                              char* str = (char*)operator new(_rep->size + 1);
 691                              _copy(str, _rep->data, _rep->size);
 692                              str[_rep->size] = '\0';
 693                              return CString(str);
 694 gs.keenan      1.110     #else
 695 a.dunfey       1.125         Uint32 n = (Uint32)(3 * _rep->size);
 696 mike           1.112         char* str = (char*)operator new(n + 1);
 697                              size_t size = _copyToUTF8(str, _rep->data, _rep->size);
 698                              str[size] = '\0';
 699                              return CString(str);
 700 gs.keenan      1.110     #endif
 701 kumpf          1.39      }
 702                          
 703 mike           1.112     String& String::append(const Char16* str, Uint32 n)
 704 kumpf          1.39      {
 705 mike           1.112         _checkNullPointer(str);
 706                          
 707                              size_t oldSize = _rep->size;
 708                              size_t newSize = oldSize + n;
 709 a.dunfey       1.125         _reserve(_rep, (Uint32)newSize);
 710 mike           1.112         _copy(_rep->data + oldSize, (Uint16*)str, n);
 711                              _rep->size = newSize;
 712                              _rep->data[newSize] = '\0';
 713                          
 714                              return *this;
 715 kumpf          1.39      }
 716                          
 717 mike           1.112     String& String::append(const String& str)
 718 mike           1.27      {
 719 a.dunfey       1.125         return append((Char16*)(&(str._rep->data[0])), (Uint32)str._rep->size);
 720 mike           1.27      }
 721                          
 722 mike           1.112     String& String::append(const char* str, Uint32 size)
 723 mike           1.27      {
 724 mike           1.112         _checkNullPointer(str);
 725                          
 726                              size_t oldSize = _rep->size;
 727                              size_t cap = oldSize + size;
 728                          
 729 a.dunfey       1.125         _reserve(_rep, (Uint32)cap);
 730 mike           1.112         size_t utf8_error_index;
 731                              size_t tmp = _convert(
 732                                  (Uint16*)_rep->data + oldSize, str, size, utf8_error_index);
 733                          
 734                              if (tmp ==  size_t(-1))
 735                              {
 736                                  StringRep::free(_rep);
 737                                  _rep = &StringRep::_emptyRep;
 738 thilo.boehm    1.138             _StringThrowBadUTF8((Uint32)utf8_error_index,str,size);
 739 mike           1.112         }
 740 mike           1.27      
 741 mike           1.112         _rep->size += tmp;
 742                              _rep->data[_rep->size] = '\0';
 743 mike           1.27      
 744 kumpf          1.39          return *this;
 745                          }
 746                          
 747 mike           1.112     void String::remove(Uint32 index, Uint32 n)
 748 mike           1.27      {
 749 mike           1.112         if (n == PEG_NOT_FOUND)
 750 a.dunfey       1.125             n = (Uint32)(_rep->size - index);
 751 mike           1.112     
 752                              _checkBounds(index + n, _rep->size);
 753                          
 754 mike           1.114         if (_rep->refs.get() != 1)
 755 mike           1.112             _rep = StringRep::copyOnWrite(_rep);
 756 mike           1.27      
 757 jim.wunderlich 1.115         PEGASUS_ASSERT(index + n <= _rep->size);
 758 mike           1.27      
 759 mike           1.112         size_t rem = _rep->size - (index + n);
 760                              Uint16* data = _rep->data;
 761 mike           1.27      
 762 mike           1.112         if (rem)
 763                                  memmove(data + index, data + index + n, rem * sizeof(Uint16));
 764 mike           1.27      
 765 mike           1.112         _rep->size -= n;
 766                              data[_rep->size] = '\0';
 767 mike           1.27      }
 768                          
 769 mike           1.112     String String::subString(Uint32 index, Uint32 n) const
 770 mike           1.27      {
 771 mike           1.112         // Note: this implementation is very permissive but used for
 772                              // backwards compatibility.
 773                          
 774                              if (index < _rep->size)
 775 mike           1.27          {
 776 mike           1.112             if (n == PEG_NOT_FOUND || n > _rep->size - index)
 777 a.dunfey       1.125                 n = (Uint32)(_rep->size - index);
 778 mike           1.27      
 779 w.otsuka       1.121             return String((Char16*)(_rep->data + index), n);
 780 mike           1.27          }
 781 david.dillard  1.105     
 782                              return String();
 783 mike           1.27      }
 784                          
 785                          Uint32 String::find(Char16 c) const
 786                          {
 787 mike           1.112         Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
 788 mike           1.27      
 789 mike           1.112         if (p)
 790 david.dillard  1.116             return static_cast<Uint32>(p - _rep->data);
 791 mike           1.27      
 792                              return PEG_NOT_FOUND;
 793                          }
 794                          
 795 kumpf          1.53      Uint32 String::find(Uint32 index, Char16 c) const
 796 mike           1.30      {
 797 mike           1.112         _checkBounds(index, _rep->size);
 798                          
 799                              if (index >= _rep->size)
 800                                  return PEG_NOT_FOUND;
 801                          
 802                              Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
 803 mike           1.30      
 804 mike           1.112         if (p)
 805 david.dillard  1.116             return static_cast<Uint32>(p - _rep->data);
 806 mike           1.30      
 807                              return PEG_NOT_FOUND;
 808                          }
 809                          
 810 mike           1.112     Uint32 StringFindAux(
 811                              const StringRep* _rep, const Char16* s, Uint32 n)
 812 mike           1.27      {
 813 mike           1.112         _checkNullPointer(s);
 814 mike           1.27      
 815 mike           1.112         const Uint16* data = _rep->data;
 816                              size_t rem = _rep->size;
 817                          
 818                              while (n <= rem)
 819 mike           1.30          {
 820 mike           1.112             Uint16* p = (Uint16*)_find(data, rem, s[0]);
 821                          
 822                                  if (!p)
 823                                      break;
 824 mike           1.30      
 825 mike           1.112             if (memcmp(p, s, n * sizeof(Uint16)) == 0)
 826 david.dillard  1.116                 return static_cast<Uint32>(p - _rep->data);
 827 david.dillard  1.105     
 828 mike           1.112             p++;
 829                                  rem -= p - data;
 830                                  data = p;
 831 mike           1.27          }
 832 mike           1.112     
 833 mike           1.27          return PEG_NOT_FOUND;
 834                          }
 835                          
 836 mike           1.112     Uint32 String::find(const char* s) const
 837                          {
 838                              _checkNullPointer(s);
 839                          
 840                              // Note: could optimize away creation of temporary, but this is rarely
 841                              // called.
 842                              return find(String(s));
 843                          }
 844                          
 845 mike           1.27      Uint32 String::reverseFind(Char16 c) const
 846                          {
 847 mike           1.112         Uint16 x = c;
 848                              Uint16* p = _rep->data;
 849                              Uint16* q = _rep->data + _rep->size;
 850 mike           1.27      
 851 mike           1.112         while (q != p)
 852 mike           1.27          {
 853 mike           1.112             if (*--q == x)
 854 david.dillard  1.116                 return static_cast<Uint32>(q - p);
 855 mike           1.27          }
 856                          
 857                              return PEG_NOT_FOUND;
 858                          }
 859                          
 860                          void String::toLower()
 861                          {
 862 david          1.69      #ifdef PEGASUS_HAS_ICU
 863 mike           1.112     
 864 yi.zhou        1.108         if (InitializeICU::initICUSuccessful())
 865 david          1.90          {
 866 mike           1.114             if (_rep->refs.get() != 1)
 867 mike           1.112                 _rep = StringRep::copyOnWrite(_rep);
 868                          
 869 yi.zhou        1.108             // This will do a locale-insensitive, but context-sensitive convert.
 870 david.dillard  1.116             // Since context-sensitive casing looks at adjacent chars, this
 871                                  // prevents optimizations where the us-ascii is converted before
 872 mike           1.112             // calling ICU.
 873 yi.zhou        1.108             // The string may shrink or expand after the convert.
 874                          
 875 mike           1.112             //// First calculate size of resulting string. u_strToLower() returns
 876                                  //// only the size when zero is passed as the destination size argument.
 877                          
 878 yi.zhou        1.108             UErrorCode err = U_ZERO_ERROR;
 879                          
 880 mike           1.112             int32_t newSize = u_strToLower(
 881                                      NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
 882 david.dillard  1.116     
 883 mike           1.112             err = U_ZERO_ERROR;
 884                          
 885                                  //// Reserve enough space for the result.
 886                          
 887                                  if ((Uint32)newSize > _rep->cap)
 888                                      _reserve(_rep, newSize);
 889                          
 890                                  //// Perform the conversion (overlapping buffers are allowed).
 891 chuck          1.99      
 892 mike           1.112             u_strToLower((UChar*)_rep->data, newSize,
 893                                      (UChar*)_rep->data, _rep->size, NULL, &err);
 894 yi.zhou        1.108     
 895 mike           1.112             _rep->size = newSize;
 896                                  return;
 897 david          1.90          }
 898 mike           1.112     
 899                          #endif /* PEGASUS_HAS_ICU */
 900                          
 901 mike           1.114         if (_rep->refs.get() != 1)
 902 mike           1.112             _rep = StringRep::copyOnWrite(_rep);
 903                          
 904                              Uint16* p = _rep->data;
 905                              size_t n = _rep->size;
 906                          
 907                              for (; n--; p++)
 908 david          1.90          {
 909 mike           1.112             if (!(*p & 0xFF00))
 910                                      *p = _toLower(*p);
 911 mike           1.27          }
 912 kumpf          1.39      }
 913                          
 914 chuck          1.99      void String::toUpper()
 915 david          1.90      {
 916                          #ifdef PEGASUS_HAS_ICU
 917 mike           1.112     
 918 yi.zhou        1.108         if (InitializeICU::initICUSuccessful())
 919 chuck          1.99          {
 920 mike           1.114             if (_rep->refs.get() != 1)
 921 mike           1.112                 _rep = StringRep::copyOnWrite(_rep);
 922                          
 923 yi.zhou        1.108             // This will do a locale-insensitive, but context-sensitive convert.
 924 david.dillard  1.116             // Since context-sensitive casing looks at adjacent chars, this
 925                                  // prevents optimizations where the us-ascii is converted before
 926 mike           1.112             // calling ICU.
 927 yi.zhou        1.108             // The string may shrink or expand after the convert.
 928                          
 929 mike           1.112             //// First calculate size of resulting string. u_strToUpper() returns
 930                                  //// only the size when zero is passed as the destination size argument.
 931                          
 932 yi.zhou        1.108             UErrorCode err = U_ZERO_ERROR;
 933                          
 934 mike           1.112             int32_t newSize = u_strToUpper(
 935                                      NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
 936                          
 937                                  err = U_ZERO_ERROR;
 938                          
 939                                  //// Reserve enough space for the result.
 940                          
 941                                  if ((Uint32)newSize > _rep->cap)
 942                                      _reserve(_rep, newSize);
 943                          
 944                                  //// Perform the conversion (overlapping buffers are allowed).
 945                          
 946                                  u_strToUpper((UChar*)_rep->data, newSize,
 947                                      (UChar*)_rep->data, _rep->size, NULL, &err);
 948 chuck          1.99      
 949 mike           1.112             _rep->size = newSize;
 950 yi.zhou        1.108     
 951 mike           1.112             return;
 952 david          1.91          }
 953 mike           1.112     
 954                          #endif /* PEGASUS_HAS_ICU */
 955                          
 956 mike           1.114         if (_rep->refs.get() != 1)
 957 mike           1.112             _rep = StringRep::copyOnWrite(_rep);
 958                          
 959                              Uint16* p = _rep->data;
 960                              size_t n = _rep->size;
 961                          
 962                              for (; n--; p++)
 963                                  *p = _toUpper(*p);
 964 david          1.90      }
 965                          
 966 kumpf          1.43      int String::compare(const String& s1, const String& s2, Uint32 n)
 967 kumpf          1.39      {
 968 kumpf          1.118         const Uint16* p1 = s1._rep->data;
 969                              const Uint16* p2 = s2._rep->data;
 970 mike           1.27      
 971 kumpf          1.118         while (n--)
 972                              {
 973                                  int r = *p1++ - *p2++;
 974                                  if (r)
 975                                  {
 976                                      return r;
 977                                  }
 978                                  else if (!p1[-1])
 979                                  {
 980                                      // We must have encountered a null terminator in both s1 and s2
 981                                      return 0;
 982                                  }
 983                              }
 984                              return 0;
 985 mike           1.27      }
 986                          
 987 kumpf          1.43      int String::compare(const String& s1, const String& s2)
 988 mike           1.30      {
 989 mike           1.112         return _compare(s1._rep->data, s2._rep->data);
 990                          }
 991 kumpf          1.43      
 992 mike           1.112     int String::compare(const String& s1, const char* s2)
 993                          {
 994                              _checkNullPointer(s2);
 995 mike           1.30      
 996 mike           1.112     #ifdef PEGASUS_STRING_NO_UTF8
 997                              return _compareNoUTF8(s1._rep->data, s2);
 998                          #else
 999                              // ATTN: optimize this!
1000                              return String::compare(s1, String(s2));
1001                          #endif
1002 mike           1.30      }
1003                          
1004 mike           1.112     int String::compareNoCase(const String& str1, const String& str2)
1005 kumpf          1.40      {
1006 david          1.69      #ifdef PEGASUS_HAS_ICU
1007 mike           1.112     
1008 yi.zhou        1.108         if (InitializeICU::initICUSuccessful())
1009                              {
1010 mike           1.112             return  u_strcasecmp(
1011 dave.sudlik    1.124                 (const UChar*)str1._rep->data,
1012                                      (const UChar*)str2._rep->data,
1013                                      U_FOLD_CASE_DEFAULT
1014                                      );
1015 yi.zhou        1.108         }
1016 kumpf          1.40      
1017 mike           1.112     #endif /* PEGASUS_HAS_ICU */
1018                          
1019                              const Uint16* s1 = str1._rep->data;
1020                              const Uint16* s2 = str2._rep->data;
1021                          
1022                              while (*s1 && *s2)
1023 kumpf          1.40          {
1024 mike           1.112             int r = _toLower(*s1++) - _toLower(*s2++);
1025 kumpf          1.40      
1026 david.dillard  1.105             if (r)
1027                                      return r;
1028 kumpf          1.40          }
1029                          
1030 mike           1.112         if (*s2)
1031 david.dillard  1.105             return -1;
1032 mike           1.112         else if (*s1)
1033 david.dillard  1.105             return 1;
1034 kumpf          1.40      
1035                              return 0;
1036                          }
1037                          
1038 mike           1.112     Boolean StringEqualNoCase(const String& s1, const String& s2)
1039 mike           1.27      {
1040 mike           1.112     #ifdef PEGASUS_HAS_ICU
1041                          
1042                              return String::compareNoCase(s1, s2) == 0;
1043                          
1044                          #else /* PEGASUS_HAS_ICU */
1045 mike           1.27      
1046 mike           1.112         // The following employs loop unrolling for efficiency. Please do not
1047                              // eliminate.
1048 kumpf          1.39      
1049 mike           1.112         Uint16* p = (Uint16*)s1.getChar16Data();
1050                              Uint16* q = (Uint16*)s2.getChar16Data();
1051                              Uint32 n = s2.size();
1052                          
1053                              while (n >= 8)
1054                              {
1055                                  if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1056                                      ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1057                                      ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1058                                      ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) ||
1059                                      ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) ||
1060                                      ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) ||
1061                                      ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) ||
1062                                      ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7]))))
1063                                  {
1064                                      return false;
1065                                  }
1066 kumpf          1.39      
1067 mike           1.112             n -= 8;
1068                                  p += 8;
1069                                  q += 8;
1070                              }
1071 mike           1.27      
1072 mike           1.112         while (n >= 4)
1073 kumpf          1.39          {
1074 mike           1.112             if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1075                                      ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1076                                      ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1077                                      ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
1078 david.dillard  1.105             {
1079 mike           1.112                 return false;
1080 david.dillard  1.105             }
1081 mike           1.112     
1082                                  n -= 4;
1083                                  p += 4;
1084                                  q += 4;
1085                              }
1086                          
1087                              while (n--)
1088                              {
1089                                  if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
1090 david.dillard  1.105                 return false;
1091 mike           1.112     
1092                                  p++;
1093                                  q++;
1094 kumpf          1.39          }
1095 mike           1.28      
1096 kumpf          1.39          return true;
1097 mike           1.112     
1098                          #endif /* PEGASUS_HAS_ICU */
1099 david          1.69      }
1100                          
1101 mike           1.112     Boolean String::equalNoCase(const String& s1, const char* s2)
1102 david          1.69      {
1103 mike           1.112         _checkNullPointer(s2);
1104 david          1.69      
1105 mike           1.112     #if defined(PEGASUS_HAS_ICU)
1106 david          1.69      
1107 mike           1.112         return String::equalNoCase(s1, String(s2));
1108 david          1.69      
1109 mike           1.112     #elif defined(PEGASUS_STRING_NO_UTF8)
1110 david          1.69      
1111 mike           1.112         const Uint16* p1 = (Uint16*)s1._rep->data;
1112                              const char* p2 = s2;
1113                              size_t n = s1._rep->size;
1114 david.dillard  1.105     
1115 mike           1.112         while (n--)
1116                              {
1117                                  if (!*p2)
1118                                      return false;
1119 david          1.71      
1120 mike           1.112             if (_toUpper(*p1++) != _toUpperTable[int(*p2++)])
1121                                      return false;
1122                              }
1123 kumpf          1.42      
1124 mike           1.112         if (*p2)
1125                                  return false;
1126 david.dillard  1.116     
1127 mike           1.112         return true;
1128 karl           1.36      
1129 mike           1.112     #else /* PEGASUS_HAS_ICU */
1130 david.dillard  1.105     
1131 mike           1.112         // ATTN: optimize this!
1132                              return String::equalNoCase(s1, String(s2));
1133 david.dillard  1.105     
1134 mike           1.112     #endif /* PEGASUS_HAS_ICU */
1135                          }
1136 chuck          1.78      
1137 mike           1.112     Boolean String::equal(const String& s1, const String& s2)
1138 karl           1.36      {
1139 marek          1.131         return (s1._rep == s2._rep) ||
1140 marek          1.137             ((s1._rep->size == s2._rep->size) &&
1141                                   memcmp(s1._rep->data,
1142                                          s2._rep->data,
1143                                          s1._rep->size * sizeof(Uint16)) == 0);
1144 karl           1.36      }
1145                          
1146 karl           1.138.2.2 void String::appendPrintf(const char* format, ...)
1147                          {
1148                              va_list ap;
1149                              va_start(ap, format);
1150                          
1151                              // Format into allocated memory
1152                              ////char* rtnCharPtr = _charVPrintf(format, ap);
1153                          
1154                              // Iniitial allocation size.  This is a guess assuming that
1155                              // most printfs are one or two lines long
1156                              int allocSize = 256;
1157                              int rtnSize;
1158                              char *p;
1159                          
1160                              // initial allocate for output
1161                              if ((p = (char*)malloc(allocSize)) == NULL)
1162                              {
1163                                  return;
1164                              }
1165                          
1166                              // repeat formatting  with increased realloc until it works.
1167 karl           1.138.2.2     do
1168                              {
1169                                  rtnSize = vsnprintf(p, allocSize, format, ap);
1170                          
1171                                  // return if successful; i.e. if not negative and
1172                                  // returns less than allocated size.
1173                                  if (rtnSize > -1 && rtnSize < allocSize)
1174                                  {
1175                                      break;
1176                                  }
1177                          
1178                                  // increment alloc size. Positive return is
1179                                  // expected size and negative is error.
1180                                  allocSize = (rtnSize > -1)? (rtnSize + 1) : allocSize * 2;
1181                          
1182                              } while((p = (char*)peg_inln_realloc(p, allocSize)) != NULL);
1183                          
1184                              // get here only with error in malloc.
1185                          
1186                              va_end(ap);
1187                          
1188 karl           1.138.2.2     // Free allocated memory append printf output to current string
1189                              append(p, rtnSize);
1190                              free(p);
1191                          }
1192                          
1193 mike           1.112     Boolean String::equal(const String& s1, const char* s2)
1194                          {
1195                          #ifdef PEGASUS_STRING_NO_UTF8
1196 kumpf          1.35      
1197 mike           1.112         _checkNullPointer(s2);
1198 kumpf          1.39      
1199 mike           1.112         const Uint16* p = (Uint16*)s1._rep->data;
1200                              const char* q = s2;
1201 kumpf          1.39      
1202 mike           1.112         while (*p && *q)
1203                              {
1204                                  if (*p++ != Uint16(*q++))
1205                                      return false;
1206                              }
1207 kumpf          1.39      
1208 mike           1.112         return !(*p || *q);
1209 kumpf          1.39      
1210 mike           1.112     #else /* PEGASUS_STRING_NO_UTF8 */
1211 kumpf          1.39      
1212 mike           1.112         return String::equal(s1, String(s2));
1213 kumpf          1.39      
1214 mike           1.112     #endif /* PEGASUS_STRING_NO_UTF8 */
1215 kumpf          1.39      }
1216                          
1217 kumpf          1.47      PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
1218 kumpf          1.39      {
1219 mike           1.112     #if defined(PEGASUS_HAS_ICU)
1220 david          1.69      
1221 yi.zhou        1.108         if (InitializeICU::initICUSuccessful())
1222                              {
1223 david.dillard  1.105             char *buf = NULL;
1224                                  const int size = str.size() * 6;
1225 mike           1.112             UnicodeString UniStr(
1226                                      (const UChar *)str.getChar16Data(), (int32_t)str.size());
1227 david.dillard  1.105             Uint32 bufsize = UniStr.extract(0,size,buf);
1228                                  buf = new char[bufsize+1];
1229                                  UniStr.extract(0,bufsize,buf);
1230                                  os << buf;
1231                                  os.flush();
1232                                  delete [] buf;
1233 david.dillard  1.116             return os;
1234 yi.zhou        1.108         }
1235 mike           1.112     
1236 david.dillard  1.116     #endif  // PEGASUS_HAS_ICU
1237 mike           1.112     
1238                              for (Uint32 i = 0, n = str.size(); i < n; i++)
1239 yi.zhou        1.108         {
1240 mike           1.112             Uint16 code = str[i];
1241 david.dillard  1.105     
1242 mike           1.112             if (code > 0 && !(code & 0xFF00))
1243                                          os << char(code);
1244                                  else
1245                                      {
1246                                      // Print in hex format:
1247                                      char buffer[8];
1248                                      sprintf(buffer, "\\x%04X", code);
1249                                      os << buffer;
1250 david.dillard  1.105             }
1251 yi.zhou        1.108         }
1252 kumpf          1.39      
1253                              return os;
1254                          }
1255                          
1256 mike           1.112     void StringAppendCharAux(StringRep*& _rep)
1257 kumpf          1.39      {
1258 mike           1.112         StringRep* tmp;
1259                          
1260                              if (_rep->cap)
1261                              {
1262                                  tmp = StringRep::alloc(2 * _rep->cap);
1263                                  tmp->size = _rep->size;
1264                                  _copy(tmp->data, _rep->data, _rep->size);
1265                              }
1266                              else
1267                              {
1268                                  tmp = StringRep::alloc(8);
1269                                  tmp->size = 0;
1270                              }
1271                          
1272                              StringRep::unref(_rep);
1273                              _rep = tmp;
1274 kumpf          1.39      }
1275                          
1276 thilo.boehm    1.128     void AssignASCII(String& s, const char* str, Uint32 n)
1277                          {
1278                              class StringLayout
1279                              {
1280                              public:
1281                                  StringRep* rep;
1282                              };
1283                          
1284 kumpf          1.130         StringLayout* that = reinterpret_cast<StringLayout*>(&s);
1285 thilo.boehm    1.128     
1286                              _checkNullPointer(str);
1287                          
1288                              if (n > that->rep->cap || that->rep->refs.get() != 1)
1289                              {
1290                                  StringRep::unref(that->rep);
1291                                  that->rep = StringRep::alloc(n);
1292                              }
1293                          
1294                              _copy(that->rep->data, str, n);
1295                              that->rep->size = n;
1296                              that->rep->data[that->rep->size] = 0;
1297                          }
1298                          
1299 mike           1.112     PEGASUS_NAMESPACE_END
1300                          
1301                          /*
1302                          ================================================================================
1303                          
1304                          String optimizations:
1305                          
1306                              1.  Added mechanism allowing certain functions to be inlined only when
1307                                  used by internal Pegasus modules. External modules (i.e., providers)
1308                                  link to a non-inline version, which allows for binary compatibility.
1309                          
1310                              2.  Implemented copy-on-write with atomic increment/decrement. This
1311                                  yielded a 10% improvement for the 'gc' benchmark and an 11% improvement
1312                                  for the 'ni1000' benchmark.
1313                          
1314                              3.  Employed loop unrolling in several places. For example, see:
1315                          
1316                                      static Uint16* _find(const Uint16* s, size_t n, Uint16 c);
1317                          
1318                              4.  Used the "empty-rep" optimization (described in whitepaper from the
1319                                  GCC Developers Summit). This reduced default construction to a simple
1320 mike           1.112             pointer assignment.
1321                          
1322                                      inline String::String() : _rep(&_emptyRep) { }
1323                          
1324                              5.  Implemented Uint16 versions of toupper() and tolower() using tables.
1325                                  For example:
1326                          
1327                                      static const char _upper[] =
1328                                      {
1329                                          0,1,2,...255
1330                                      };
1331                          
1332                                      inline Uint16 _toUpper(Uint16 x)
1333                                      {
1334                                          return (x & 0xFF00) ? x : _upper[x];
1335                                      }
1336                          
1337 david.dillard  1.116             This outperforms the system implementation by avoiding an anding
1338 mike           1.112             operation.
1339                          
1340 david.dillard  1.116         6.  Implemented char* version of the following member functions to
1341                                  eliminate unnecessary creation of anonymous string objects
1342 mike           1.112             (temporaries).
1343                          
1344                                      String(const String& s1, const char* s2);
1345                                      String(const char* s1, const String& s2);
1346                                      String& String::operator=(const char* str);
1347                                      Uint32 String::find(const char* s) const;
1348                                      bool String::equal(const String& s1, const char* s2);
1349                                      static int String::compare(const String& s1, const char* s2);
1350                                      String& String::append(const char* str);
1351                                      String& String::append(const char* str, Uint32 size);
1352                                      static bool String::equalNoCase(const String& s1, const char* s2);
1353                                      String& operator=(const char* str)
1354                                      String& String::assign(const char* str)
1355                                      String& String::append(const char* str)
1356                                      Boolean operator==(const String& s1, const char* s2)
1357                                      Boolean operator==(const char* s1, const String& s2)
1358                                      Boolean operator!=(const String& s1, const char* s2)
1359                                      Boolean operator!=(const char* s1, const String& s2)
1360                                      Boolean operator<(const String& s1, const char* s2)
1361                                      Boolean operator<(const char* s1, const String& s2)
1362                                      Boolean operator>(const String& s1, const char* s2)
1363 mike           1.112                 Boolean operator>(const char* s1, const String& s2)
1364                                      Boolean operator<=(const String& s1, const char* s2)
1365                                      Boolean operator<=(const char* s1, const String& s2)
1366                                      Boolean operator>=(const String& s1, const char* s2)
1367                                      Boolean operator>=(const char* s1, const String& s2)
1368                                      String operator+(const String& s1, const char* s2)
1369                                      String operator+(const char* s1, const String& s2)
1370                          
1371 david.dillard  1.116         7.  Optimized _roundUpToPow2(), used in rounding the capacity to the next
1372 mike           1.112             power of two (algorithm from the book "Hacker's Delight").
1373                          
1374                                      static Uint32 _roundUpToPow2(Uint32 x)
1375                                      {
1376                                          if (x < 8)
1377                                              return 8;
1378                          
1379                                          x--;
1380                                          x |= (x >> 1);
1381                                          x |= (x >> 2);
1382                                          x |= (x >> 4);
1383                                          x |= (x >> 8);
1384                                          x |= (x >> 16);
1385                                          x++;
1386                          
1387                                          return x;
1388                                      }
1389                          
1390                              8.  Implemented "concatenating constructors" to eliminate temporaries
1391 david.dillard  1.116             created by operator+(). This scheme employs the "return-value
1392 mike           1.112             optimization" described by Stan Lippman.
1393                          
1394                                      inline String operator+(const String& s1, const String& s2)
1395                                      {
1396                                          return String(s1, s2, 0);
1397                                      }
1398                          
1399                              9.  Experimented to find the optimal initial size for a short string.
1400                                  Eight seems to offer the best tradeoff between space and time.
1401                          
1402                              10. Inlined all members of the Char16 class.
1403                          
1404                              11. Used Uint16 internally in the String class. This showed no improvement
1405                                  since Char16 was already fully inlined and was essentially reduced to
1406                                  Uint16 in any case.
1407                          
1408                              12. Implemented conditional logic (#if) allowing error checking logic to
1409 david.dillard  1.116             be excluded for better performance. Examples include bounds checking
1410 mike           1.112             and null-pointer checking.
1411                          
1412                              13. Used memcpy() and memcmp() where possible. These are implemented using
1413                                  the rep family of instructions under Intel and are much faster.
1414                          
1415 david.dillard  1.116         14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
1416 mike           1.112             copy routine overhead.
1417                          
1418                              15. Added ASCII7 form of the constructor and assign().
1419                          
1420                                      String s("hello world", String::ASCII7);
1421                          
1422                                      s.assignASCII7("hello world");
1423                          
1424                                  This avoids slower UTF8 processing when not needed.
1425                          
1426                          ================================================================================
1427                          */

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2