(file) Return to String.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 martin 1.133 //%LICENSE////////////////////////////////////////////////////////////////
   2 martin 1.134 //
   3 martin 1.133 // Licensed to The Open Group (TOG) under one or more contributor license
   4              // agreements.  Refer to the OpenPegasusNOTICE.txt file distributed with
   5              // this work for additional information regarding copyright ownership.
   6              // Each contributor licenses this file to you under the OpenPegasus Open
   7              // Source License; you may not use this file except in compliance with the
   8              // License.
   9 martin 1.134 //
  10 martin 1.133 // Permission is hereby granted, free of charge, to any person obtaining a
  11              // copy of this software and associated documentation files (the "Software"),
  12              // to deal in the Software without restriction, including without limitation
  13              // the rights to use, copy, modify, merge, publish, distribute, sublicense,
  14              // and/or sell copies of the Software, and to permit persons to whom the
  15              // Software is furnished to do so, subject to the following conditions:
  16 martin 1.134 //
  17 martin 1.133 // The above copyright notice and this permission notice shall be included
  18              // in all copies or substantial portions of the Software.
  19 martin 1.134 //
  20 martin 1.133 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  21 martin 1.134 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  22 martin 1.133 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  23              // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  24              // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  25              // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  26              // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27 martin 1.134 //
  28 martin 1.133 //////////////////////////////////////////////////////////////////////////
  29 mike   1.27  //
  30              //%/////////////////////////////////////////////////////////////////////////////
  31              
  32 jim.wunderlich 1.115 #include <Pegasus/Common/PegasusAssert.h>
  33 mike           1.113 #include <cstring>
  34 kumpf          1.48  #include "InternalException.h"
  35 david          1.69  #include "CommonUTF.h"
  36 mike           1.112 #include "MessageLoader.h"
  37                      #include "StringRep.h"
  38 david          1.69  
  39                      #ifdef PEGASUS_HAS_ICU
  40 kumpf          1.132 # include <unicode/ures.h>
  41                      # include <unicode/ustring.h>
  42                      # include <unicode/uchar.h>
  43 david          1.69  #endif
  44                      
  45 mike           1.112 PEGASUS_NAMESPACE_BEGIN
  46 mike           1.28  
  47 mike           1.112 //==============================================================================
  48                      //
  49                      // Compile-time macros (undefined by default).
  50                      //
  51                      //     PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
  52                      //
  53                      //==============================================================================
  54 mike           1.27  
  55 mike           1.112 //==============================================================================
  56 kumpf          1.39  //
  57 mike           1.112 // File-scope definitions:
  58 kumpf          1.54  //
  59 mike           1.112 //==============================================================================
  60                      
  61                      // Note: this table is much faster than the system toupper(). Please do not
  62                      // change.
  63 kumpf          1.54  
  64 david.dillard  1.116 const Uint8 _toUpperTable[256] =
  65 kumpf          1.54  {
  66 mike           1.112     0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
  67                          0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
  68                          0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
  69                          0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
  70                          0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
  71                          0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
  72                          0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
  73                          0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
  74                          0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
  75                          0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
  76                          0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
  77                          0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
  78                          0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
  79                          0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
  80                          0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
  81                          0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
  82                          0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
  83                          0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
  84                          0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
  85                          0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
  86                          0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
  87 mike           1.112     0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
  88                          0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
  89                          0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
  90                          0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
  91                          0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
  92                          0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
  93                          0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
  94                          0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
  95                          0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
  96                          0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
  97                          0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
  98                      };
  99                      
 100                      // Note: this table is much faster than the system tulower(). Please do not
 101                      // change.
 102                      
 103 david.dillard  1.116 const Uint8 _toLowerTable[256] =
 104 mike           1.112 {
 105                          0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
 106                          0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
 107                          0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
 108                          0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
 109                          0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
 110                          0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
 111                          0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
 112                          0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
 113                          0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
 114                          0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
 115                          0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
 116                          0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
 117                          0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
 118                          0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
 119                          0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
 120                          0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
 121                          0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
 122                          0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
 123                          0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
 124                          0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
 125 mike           1.112     0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
 126                          0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
 127                          0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
 128                          0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
 129                          0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
 130                          0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
 131                          0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
 132                          0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
 133                          0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
 134                          0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
 135                          0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
 136                          0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
 137                      };
 138                      
 139                      // Converts 16-bit characters to upper case. This routine is faster than the
 140                      // system toupper(). Please do not change.
 141                      inline Uint16 _toUpper(Uint16 x)
 142                      {
 143                          return (x & 0xFF00) ? x : _toUpperTable[x];
 144 kumpf          1.54  }
 145                      
 146 mike           1.112 // Converts 16-bit characters to lower case. This routine is faster than the
 147                      // system toupper(). Please do not change.
 148                      inline Uint16 _toLower(Uint16 x)
 149 kumpf          1.54  {
 150 mike           1.112     return (x & 0xFF00) ? x : _toLowerTable[x];
 151                      }
 152                      
 153                      // Rounds x up to the nearest power of two (or just returns 8 if x < 8).
 154                      static Uint32 _roundUpToPow2(Uint32 x)
 155                      {
 156 dave.sudlik    1.120     // Check for potential overflow in x
 157                          PEGASUS_CHECK_CAPACITY_OVERFLOW(x);
 158 mike           1.112 
 159                          if (x < 8)
 160                              return 8;
 161                      
 162                          x--;
 163                          x |= (x >> 1);
 164                          x |= (x >> 2);
 165                          x |= (x >> 4);
 166                          x |= (x >> 8);
 167                          x |= (x >> 16);
 168                          x++;
 169                      
 170                          return x;
 171                      }
 172                      
 173                      template<class P, class Q>
 174                      static void _copy(P* p, const Q* q, size_t n)
 175                      {
 176                          // The following employs loop unrolling for efficiency. Please do not
 177                          // eliminate.
 178                      
 179 mike           1.112     while (n >= 8)
 180                          {
 181                              p[0] = q[0];
 182                              p[1] = q[1];
 183                              p[2] = q[2];
 184                              p[3] = q[3];
 185                              p[4] = q[4];
 186                              p[5] = q[5];
 187                              p[6] = q[6];
 188                              p[7] = q[7];
 189                              p += 8;
 190                              q += 8;
 191                              n -= 8;
 192                          }
 193                      
 194                          while (n >= 4)
 195                          {
 196                              p[0] = q[0];
 197                              p[1] = q[1];
 198                              p[2] = q[2];
 199                              p[3] = q[3];
 200 mike           1.112         p += 4;
 201                              q += 4;
 202                              n -= 4;
 203                          }
 204                      
 205                          while (n--)
 206                              *p++ = *q++;
 207                      }
 208                      
 209                      static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
 210                      {
 211                          // The following employs loop unrolling for efficiency. Please do not
 212                          // eliminate.
 213                      
 214                          while (n >= 4)
 215                          {
 216                              if (s[0] == c)
 217                                  return (Uint16*)s;
 218                              if (s[1] == c)
 219                                  return (Uint16*)&s[1];
 220                              if (s[2] == c)
 221 mike           1.112             return (Uint16*)&s[2];
 222                              if (s[3] == c)
 223                                  return (Uint16*)&s[3];
 224 kumpf          1.82  
 225 mike           1.112         n -= 4;
 226                              s += 4;
 227                          }
 228                      
 229                          if (n)
 230                          {
 231                              if (*s == c)
 232                                  return (Uint16*)s;
 233                              s++;
 234                              n--;
 235                          }
 236                      
 237                          if (n)
 238                          {
 239                              if (*s == c)
 240                                  return (Uint16*)s;
 241                              s++;
 242                              n--;
 243                          }
 244                      
 245                          if (n && *s == c)
 246 mike           1.112         return (Uint16*)s;
 247                      
 248                          // Not found!
 249                          return 0;
 250                      }
 251                      
 252                      static int _compare(const Uint16* s1, const Uint16* s2)
 253                      {
 254                          while (*s1 && *s2)
 255                          {
 256                              int r = *s1++ - *s2++;
 257                      
 258                              if (r)
 259                                  return r;
 260                          }
 261                      
 262                          if (*s2)
 263                              return -1;
 264                          else if (*s1)
 265                              return 1;
 266                      
 267 mike           1.112     return 0;
 268                      }
 269                      
 270 kumpf          1.130 #ifdef PEGASUS_STRING_NO_UTF8
 271 mike           1.112 static int _compareNoUTF8(const Uint16* s1, const char* s2)
 272                      {
 273                          Uint16 c1;
 274                          Uint16 c2;
 275                      
 276                          do
 277                          {
 278                              c1 = *s1++;
 279                              c2 = *s2++;
 280                      
 281                              if (c1 == 0)
 282                                  return c1 - c2;
 283                          }
 284                          while (c1 == c2);
 285                      
 286                          return c1 - c2;
 287                      }
 288 kumpf          1.130 #endif
 289 mike           1.112 
 290                      static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)
 291                      {
 292                          memcpy(s1, s2, n * sizeof(Uint16));
 293                      }
 294                      
 295                      void StringThrowOutOfBounds()
 296                      {
 297                          throw IndexOutOfBoundsException();
 298                      }
 299                      
 300                      inline void _checkNullPointer(const void* ptr)
 301                      {
 302                          if (!ptr)
 303                              throw NullPointer();
 304                      }
 305                      
 306                      static void _StringThrowBadUTF8(Uint32 index)
 307                      {
 308                          MessageLoaderParms parms(
 309                              "Common.String.BAD_UTF8",
 310 mike           1.112         "The byte sequence starting at index $0 "
 311                              "is not valid UTF-8 encoding.",
 312                              index);
 313                          throw Exception(parms);
 314                      }
 315                      
 316                      static size_t _copyFromUTF8(
 317 david.dillard  1.116     Uint16* dest,
 318                          const char* src,
 319 mike           1.112     size_t n,
 320                          size_t& utf8_error_index)
 321                      {
 322                          Uint16* p = dest;
 323                          const Uint8* q = (const Uint8*)src;
 324                      
 325                          // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later).
 326                          // Use loop-unrolling.
 327                      
 328                          while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0)
 329                          {
 330                              p[0] = q[0];
 331                              p[1] = q[1];
 332                              p[2] = q[2];
 333                              p[3] = q[3];
 334                              p[4] = q[4];
 335                              p[5] = q[5];
 336                              p[6] = q[6];
 337                              p[7] = q[7];
 338                              p += 8;
 339                              q += 8;
 340 mike           1.112         n -= 8;
 341                          }
 342                      
 343                          while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0)
 344                          {
 345                              p[0] = q[0];
 346                              p[1] = q[1];
 347                              p[2] = q[2];
 348                              p[3] = q[3];
 349                              p += 4;
 350                              q += 4;
 351                              n -= 4;
 352                          }
 353                      
 354                          switch (n)
 355                          {
 356                              case 0:
 357                                  return p - dest;
 358                              case 1:
 359                                  if (q[0] < 128)
 360                                  {
 361 mike           1.112                 p[0] = q[0];
 362                                      return p + 1 - dest;
 363                                  }
 364                                  break;
 365                              case 2:
 366                                  if (((q[0]|q[1]) & 0x80) == 0)
 367                                  {
 368                                      p[0] = q[0];
 369                                      p[1] = q[1];
 370                                      return p + 2 - dest;
 371                                  }
 372                                  break;
 373                              case 3:
 374                                  if (((q[0]|q[1]|q[2]) & 0x80) == 0)
 375                                  {
 376                                      p[0] = q[0];
 377                                      p[1] = q[1];
 378                                      p[2] = q[2];
 379                                      return p + 3 - dest;
 380                                  }
 381                                  break;
 382 mike           1.112     }
 383                      
 384                          // Process remaining characters.
 385                      
 386                          while (n)
 387                          {
 388                              // Optimize for 7-bit ASCII case.
 389                      
 390                              if (*q < 128)
 391                              {
 392                                  *p++ = *q++;
 393                                  n--;
 394                              }
 395                              else
 396                              {
 397                                  Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1;
 398                      
 399                                  if (c > n || !isValid_U8(q, c) ||
 400                                      UTF8toUTF16(&q, q + c, &p, p + n) != 0)
 401                                  {
 402                                      utf8_error_index = q - (const Uint8*)src;
 403 mike           1.112                 return size_t(-1);
 404                                  }
 405                      
 406                                  n -= c;
 407                              }
 408                          }
 409                      
 410                          return p - dest;
 411                      }
 412                      
 413 david.dillard  1.116 // Note: dest must be at least three times src (plus an extra byte for
 414 mike           1.112 // terminator).
 415                      static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n)
 416                      {
 417                          // The following employs loop unrolling for efficiency. Please do not
 418                          // eliminate.
 419                      
 420                          const Uint16* q = src;
 421                          Uint8* p = (Uint8*)dest;
 422                      
 423                          while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
 424 kumpf          1.82      {
 425 mike           1.112         p[0] = q[0];
 426                              p[1] = q[1];
 427                              p[2] = q[2];
 428                              p[3] = q[3];
 429                              p += 4;
 430                              q += 4;
 431                              n -= 4;
 432 kumpf          1.82      }
 433 mike           1.112 
 434                          switch (n)
 435                          {
 436                              case 0:
 437                                  return p - (Uint8*)dest;
 438                              case 1:
 439                                  if (q[0] < 128)
 440                                  {
 441                                      p[0] = q[0];
 442                                      return p + 1 - (Uint8*)dest;
 443                                  }
 444                                  break;
 445                              case 2:
 446                                  if (q[0] < 128 && q[1] < 128)
 447                                  {
 448                                      p[0] = q[0];
 449                                      p[1] = q[1];
 450                                      return p + 2 - (Uint8*)dest;
 451                                  }
 452                                  break;
 453                              case 3:
 454 mike           1.112             if (q[0] < 128 && q[1] < 128 && q[2] < 128)
 455                                  {
 456                                      p[0] = q[0];
 457                                      p[1] = q[1];
 458                                      p[2] = q[2];
 459                                      return p + 3 - (Uint8*)dest;
 460                                  }
 461                                  break;
 462                          }
 463                      
 464                          // If this line was reached, there must be characters greater than 128.
 465                      
 466                          UTF16toUTF8(&q, q + n, &p, p + 3 * n);
 467                      
 468                          return p - (Uint8*)dest;
 469 kumpf          1.54  }
 470                      
 471 mike           1.112 static inline size_t _convert(
 472                          Uint16* p, const char* q, size_t n, size_t& utf8_error_index)
 473 kumpf          1.54  {
 474 mike           1.112 #ifdef PEGASUS_STRING_NO_UTF8
 475                          _copy(p, q, n);
 476                          return n;
 477                      #else
 478                          return _copyFromUTF8(p, q, n, utf8_error_index);
 479                      #endif
 480 kumpf          1.54  }
 481                      
 482 mike           1.112 //==============================================================================
 483                      //
 484                      // class CString
 485                      //
 486                      //==============================================================================
 487                      
 488                      CString::CString(const CString& cstr) : _rep(0)
 489 kumpf          1.54  {
 490 mike           1.112     if (cstr._rep)
 491 kumpf          1.82      {
 492 mike           1.112         size_t n = strlen(cstr._rep) + 1;
 493                              _rep = (char*)operator new(n);
 494                              memcpy(_rep, cstr._rep, n);
 495 kumpf          1.82      }
 496 kumpf          1.54  }
 497                      
 498 kumpf          1.56  CString& CString::operator=(const CString& cstr)
 499                      {
 500 kumpf          1.82      if (&cstr != this)
 501 kumpf          1.81      {
 502 kumpf          1.82          if (_rep)
 503                              {
 504 mike           1.112             operator delete(_rep);
 505 kumpf          1.82              _rep = 0;
 506                              }
 507 mike           1.112 
 508 kumpf          1.82          if (cstr._rep)
 509                              {
 510 mike           1.112             size_t n = strlen(cstr._rep) + 1;
 511                                  _rep = (char*)operator new(n);
 512                                  memcpy(_rep, cstr._rep, n);
 513 kumpf          1.82          }
 514 kumpf          1.81      }
 515 mike           1.112 
 516 kumpf          1.56      return *this;
 517                      }
 518                      
 519 mike           1.112 //==============================================================================
 520 kumpf          1.54  //
 521 mike           1.112 // class StringRep
 522 kumpf          1.39  //
 523 mike           1.112 //==============================================================================
 524 kumpf          1.39  
// Definition of the static member StringRep::_emptyRep. The String(const
// char*) constructors in this file point _rep at it before calling
// StringRep::create(), so that _rep is valid if create() throws.
StringRep StringRep::_emptyRep;
 526 mike           1.27  
 527 mike           1.112 inline StringRep* StringRep::alloc(size_t cap)
 528 mike           1.27  {
 529 dave.sudlik    1.120     // Check for potential overflow in cap
 530                          PEGASUS_CHECK_CAPACITY_OVERFLOW(cap);
 531 mike           1.27  
 532 mike           1.112     StringRep* rep = (StringRep*)::operator new(
 533                              sizeof(StringRep) + cap * sizeof(Uint16));
 534                          rep->cap = cap;
 535                          new(&rep->refs) AtomicInt(1);
 536                      
 537                          return rep;
 538 mike           1.27  }
 539                      
 540 mike           1.112 static inline void _reserve(StringRep*& rep, Uint32 cap)
 541 chuck          1.102 {
 542 mike           1.114     if (cap > rep->cap || rep->refs.get() != 1)
 543 chuck          1.102     {
 544 mike           1.112         size_t n = _roundUpToPow2(cap);
 545                              StringRep* newRep = StringRep::alloc(n);
 546                              newRep->size = rep->size;
 547                              _copy(newRep->data, rep->data, rep->size + 1);
 548                              StringRep::unref(rep);
 549                              rep = newRep;
 550                          }
 551                      }
 552 david.dillard  1.105 
 553 mike           1.112 StringRep* StringRep::create(const Uint16* data, size_t size)
 554                      {
 555                          StringRep* rep = StringRep::alloc(size);
 556                          rep->size = size;
 557                          _copy(rep->data, data, size);
 558                          rep->data[size] = '\0';
 559                          return rep;
 560                      }
 561 chuck          1.102 
 562 mike           1.112 StringRep* StringRep::copyOnWrite(StringRep* rep)
 563                      {
 564                          // Return a new copy of rep. Release rep.
 565 chuck          1.102 
 566 mike           1.112     StringRep* newRep = StringRep::alloc(rep->size);
 567                          newRep->size = rep->size;
 568                          _copy(newRep->data, rep->data, rep->size);
 569                          newRep->data[newRep->size] = '\0';
 570                          StringRep::unref(rep);
 571                          return newRep;
 572 chuck          1.102 }
 573                      
 574 mike           1.112 StringRep* StringRep::create(const char* data, size_t size)
 575 kumpf          1.43  {
 576 mike           1.112     StringRep* rep = StringRep::alloc(size);
 577                          size_t utf8_error_index;
 578                          rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index);
 579                      
 580                          if (rep->size == size_t(-1))
 581                          {
 582                              StringRep::free(rep);
 583 a.dunfey       1.125         _StringThrowBadUTF8((Uint32)utf8_error_index);
 584 mike           1.112     }
 585 kumpf          1.43  
 586 mike           1.112     rep->data[rep->size] = '\0';
 587 kumpf          1.43  
 588 mike           1.112     return rep;
 589 mike           1.27  }
 590                      
 591 mike           1.112 Uint32 StringRep::length(const Uint16* str)
 592 mike           1.27  {
 593 mike           1.112     // Note: We could unroll this but it is rarely called.
 594                      
 595                          const Uint16* end = (Uint16*)str;
 596                      
 597                          while (*end++)
 598                              ;
 599                      
 600 a.dunfey       1.125     return (Uint32)(end - str - 1);
 601 kumpf          1.39  }
 602 tony           1.66  
 603 mike           1.112 //==============================================================================
 604                      //
 605                      // class String
 606                      //
 607                      //==============================================================================
 608                      
// Definition of the static constant String::EMPTY; it is default-constructed.
const String String::EMPTY;
 610 mike           1.27  
 611 kumpf          1.39  String::String(const String& str, Uint32 n)
 612                      {
 613 mike           1.112     _checkBounds(n, str._rep->size);
 614                          _rep = StringRep::create(str._rep->data, n);
 615 kumpf          1.39  }
 616                      
 617                      String::String(const Char16* str)
 618                      {
 619 mike           1.112     _checkNullPointer(str);
 620                          _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
 621 mike           1.27  }
 622                      
 623 kumpf          1.39  String::String(const Char16* str, Uint32 n)
 624                      {
 625 mike           1.112     _checkNullPointer(str);
 626                          _rep = StringRep::create((Uint16*)str, n);
 627 kumpf          1.39  }
 628                      
 629                      String::String(const char* str)
 630 mike           1.27  {
 631 mike           1.112     _checkNullPointer(str);
 632 david.dillard  1.105 
 633 mike           1.112     // Set this just in case create() throws an exception.
 634                          _rep = &StringRep::_emptyRep;
 635                          _rep = StringRep::create(str, strlen(str));
 636 mike           1.27  }
 637                      
 638 kumpf          1.39  String::String(const char* str, Uint32 n)
 639 mike           1.27  {
 640 mike           1.112     _checkNullPointer(str);
 641 david.dillard  1.105 
 642 mike           1.112     // Set this just in case create() throws an exception.
 643                          _rep = &StringRep::_emptyRep;
 644                          _rep = StringRep::create(str, n);
 645 kumpf          1.39  }
 646 mike           1.27  
 647 mike           1.112 String::String(const String& s1, const String& s2)
 648 kumpf          1.39  {
 649 mike           1.112     size_t n1 = s1._rep->size;
 650                          size_t n2 = s2._rep->size;
 651                          size_t n = n1 + n2;
 652                          _rep = StringRep::alloc(n);
 653                          _copy(_rep->data, s1._rep->data, n1);
 654                          _copy(_rep->data + n1, s2._rep->data, n2);
 655                          _rep->size = n;
 656                          _rep->data[n] = '\0';
 657 mike           1.27  }
 658                      
 659 mike           1.112 String::String(const String& s1, const char* s2)
 660 mike           1.27  {
 661 mike           1.112     _checkNullPointer(s2);
 662                          size_t n1 = s1._rep->size;
 663                          size_t n2 = strlen(s2);
 664                          _rep = StringRep::alloc(n1 + n2);
 665                          _copy(_rep->data, s1._rep->data, n1);
 666                          size_t utf8_error_index;
 667                          size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index);
 668                      
 669                          if (tmp == size_t(-1))
 670 kumpf          1.82      {
 671 mike           1.112         StringRep::free(_rep);
 672                              _rep = &StringRep::_emptyRep;
 673 a.dunfey       1.125         _StringThrowBadUTF8((Uint32)utf8_error_index);
 674 kumpf          1.82      }
 675 mike           1.112 
 676                          _rep->size = n1 + tmp;
 677                          _rep->data[_rep->size] = '\0';
 678 mike           1.27  }
 679                      
 680 mike           1.112 String::String(const char* s1, const String& s2)
 681 mike           1.27  {
 682 mike           1.112     _checkNullPointer(s1);
 683                          size_t n1 = strlen(s1);
 684                          size_t n2 = s2._rep->size;
 685                          _rep = StringRep::alloc(n1 + n2);
 686                          size_t utf8_error_index;
 687                          size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index);
 688                      
 689                          if (tmp ==  size_t(-1))
 690                          {
 691                              StringRep::free(_rep);
 692                              _rep = &StringRep::_emptyRep;
 693 a.dunfey       1.125         _StringThrowBadUTF8((Uint32)utf8_error_index);
 694 mike           1.112     }
 695                      
 696                          _rep->size = n2 + tmp;
 697                          _copy(_rep->data + n1, s2._rep->data, n2);
 698                          _rep->data[_rep->size] = '\0';
 699 mike           1.27  }
 700                      
 701 mike           1.112 String& String::assign(const String& str)
 702 mike           1.27  {
 703 mike           1.112     if (_rep != str._rep)
 704 david.dillard  1.105     {
 705 mike           1.112         StringRep::unref(_rep);
 706                              StringRep::ref(_rep = str._rep);
 707 david.dillard  1.105     }
 708                      
 709 mike           1.27      return *this;
 710                      }
 711                      
 712                      String& String::assign(const Char16* str, Uint32 n)
 713                      {
 714 mike           1.112     _checkNullPointer(str);
 715                      
 716 mike           1.114     if (n > _rep->cap || _rep->refs.get() != 1)
 717 david.dillard  1.105     {
 718 mike           1.112         StringRep::unref(_rep);
 719                              _rep = StringRep::alloc(n);
 720 david.dillard  1.105     }
 721                      
 722 mike           1.112     _rep->size = n;
 723                          _copy(_rep->data, (Uint16*)str, n);
 724                          _rep->data[n] = '\0';
 725                      
 726 mike           1.27      return *this;
 727                      }
 728                      
 729 mike           1.112 String& String::assign(const char* str, Uint32 n)
 730 chuck          1.102 {
 731 mike           1.112     _checkNullPointer(str);
 732                      
 733 mike           1.114     if (n > _rep->cap || _rep->refs.get() != 1)
 734 david.dillard  1.105     {
 735 mike           1.112         StringRep::unref(_rep);
 736                              _rep = StringRep::alloc(n);
 737 david.dillard  1.105     }
 738                      
 739 mike           1.112     size_t utf8_error_index;
 740                          _rep->size = _convert(_rep->data, str, n, utf8_error_index);
 741 chuck          1.102 
 742 mike           1.112     if (_rep->size ==  size_t(-1))
 743 david.dillard  1.105     {
 744 mike           1.112         StringRep::free(_rep);
 745                              _rep = &StringRep::_emptyRep;
 746 a.dunfey       1.125         _StringThrowBadUTF8((Uint32)utf8_error_index);
 747 david.dillard  1.105     }
 748 mike           1.112 
 749                          _rep->data[_rep->size] = 0;
 750 david.dillard  1.105 
 751 mike           1.27      return *this;
 752                      }
 753                      
 754 kumpf          1.39  void String::clear()
 755                      {
 756 mike           1.112     if (_rep->size)
 757                          {
 758 mike           1.114         if (_rep->refs.get() == 1)
 759 mike           1.112         {
 760                                  _rep->size = 0;
 761                                  _rep->data[0] = '\0';
 762                              }
 763                              else
 764                              {
 765                                  StringRep::unref(_rep);
 766                                  _rep = &StringRep::_emptyRep;
 767                              }
 768                          }
 769 kumpf          1.39  }
 770                      
// Forwards the requested capacity to the file-local _reserve() helper, which
// operates on this string's representation.
void String::reserveCapacity(Uint32 cap)
{
    _reserve(_rep, cap);
}
 775                      
 776 mike           1.112 CString String::getCString() const
 777                      {
 778 david.dillard  1.116     // A UTF8 string can have three times as many characters as its UTF16
 779                          // counterpart, so we allocate extra memory for the worst case. In the
 780 mike           1.112     // best case, we may need only one third of the memory allocated. But
 781 david.dillard  1.116     // downsizing the string afterwards is expensive and unecessary since
 782                          // CString objects are usually short-lived (disappearing after only a few
 783 mike           1.112     // instructions). CString objects are typically created on the stack as
 784                          // means to obtain a char* pointer.
 785                      
 786                      #ifdef PEGASUS_STRING_NO_UTF8
 787                          char* str = (char*)operator new(_rep->size + 1);
 788                          _copy(str, _rep->data, _rep->size);
 789                          str[_rep->size] = '\0';
 790                          return CString(str);
 791 gs.keenan      1.110 #else
 792 a.dunfey       1.125     Uint32 n = (Uint32)(3 * _rep->size);
 793 mike           1.112     char* str = (char*)operator new(n + 1);
 794                          size_t size = _copyToUTF8(str, _rep->data, _rep->size);
 795                          str[size] = '\0';
 796                          return CString(str);
 797 gs.keenan      1.110 #endif
 798 kumpf          1.39  }
 799                      
 800 mike           1.112 String& String::append(const Char16* str, Uint32 n)
 801 kumpf          1.39  {
 802 mike           1.112     _checkNullPointer(str);
 803                      
 804                          size_t oldSize = _rep->size;
 805                          size_t newSize = oldSize + n;
 806 a.dunfey       1.125     _reserve(_rep, (Uint32)newSize);
 807 mike           1.112     _copy(_rep->data + oldSize, (Uint16*)str, n);
 808                          _rep->size = newSize;
 809                          _rep->data[newSize] = '\0';
 810                      
 811                          return *this;
 812 kumpf          1.39  }
 813                      
 814 mike           1.112 String& String::append(const String& str)
 815 mike           1.27  {
 816 a.dunfey       1.125     return append((Char16*)(&(str._rep->data[0])), (Uint32)str._rep->size);
 817 mike           1.27  }
 818                      
 819 mike           1.112 String& String::append(const char* str, Uint32 size)
 820 mike           1.27  {
 821 mike           1.112     _checkNullPointer(str);
 822                      
 823                          size_t oldSize = _rep->size;
 824                          size_t cap = oldSize + size;
 825                      
 826 a.dunfey       1.125     _reserve(_rep, (Uint32)cap);
 827 mike           1.112     size_t utf8_error_index;
 828                          size_t tmp = _convert(
 829                              (Uint16*)_rep->data + oldSize, str, size, utf8_error_index);
 830                      
 831                          if (tmp ==  size_t(-1))
 832                          {
 833                              StringRep::free(_rep);
 834                              _rep = &StringRep::_emptyRep;
 835 a.dunfey       1.125         _StringThrowBadUTF8((Uint32)utf8_error_index);
 836 mike           1.112     }
 837 mike           1.27  
 838 mike           1.112     _rep->size += tmp;
 839                          _rep->data[_rep->size] = '\0';
 840 mike           1.27  
 841 kumpf          1.39      return *this;
 842                      }
 843                      
 844 mike           1.112 void String::remove(Uint32 index, Uint32 n)
 845 mike           1.27  {
 846 mike           1.112     if (n == PEG_NOT_FOUND)
 847 a.dunfey       1.125         n = (Uint32)(_rep->size - index);
 848 mike           1.112 
 849                          _checkBounds(index + n, _rep->size);
 850                      
 851 mike           1.114     if (_rep->refs.get() != 1)
 852 mike           1.112         _rep = StringRep::copyOnWrite(_rep);
 853 mike           1.27  
 854 jim.wunderlich 1.115     PEGASUS_ASSERT(index + n <= _rep->size);
 855 mike           1.27  
 856 mike           1.112     size_t rem = _rep->size - (index + n);
 857                          Uint16* data = _rep->data;
 858 mike           1.27  
 859 mike           1.112     if (rem)
 860                              memmove(data + index, data + index + n, rem * sizeof(Uint16));
 861 mike           1.27  
 862 mike           1.112     _rep->size -= n;
 863                          data[_rep->size] = '\0';
 864 mike           1.27  }
 865                      
 866 mike           1.112 String String::subString(Uint32 index, Uint32 n) const
 867 mike           1.27  {
 868 mike           1.112     // Note: this implementation is very permissive but used for
 869                          // backwards compatibility.
 870                      
 871                          if (index < _rep->size)
 872 mike           1.27      {
 873 mike           1.112         if (n == PEG_NOT_FOUND || n > _rep->size - index)
 874 a.dunfey       1.125             n = (Uint32)(_rep->size - index);
 875 mike           1.27  
 876 w.otsuka       1.121         return String((Char16*)(_rep->data + index), n);
 877 mike           1.27      }
 878 david.dillard  1.105 
 879                          return String();
 880 mike           1.27  }
 881                      
 882                      Uint32 String::find(Char16 c) const
 883                      {
 884 mike           1.112     Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
 885 mike           1.27  
 886 mike           1.112     if (p)
 887 david.dillard  1.116         return static_cast<Uint32>(p - _rep->data);
 888 mike           1.27  
 889                          return PEG_NOT_FOUND;
 890                      }
 891                      
 892 kumpf          1.53  Uint32 String::find(Uint32 index, Char16 c) const
 893 mike           1.30  {
 894 mike           1.112     _checkBounds(index, _rep->size);
 895                      
 896                          if (index >= _rep->size)
 897                              return PEG_NOT_FOUND;
 898                      
 899                          Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
 900 mike           1.30  
 901 mike           1.112     if (p)
 902 david.dillard  1.116         return static_cast<Uint32>(p - _rep->data);
 903 mike           1.30  
 904                          return PEG_NOT_FOUND;
 905                      }
 906                      
 907 mike           1.112 Uint32 StringFindAux(
 908                          const StringRep* _rep, const Char16* s, Uint32 n)
 909 mike           1.27  {
 910 mike           1.112     _checkNullPointer(s);
 911 mike           1.27  
 912 mike           1.112     const Uint16* data = _rep->data;
 913                          size_t rem = _rep->size;
 914                      
 915                          while (n <= rem)
 916 mike           1.30      {
 917 mike           1.112         Uint16* p = (Uint16*)_find(data, rem, s[0]);
 918                      
 919                              if (!p)
 920                                  break;
 921 mike           1.30  
 922 mike           1.112         if (memcmp(p, s, n * sizeof(Uint16)) == 0)
 923 david.dillard  1.116             return static_cast<Uint32>(p - _rep->data);
 924 david.dillard  1.105 
 925 mike           1.112         p++;
 926                              rem -= p - data;
 927                              data = p;
 928 mike           1.27      }
 929 mike           1.112 
 930 mike           1.27      return PEG_NOT_FOUND;
 931                      }
 932                      
 933 mike           1.112 Uint32 String::find(const char* s) const
 934                      {
 935                          _checkNullPointer(s);
 936                      
 937                          // Note: could optimize away creation of temporary, but this is rarely
 938                          // called.
 939                          return find(String(s));
 940                      }
 941                      
 942 mike           1.27  Uint32 String::reverseFind(Char16 c) const
 943                      {
 944 mike           1.112     Uint16 x = c;
 945                          Uint16* p = _rep->data;
 946                          Uint16* q = _rep->data + _rep->size;
 947 mike           1.27  
 948 mike           1.112     while (q != p)
 949 mike           1.27      {
 950 mike           1.112         if (*--q == x)
 951 david.dillard  1.116             return static_cast<Uint32>(q - p);
 952 mike           1.27      }
 953                      
 954                          return PEG_NOT_FOUND;
 955                      }
 956                      
 957                      void String::toLower()
 958                      {
 959 david          1.69  #ifdef PEGASUS_HAS_ICU
 960 mike           1.112 
 961 yi.zhou        1.108     if (InitializeICU::initICUSuccessful())
 962 david          1.90      {
 963 mike           1.114         if (_rep->refs.get() != 1)
 964 mike           1.112             _rep = StringRep::copyOnWrite(_rep);
 965                      
 966 yi.zhou        1.108         // This will do a locale-insensitive, but context-sensitive convert.
 967 david.dillard  1.116         // Since context-sensitive casing looks at adjacent chars, this
 968                              // prevents optimizations where the us-ascii is converted before
 969 mike           1.112         // calling ICU.
 970 yi.zhou        1.108         // The string may shrink or expand after the convert.
 971                      
 972 mike           1.112         //// First calculate size of resulting string. u_strToLower() returns
 973                              //// only the size when zero is passed as the destination size argument.
 974                      
 975 yi.zhou        1.108         UErrorCode err = U_ZERO_ERROR;
 976                      
 977 mike           1.112         int32_t newSize = u_strToLower(
 978                                  NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
 979 david.dillard  1.116 
 980 mike           1.112         err = U_ZERO_ERROR;
 981                      
 982                              //// Reserve enough space for the result.
 983                      
 984                              if ((Uint32)newSize > _rep->cap)
 985                                  _reserve(_rep, newSize);
 986                      
 987                              //// Perform the conversion (overlapping buffers are allowed).
 988 chuck          1.99  
 989 mike           1.112         u_strToLower((UChar*)_rep->data, newSize,
 990                                  (UChar*)_rep->data, _rep->size, NULL, &err);
 991 yi.zhou        1.108 
 992 mike           1.112         _rep->size = newSize;
 993                              return;
 994 david          1.90      }
 995 mike           1.112 
 996                      #endif /* PEGASUS_HAS_ICU */
 997                      
 998 mike           1.114     if (_rep->refs.get() != 1)
 999 mike           1.112         _rep = StringRep::copyOnWrite(_rep);
1000                      
1001                          Uint16* p = _rep->data;
1002                          size_t n = _rep->size;
1003                      
1004                          for (; n--; p++)
1005 david          1.90      {
1006 mike           1.112         if (!(*p & 0xFF00))
1007                                  *p = _toLower(*p);
1008 mike           1.27      }
1009 kumpf          1.39  }
1010                      
1011 chuck          1.99  void String::toUpper()
1012 david          1.90  {
1013                      #ifdef PEGASUS_HAS_ICU
1014 mike           1.112 
1015 yi.zhou        1.108     if (InitializeICU::initICUSuccessful())
1016 chuck          1.99      {
1017 mike           1.114         if (_rep->refs.get() != 1)
1018 mike           1.112             _rep = StringRep::copyOnWrite(_rep);
1019                      
1020 yi.zhou        1.108         // This will do a locale-insensitive, but context-sensitive convert.
1021 david.dillard  1.116         // Since context-sensitive casing looks at adjacent chars, this
1022                              // prevents optimizations where the us-ascii is converted before
1023 mike           1.112         // calling ICU.
1024 yi.zhou        1.108         // The string may shrink or expand after the convert.
1025                      
1026 mike           1.112         //// First calculate size of resulting string. u_strToUpper() returns
1027                              //// only the size when zero is passed as the destination size argument.
1028                      
1029 yi.zhou        1.108         UErrorCode err = U_ZERO_ERROR;
1030                      
1031 mike           1.112         int32_t newSize = u_strToUpper(
1032                                  NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
1033                      
1034                              err = U_ZERO_ERROR;
1035                      
1036                              //// Reserve enough space for the result.
1037                      
1038                              if ((Uint32)newSize > _rep->cap)
1039                                  _reserve(_rep, newSize);
1040                      
1041                              //// Perform the conversion (overlapping buffers are allowed).
1042                      
1043                              u_strToUpper((UChar*)_rep->data, newSize,
1044                                  (UChar*)_rep->data, _rep->size, NULL, &err);
1045 chuck          1.99  
1046 mike           1.112         _rep->size = newSize;
1047 yi.zhou        1.108 
1048 mike           1.112         return;
1049 david          1.91      }
1050 mike           1.112 
1051                      #endif /* PEGASUS_HAS_ICU */
1052                      
1053 mike           1.114     if (_rep->refs.get() != 1)
1054 mike           1.112         _rep = StringRep::copyOnWrite(_rep);
1055                      
1056                          Uint16* p = _rep->data;
1057                          size_t n = _rep->size;
1058                      
1059                          for (; n--; p++)
1060                              *p = _toUpper(*p);
1061 david          1.90  }
1062                      
1063 kumpf          1.43  int String::compare(const String& s1, const String& s2, Uint32 n)
1064 kumpf          1.39  {
1065 kumpf          1.118     const Uint16* p1 = s1._rep->data;
1066                          const Uint16* p2 = s2._rep->data;
1067 mike           1.27  
1068 kumpf          1.118     while (n--)
1069                          {
1070                              int r = *p1++ - *p2++;
1071                              if (r)
1072                              {
1073                                  return r;
1074                              }
1075                              else if (!p1[-1])
1076                              {
1077                                  // We must have encountered a null terminator in both s1 and s2
1078                                  return 0;
1079                              }
1080                          }
1081                          return 0;
1082 mike           1.27  }
1083                      
1084 kumpf          1.43  int String::compare(const String& s1, const String& s2)
1085 mike           1.30  {
1086 mike           1.112     return _compare(s1._rep->data, s2._rep->data);
1087                      }
1088 kumpf          1.43  
1089 mike           1.112 int String::compare(const String& s1, const char* s2)
1090                      {
1091                          _checkNullPointer(s2);
1092 mike           1.30  
1093 mike           1.112 #ifdef PEGASUS_STRING_NO_UTF8
1094                          return _compareNoUTF8(s1._rep->data, s2);
1095                      #else
1096                          // ATTN: optimize this!
1097                          return String::compare(s1, String(s2));
1098                      #endif
1099 mike           1.30  }
1100                      
1101 mike           1.112 int String::compareNoCase(const String& str1, const String& str2)
1102 kumpf          1.40  {
1103 david          1.69  #ifdef PEGASUS_HAS_ICU
1104 mike           1.112 
1105 yi.zhou        1.108     if (InitializeICU::initICUSuccessful())
1106                          {
1107 mike           1.112         return  u_strcasecmp(
1108 dave.sudlik    1.124             (const UChar*)str1._rep->data,
1109                                  (const UChar*)str2._rep->data,
1110                                  U_FOLD_CASE_DEFAULT
1111                                  );
1112 yi.zhou        1.108     }
1113 kumpf          1.40  
1114 mike           1.112 #endif /* PEGASUS_HAS_ICU */
1115                      
1116                          const Uint16* s1 = str1._rep->data;
1117                          const Uint16* s2 = str2._rep->data;
1118                      
1119                          while (*s1 && *s2)
1120 kumpf          1.40      {
1121 mike           1.112         int r = _toLower(*s1++) - _toLower(*s2++);
1122 kumpf          1.40  
1123 david.dillard  1.105         if (r)
1124                                  return r;
1125 kumpf          1.40      }
1126                      
1127 mike           1.112     if (*s2)
1128 david.dillard  1.105         return -1;
1129 mike           1.112     else if (*s1)
1130 david.dillard  1.105         return 1;
1131 kumpf          1.40  
1132                          return 0;
1133                      }
1134                      
1135 mike           1.112 Boolean StringEqualNoCase(const String& s1, const String& s2)
1136 mike           1.27  {
1137 mike           1.112 #ifdef PEGASUS_HAS_ICU
1138                      
1139                          return String::compareNoCase(s1, s2) == 0;
1140                      
1141                      #else /* PEGASUS_HAS_ICU */
1142 mike           1.27  
1143 mike           1.112     // The following employs loop unrolling for efficiency. Please do not
1144                          // eliminate.
1145 kumpf          1.39  
1146 mike           1.112     Uint16* p = (Uint16*)s1.getChar16Data();
1147                          Uint16* q = (Uint16*)s2.getChar16Data();
1148                          Uint32 n = s2.size();
1149                      
1150                          while (n >= 8)
1151                          {
1152                              if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1153                                  ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1154                                  ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1155                                  ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) ||
1156                                  ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) ||
1157                                  ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) ||
1158                                  ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) ||
1159                                  ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7]))))
1160                              {
1161                                  return false;
1162                              }
1163 kumpf          1.39  
1164 mike           1.112         n -= 8;
1165                              p += 8;
1166                              q += 8;
1167                          }
1168 mike           1.27  
1169 mike           1.112     while (n >= 4)
1170 kumpf          1.39      {
1171 mike           1.112         if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1172                                  ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1173                                  ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1174                                  ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
1175 david.dillard  1.105         {
1176 mike           1.112             return false;
1177 david.dillard  1.105         }
1178 mike           1.112 
1179                              n -= 4;
1180                              p += 4;
1181                              q += 4;
1182                          }
1183                      
1184                          while (n--)
1185                          {
1186                              if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
1187 david.dillard  1.105             return false;
1188 mike           1.112 
1189                              p++;
1190                              q++;
1191 kumpf          1.39      }
1192 mike           1.28  
1193 kumpf          1.39      return true;
1194 mike           1.112 
1195                      #endif /* PEGASUS_HAS_ICU */
1196 david          1.69  }
1197                      
1198 mike           1.112 Boolean String::equalNoCase(const String& s1, const char* s2)
1199 david          1.69  {
1200 mike           1.112     _checkNullPointer(s2);
1201 david          1.69  
1202 mike           1.112 #if defined(PEGASUS_HAS_ICU)
1203 david          1.69  
1204 mike           1.112     return String::equalNoCase(s1, String(s2));
1205 david          1.69  
1206 mike           1.112 #elif defined(PEGASUS_STRING_NO_UTF8)
1207 david          1.69  
1208 mike           1.112     const Uint16* p1 = (Uint16*)s1._rep->data;
1209                          const char* p2 = s2;
1210                          size_t n = s1._rep->size;
1211 david.dillard  1.105 
1212 mike           1.112     while (n--)
1213                          {
1214                              if (!*p2)
1215                                  return false;
1216 david          1.71  
1217 mike           1.112         if (_toUpper(*p1++) != _toUpperTable[int(*p2++)])
1218                                  return false;
1219                          }
1220 kumpf          1.42  
1221 mike           1.112     if (*p2)
1222                              return false;
1223 david.dillard  1.116 
1224 mike           1.112     return true;
1225 karl           1.36  
1226 mike           1.112 #else /* PEGASUS_HAS_ICU */
1227 david.dillard  1.105 
1228 mike           1.112     // ATTN: optimize this!
1229                          return String::equalNoCase(s1, String(s2));
1230 david.dillard  1.105 
1231 mike           1.112 #endif /* PEGASUS_HAS_ICU */
1232                      }
1233 chuck          1.78  
1234 mike           1.112 Boolean String::equal(const String& s1, const String& s2)
1235 karl           1.36  {
1236 marek          1.131     return (s1._rep == s2._rep) ||
1237 kumpf          1.135         (s1._rep->size == s2._rep->size) &&
1238 marek          1.131         memcmp(s1._rep->data,
1239 kumpf          1.135                s2._rep->data,
1240 marek          1.131                s1._rep->size * sizeof(Uint16)) == 0;
1241 karl           1.36  }
1242                      
1243 mike           1.112 Boolean String::equal(const String& s1, const char* s2)
1244                      {
1245                      #ifdef PEGASUS_STRING_NO_UTF8
1246 kumpf          1.35  
1247 mike           1.112     _checkNullPointer(s2);
1248 kumpf          1.39  
1249 mike           1.112     const Uint16* p = (Uint16*)s1._rep->data;
1250                          const char* q = s2;
1251 kumpf          1.39  
1252 mike           1.112     while (*p && *q)
1253                          {
1254                              if (*p++ != Uint16(*q++))
1255                                  return false;
1256                          }
1257 kumpf          1.39  
1258 mike           1.112     return !(*p || *q);
1259 kumpf          1.39  
1260 mike           1.112 #else /* PEGASUS_STRING_NO_UTF8 */
1261 kumpf          1.39  
1262 mike           1.112     return String::equal(s1, String(s2));
1263 kumpf          1.39  
1264 mike           1.112 #endif /* PEGASUS_STRING_NO_UTF8 */
1265 kumpf          1.39  }
1266                      
1267 kumpf          1.47  PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
1268 kumpf          1.39  {
1269 mike           1.112 #if defined(PEGASUS_HAS_ICU)
1270 david          1.69  
1271 yi.zhou        1.108     if (InitializeICU::initICUSuccessful())
1272                          {
1273 david.dillard  1.105         char *buf = NULL;
1274                              const int size = str.size() * 6;
1275 mike           1.112         UnicodeString UniStr(
1276                                  (const UChar *)str.getChar16Data(), (int32_t)str.size());
1277 david.dillard  1.105         Uint32 bufsize = UniStr.extract(0,size,buf);
1278                              buf = new char[bufsize+1];
1279                              UniStr.extract(0,bufsize,buf);
1280                              os << buf;
1281                              os.flush();
1282                              delete [] buf;
1283 david.dillard  1.116         return os;
1284 yi.zhou        1.108     }
1285 mike           1.112 
1286 david.dillard  1.116 #endif  // PEGASUS_HAS_ICU
1287 mike           1.112 
1288                          for (Uint32 i = 0, n = str.size(); i < n; i++)
1289 yi.zhou        1.108     {
1290 mike           1.112         Uint16 code = str[i];
1291 david.dillard  1.105 
1292 mike           1.112         if (code > 0 && !(code & 0xFF00))
1293                                      os << char(code);
1294                              else
1295                                  {
1296                                  // Print in hex format:
1297                                  char buffer[8];
1298                                  sprintf(buffer, "\\x%04X", code);
1299                                  os << buffer;
1300 david.dillard  1.105         }
1301 yi.zhou        1.108     }
1302 kumpf          1.39  
1303                          return os;
1304                      }
1305                      
1306 mike           1.112 void StringAppendCharAux(StringRep*& _rep)
1307 kumpf          1.39  {
1308 mike           1.112     StringRep* tmp;
1309                      
1310                          if (_rep->cap)
1311                          {
1312                              tmp = StringRep::alloc(2 * _rep->cap);
1313                              tmp->size = _rep->size;
1314                              _copy(tmp->data, _rep->data, _rep->size);
1315                          }
1316                          else
1317                          {
1318                              tmp = StringRep::alloc(8);
1319                              tmp->size = 0;
1320                          }
1321                      
1322                          StringRep::unref(_rep);
1323                          _rep = tmp;
1324 kumpf          1.39  }
1325                      
1326 thilo.boehm    1.128 void AssignASCII(String& s, const char* str, Uint32 n)
1327                      {
1328                          class StringLayout
1329                          {
1330                          public:
1331                              StringRep* rep;
1332                          };
1333                      
1334 kumpf          1.130     StringLayout* that = reinterpret_cast<StringLayout*>(&s);
1335 thilo.boehm    1.128 
1336                          _checkNullPointer(str);
1337                      
1338                          if (n > that->rep->cap || that->rep->refs.get() != 1)
1339                          {
1340                              StringRep::unref(that->rep);
1341                              that->rep = StringRep::alloc(n);
1342                          }
1343                      
1344                          _copy(that->rep->data, str, n);
1345                          that->rep->size = n;
1346                          that->rep->data[that->rep->size] = 0;
1347                      }
1348                      
1349 mike           1.112 PEGASUS_NAMESPACE_END
1350                      
1351                      /*
1352                      ================================================================================
1353                      
1354                      String optimizations:
1355                      
1356                          1.  Added mechanism allowing certain functions to be inlined only when
1357                              used by internal Pegasus modules. External modules (i.e., providers)
1358                              link to a non-inline version, which allows for binary compatibility.
1359                      
1360                          2.  Implemented copy-on-write with atomic increment/decrement. This
1361                              yielded a 10% improvement for the 'gc' benchmark and an 11% improvement
1362                              for the 'ni1000' benchmark.
1363                      
1364                          3.  Employed loop unrolling in several places. For example, see:
1365                      
1366                                  static Uint16* _find(const Uint16* s, size_t n, Uint16 c);
1367                      
1368                          4.  Used the "empty-rep" optimization (described in whitepaper from the
1369                              GCC Developers Summit). This reduced default construction to a simple
1370 mike           1.112         pointer assignment.
1371                      
1372                                  inline String::String() : _rep(&_emptyRep) { }
1373                      
1374                          5.  Implemented Uint16 versions of toupper() and tolower() using tables.
1375                              For example:
1376                      
1377                                  static const char _upper[] =
1378                                  {
1379                                      0,1,2,...255
1380                                  };
1381                      
1382                                  inline Uint16 _toUpper(Uint16 x)
1383                                  {
1384                                      return (x & 0xFF00) ? x : _upper[x];
1385                                  }
1386                      
1387 david.dillard  1.116         This outperforms the system implementation by avoiding an anding
1388 mike           1.112         operation.
1389                      
1390 david.dillard  1.116     6.  Implemented char* version of the following member functions to
1391                              eliminate unnecessary creation of anonymous string objects
1392 mike           1.112         (temporaries).
1393                      
1394                                  String(const String& s1, const char* s2);
1395                                  String(const char* s1, const String& s2);
1396                                  String& String::operator=(const char* str);
1397                                  Uint32 String::find(const char* s) const;
1398                                  bool String::equal(const String& s1, const char* s2);
1399                                  static int String::compare(const String& s1, const char* s2);
1400                                  String& String::append(const char* str);
1401                                  String& String::append(const char* str, Uint32 size);
1402                                  static bool String::equalNoCase(const String& s1, const char* s2);
1403                                  String& operator=(const char* str)
1404                                  String& String::assign(const char* str)
1405                                  String& String::append(const char* str)
1406                                  Boolean operator==(const String& s1, const char* s2)
1407                                  Boolean operator==(const char* s1, const String& s2)
1408                                  Boolean operator!=(const String& s1, const char* s2)
1409                                  Boolean operator!=(const char* s1, const String& s2)
1410                                  Boolean operator<(const String& s1, const char* s2)
1411                                  Boolean operator<(const char* s1, const String& s2)
1412                                  Boolean operator>(const String& s1, const char* s2)
1413 mike           1.112             Boolean operator>(const char* s1, const String& s2)
1414                                  Boolean operator<=(const String& s1, const char* s2)
1415                                  Boolean operator<=(const char* s1, const String& s2)
1416                                  Boolean operator>=(const String& s1, const char* s2)
1417                                  Boolean operator>=(const char* s1, const String& s2)
1418                                  String operator+(const String& s1, const char* s2)
1419                                  String operator+(const char* s1, const String& s2)
1420                      
1421 david.dillard  1.116     7.  Optimized _roundUpToPow2(), used in rounding the capacity to the next
1422 mike           1.112         power of two (algorithm from the book "Hacker's Delight").
1423                      
1424                                  static Uint32 _roundUpToPow2(Uint32 x)
1425                                  {
1426                                      if (x < 8)
1427                                          return 8;
1428                      
1429                                      x--;
1430                                      x |= (x >> 1);
1431                                      x |= (x >> 2);
1432                                      x |= (x >> 4);
1433                                      x |= (x >> 8);
1434                                      x |= (x >> 16);
1435                                      x++;
1436                      
1437                                      return x;
1438                                  }
1439                      
1440                          8.  Implemented "concatenating constructors" to eliminate temporaries
1441 david.dillard  1.116         created by operator+(). This scheme employs the "return-value
1442 mike           1.112         optimization" described by Stan Lippman.
1443                      
1444                                  inline String operator+(const String& s1, const String& s2)
1445                                  {
1446                                      return String(s1, s2, 0);
1447                                  }
1448                      
1449                          9.  Experimented to find the optimal initial size for a short string.
1450                              Eight seems to offer the best tradeoff between space and time.
1451                      
1452                          10. Inlined all members of the Char16 class.
1453                      
1454                          11. Used Uint16 internally in the String class. This showed no improvement
1455                              since Char16 was already fully inlined and was essentially reduced to
1456                              Uint16 in any case.
1457                      
1458                          12. Implemented conditional logic (#if) allowing error checking logic to
1459 david.dillard  1.116         be excluded to improve performance. Examples include bounds checking
1460 mike           1.112         and null-pointer checking.
1461                      
1462                          13. Used memcpy() and memcmp() where possible. These are implemented using
1463                              the rep family of instructions under Intel and are much faster.
1464                      
1465 david.dillard  1.116     14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
1466 mike           1.112         copy routine overhead.
1467                      
1468                          15. Added ASCII7 form of the constructor and assign().
1469                      
1470                                  String s("hello world", String::ASCII7);
1471                      
1472                                  s.assignASCII7("hello world");
1473                      
1474                              This avoids slower UTF8 processing when not needed.
1475                      
1476                      ================================================================================
1477                      */

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2