(file) Return to String.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 karl  1.119 //%2006////////////////////////////////////////////////////////////////////////
   2 mike  1.27  //
   3 karl  1.97  // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
   4             // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
   5             // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
   6 karl  1.85  // IBM Corp.; EMC Corporation, The Open Group.
   7 karl  1.97  // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
   8             // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
   9 karl  1.98  // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
  10             // EMC Corporation; VERITAS Software Corporation; The Open Group.
  11 karl  1.119 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
  12             // EMC Corporation; Symantec Corporation; The Open Group.
  13 mike  1.27  //
  14             // Permission is hereby granted, free of charge, to any person obtaining a copy
  15 kumpf 1.41  // of this software and associated documentation files (the "Software"), to
  16             // deal in the Software without restriction, including without limitation the
  17             // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  18 mike  1.27  // sell copies of the Software, and to permit persons to whom the Software is
  19             // furnished to do so, subject to the following conditions:
  20 karl  1.119 // 
  21 kumpf 1.41  // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
  22 mike  1.27  // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
  23             // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
  24 kumpf 1.41  // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
  25             // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  26             // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  27 mike  1.27  // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  28             // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  29             //
  30             //==============================================================================
  31             //
  32             //%/////////////////////////////////////////////////////////////////////////////
  33             
  34 jim.wunderlich 1.115 #include <Pegasus/Common/PegasusAssert.h>
  35 mike           1.113 #include <cstring>
  36 kumpf          1.48  #include "InternalException.h"
  37 david          1.69  #include "CommonUTF.h"
  38 mike           1.112 #include "MessageLoader.h"
  39                      #include "StringRep.h"
  40 david          1.69  
  41                      #ifdef PEGASUS_HAS_ICU
  42 chuck          1.99  #include <unicode/ustring.h>
  43                      #include <unicode/uchar.h>
  44 david          1.69  #endif
  45                      
  46 mike           1.112 PEGASUS_NAMESPACE_BEGIN
  47 mike           1.28  
  48 mike           1.112 //==============================================================================
  49                      //
  50                      // Compile-time macros (undefined by default).
  51                      //
  52                      //     PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
  53                      //
  54                      //==============================================================================
  55 mike           1.27  
  56 mike           1.112 //==============================================================================
  57 kumpf          1.39  //
  58 mike           1.112 // File-scope definitions:
  59 kumpf          1.54  //
  60 mike           1.112 //==============================================================================
  61                      
  62                      // Note: this table is much faster than the system toupper(). Please do not
  63                      // change.
  64 kumpf          1.54  
  65 david.dillard  1.116 const Uint8 _toUpperTable[256] =
  66 kumpf          1.54  {
  67 mike           1.112     0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
  68                          0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
  69                          0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
  70                          0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
  71                          0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
  72                          0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
  73                          0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
  74                          0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
  75                          0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
  76                          0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
  77                          0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
  78                          0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
  79                          0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
  80                          0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
  81                          0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
  82                          0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
  83                          0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
  84                          0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
  85                          0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
  86                          0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
  87                          0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
  88 mike           1.112     0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
  89                          0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
  90                          0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
  91                          0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
  92                          0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
  93                          0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
  94                          0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
  95                          0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
  96                          0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
  97                          0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
  98                          0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
  99                      };
 100                      
 101                      // Note: this table is much faster than the system tulower(). Please do not
 102                      // change.
 103                      
 104 david.dillard  1.116 const Uint8 _toLowerTable[256] =
 105 mike           1.112 {
 106                          0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
 107                          0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
 108                          0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
 109                          0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
 110                          0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
 111                          0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
 112                          0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
 113                          0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
 114                          0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
 115                          0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
 116                          0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
 117                          0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
 118                          0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
 119                          0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
 120                          0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
 121                          0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
 122                          0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
 123                          0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
 124                          0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
 125                          0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
 126 mike           1.112     0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
 127                          0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
 128                          0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
 129                          0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
 130                          0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
 131                          0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
 132                          0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
 133                          0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
 134                          0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
 135                          0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
 136                          0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
 137                          0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
 138                      };
 139                      
 140                      // Converts 16-bit characters to upper case. This routine is faster than the
 141                      // system toupper(). Please do not change.
 142                      inline Uint16 _toUpper(Uint16 x)
 143                      {
 144                          return (x & 0xFF00) ? x : _toUpperTable[x];
 145 kumpf          1.54  }
 146                      
 147 mike           1.112 // Converts 16-bit characters to lower case. This routine is faster than the
 148                      // system toupper(). Please do not change.
 149                      inline Uint16 _toLower(Uint16 x)
 150 kumpf          1.54  {
 151 mike           1.112     return (x & 0xFF00) ? x : _toLowerTable[x];
 152                      }
 153                      
 154                      // Rounds x up to the nearest power of two (or just returns 8 if x < 8).
 155                      static Uint32 _roundUpToPow2(Uint32 x)
 156                      {
 157 dave.sudlik    1.120     // Check for potential overflow in x
 158                          PEGASUS_CHECK_CAPACITY_OVERFLOW(x);
 159 mike           1.112 
 160                          if (x < 8)
 161                              return 8;
 162                      
 163                          x--;
 164                          x |= (x >> 1);
 165                          x |= (x >> 2);
 166                          x |= (x >> 4);
 167                          x |= (x >> 8);
 168                          x |= (x >> 16);
 169                          x++;
 170                      
 171                          return x;
 172                      }
 173                      
 174                      template<class P, class Q>
 175                      static void _copy(P* p, const Q* q, size_t n)
 176                      {
 177                          // The following employs loop unrolling for efficiency. Please do not
 178                          // eliminate.
 179                      
 180 mike           1.112     while (n >= 8)
 181                          {
 182                              p[0] = q[0];
 183                              p[1] = q[1];
 184                              p[2] = q[2];
 185                              p[3] = q[3];
 186                              p[4] = q[4];
 187                              p[5] = q[5];
 188                              p[6] = q[6];
 189                              p[7] = q[7];
 190                              p += 8;
 191                              q += 8;
 192                              n -= 8;
 193                          }
 194                      
 195                          while (n >= 4)
 196                          {
 197                              p[0] = q[0];
 198                              p[1] = q[1];
 199                              p[2] = q[2];
 200                              p[3] = q[3];
 201 mike           1.112         p += 4;
 202                              q += 4;
 203                              n -= 4;
 204                          }
 205                      
 206                          while (n--)
 207                              *p++ = *q++;
 208                      }
 209                      
 210                      static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
 211                      {
 212                          // The following employs loop unrolling for efficiency. Please do not
 213                          // eliminate.
 214                      
 215                          while (n >= 4)
 216                          {
 217                              if (s[0] == c)
 218                                  return (Uint16*)s;
 219                              if (s[1] == c)
 220                                  return (Uint16*)&s[1];
 221                              if (s[2] == c)
 222 mike           1.112             return (Uint16*)&s[2];
 223                              if (s[3] == c)
 224                                  return (Uint16*)&s[3];
 225 kumpf          1.82  
 226 mike           1.112         n -= 4;
 227                              s += 4;
 228                          }
 229                      
 230                          if (n)
 231                          {
 232                              if (*s == c)
 233                                  return (Uint16*)s;
 234                              s++;
 235                              n--;
 236                          }
 237                      
 238                          if (n)
 239                          {
 240                              if (*s == c)
 241                                  return (Uint16*)s;
 242                              s++;
 243                              n--;
 244                          }
 245                      
 246                          if (n && *s == c)
 247 mike           1.112         return (Uint16*)s;
 248                      
 249                          // Not found!
 250                          return 0;
 251                      }
 252                      
 253                      static int _compare(const Uint16* s1, const Uint16* s2)
 254                      {
 255                          while (*s1 && *s2)
 256                          {
 257                              int r = *s1++ - *s2++;
 258                      
 259                              if (r)
 260                                  return r;
 261                          }
 262                      
 263                          if (*s2)
 264                              return -1;
 265                          else if (*s1)
 266                              return 1;
 267                      
 268 mike           1.112     return 0;
 269                      }
 270                      
 271                      static int _compareNoUTF8(const Uint16* s1, const char* s2)
 272                      {
 273                          Uint16 c1;
 274                          Uint16 c2;
 275                      
 276                          do
 277                          {
 278                              c1 = *s1++;
 279                              c2 = *s2++;
 280                      
 281                              if (c1 == 0)
 282                                  return c1 - c2;
 283                          }
 284                          while (c1 == c2);
 285                      
 286                          return c1 - c2;
 287                      }
 288                      
 289 mike           1.112 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)
 290                      {
 291                          memcpy(s1, s2, n * sizeof(Uint16));
 292                      }
 293                      
 294                      void StringThrowOutOfBounds()
 295                      {
 296                          throw IndexOutOfBoundsException();
 297                      }
 298                      
 299                      inline void _checkNullPointer(const void* ptr)
 300                      {
 301                          if (!ptr)
 302                              throw NullPointer();
 303                      }
 304                      
 305                      static void _StringThrowBadUTF8(Uint32 index)
 306                      {
 307                          MessageLoaderParms parms(
 308                              "Common.String.BAD_UTF8",
 309                              "The byte sequence starting at index $0 "
 310 mike           1.112         "is not valid UTF-8 encoding.",
 311                              index);
 312                          throw Exception(parms);
 313                      }
 314                      
 315                      static size_t _copyFromUTF8(
 316 david.dillard  1.116     Uint16* dest,
 317                          const char* src,
 318 mike           1.112     size_t n,
 319                          size_t& utf8_error_index)
 320                      {
 321                          Uint16* p = dest;
 322                          const Uint8* q = (const Uint8*)src;
 323                      
 324                          // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later).
 325                          // Use loop-unrolling.
 326                      
 327                          while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0)
 328                          {
 329                              p[0] = q[0];
 330                              p[1] = q[1];
 331                              p[2] = q[2];
 332                              p[3] = q[3];
 333                              p[4] = q[4];
 334                              p[5] = q[5];
 335                              p[6] = q[6];
 336                              p[7] = q[7];
 337                              p += 8;
 338                              q += 8;
 339 mike           1.112         n -= 8;
 340                          }
 341                      
 342                          while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0)
 343                          {
 344                              p[0] = q[0];
 345                              p[1] = q[1];
 346                              p[2] = q[2];
 347                              p[3] = q[3];
 348                              p += 4;
 349                              q += 4;
 350                              n -= 4;
 351                          }
 352                      
 353                          switch (n)
 354                          {
 355                              case 0:
 356                                  return p - dest;
 357                              case 1:
 358                                  if (q[0] < 128)
 359                                  {
 360 mike           1.112                 p[0] = q[0];
 361                                      return p + 1 - dest;
 362                                  }
 363                                  break;
 364                              case 2:
 365                                  if (((q[0]|q[1]) & 0x80) == 0)
 366                                  {
 367                                      p[0] = q[0];
 368                                      p[1] = q[1];
 369                                      return p + 2 - dest;
 370                                  }
 371                                  break;
 372                              case 3:
 373                                  if (((q[0]|q[1]|q[2]) & 0x80) == 0)
 374                                  {
 375                                      p[0] = q[0];
 376                                      p[1] = q[1];
 377                                      p[2] = q[2];
 378                                      return p + 3 - dest;
 379                                  }
 380                                  break;
 381 mike           1.112     }
 382                      
 383                          // Process remaining characters.
 384                      
 385                          while (n)
 386                          {
 387                              // Optimize for 7-bit ASCII case.
 388                      
 389                              if (*q < 128)
 390                              {
 391                                  *p++ = *q++;
 392                                  n--;
 393                              }
 394                              else
 395                              {
 396                                  Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1;
 397                      
 398                                  if (c > n || !isValid_U8(q, c) ||
 399                                      UTF8toUTF16(&q, q + c, &p, p + n) != 0)
 400                                  {
 401                                      utf8_error_index = q - (const Uint8*)src;
 402 mike           1.112                 return size_t(-1);
 403                                  }
 404                      
 405                                  n -= c;
 406                              }
 407                          }
 408                      
 409                          return p - dest;
 410                      }
 411                      
 412 david.dillard  1.116 // Note: dest must be at least three times src (plus an extra byte for
 413 mike           1.112 // terminator).
 414                      static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n)
 415                      {
 416                          // The following employs loop unrolling for efficiency. Please do not
 417                          // eliminate.
 418                      
 419                          const Uint16* q = src;
 420                          Uint8* p = (Uint8*)dest;
 421                      
 422                          while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
 423 kumpf          1.82      {
 424 mike           1.112         p[0] = q[0];
 425                              p[1] = q[1];
 426                              p[2] = q[2];
 427                              p[3] = q[3];
 428                              p += 4;
 429                              q += 4;
 430                              n -= 4;
 431 kumpf          1.82      }
 432 mike           1.112 
 433                          switch (n)
 434                          {
 435                              case 0:
 436                                  return p - (Uint8*)dest;
 437                              case 1:
 438                                  if (q[0] < 128)
 439                                  {
 440                                      p[0] = q[0];
 441                                      return p + 1 - (Uint8*)dest;
 442                                  }
 443                                  break;
 444                              case 2:
 445                                  if (q[0] < 128 && q[1] < 128)
 446                                  {
 447                                      p[0] = q[0];
 448                                      p[1] = q[1];
 449                                      return p + 2 - (Uint8*)dest;
 450                                  }
 451                                  break;
 452                              case 3:
 453 mike           1.112             if (q[0] < 128 && q[1] < 128 && q[2] < 128)
 454                                  {
 455                                      p[0] = q[0];
 456                                      p[1] = q[1];
 457                                      p[2] = q[2];
 458                                      return p + 3 - (Uint8*)dest;
 459                                  }
 460                                  break;
 461                          }
 462                      
 463                          // If this line was reached, there must be characters greater than 128.
 464                      
 465                          UTF16toUTF8(&q, q + n, &p, p + 3 * n);
 466                      
 467                          return p - (Uint8*)dest;
 468 kumpf          1.54  }
 469                      
 470 mike           1.112 static inline size_t _convert(
 471                          Uint16* p, const char* q, size_t n, size_t& utf8_error_index)
 472 kumpf          1.54  {
 473 mike           1.112 #ifdef PEGASUS_STRING_NO_UTF8
 474                          _copy(p, q, n);
 475                          return n;
 476                      #else
 477                          return _copyFromUTF8(p, q, n, utf8_error_index);
 478                      #endif
 479 kumpf          1.54  }
 480                      
 481 mike           1.112 //==============================================================================
 482                      //
 483                      // class CString
 484                      //
 485                      //==============================================================================
 486                      
 487                      CString::CString(const CString& cstr) : _rep(0)
 488 kumpf          1.54  {
 489 mike           1.112     if (cstr._rep)
 490 kumpf          1.82      {
 491 mike           1.112         size_t n = strlen(cstr._rep) + 1;
 492                              _rep = (char*)operator new(n);
 493                              memcpy(_rep, cstr._rep, n);
 494 kumpf          1.82      }
 495 kumpf          1.54  }
 496                      
 497 kumpf          1.56  CString& CString::operator=(const CString& cstr)
 498                      {
 499 kumpf          1.82      if (&cstr != this)
 500 kumpf          1.81      {
 501 kumpf          1.82          if (_rep)
 502                              {
 503 mike           1.112             operator delete(_rep);
 504 kumpf          1.82              _rep = 0;
 505                              }
 506 mike           1.112 
 507 kumpf          1.82          if (cstr._rep)
 508                              {
 509 mike           1.112             size_t n = strlen(cstr._rep) + 1;
 510                                  _rep = (char*)operator new(n);
 511                                  memcpy(_rep, cstr._rep, n);
 512 kumpf          1.82          }
 513 kumpf          1.81      }
 514 mike           1.112 
 515 kumpf          1.56      return *this;
 516                      }
 517                      
 518 mike           1.112 //==============================================================================
 519 kumpf          1.54  //
 520 mike           1.112 // class StringRep
 521 kumpf          1.39  //
 522 mike           1.112 //==============================================================================
 523 kumpf          1.39  
                           // Definition of the static StringRep::_emptyRep member. The const char*
                           // String constructors in this file point _rep at it before calling
                           // StringRep::create().
  524 mike           1.112 StringRep StringRep::_emptyRep;
 525 mike           1.27  
 526 mike           1.112 inline StringRep* StringRep::alloc(size_t cap)
 527 mike           1.27  {
 528 dave.sudlik    1.120     // Check for potential overflow in cap
 529                          PEGASUS_CHECK_CAPACITY_OVERFLOW(cap);
 530 mike           1.27  
 531 mike           1.112     StringRep* rep = (StringRep*)::operator new(
 532                              sizeof(StringRep) + cap * sizeof(Uint16));
 533                          rep->cap = cap;
 534                          new(&rep->refs) AtomicInt(1);
 535                      
 536                          return rep;
 537 mike           1.27  }
 538                      
 539 mike           1.112 static inline void _reserve(StringRep*& rep, Uint32 cap)
 540 chuck          1.102 {
 541 mike           1.114     if (cap > rep->cap || rep->refs.get() != 1)
 542 chuck          1.102     {
 543 mike           1.112         size_t n = _roundUpToPow2(cap);
 544                              StringRep* newRep = StringRep::alloc(n);
 545                              newRep->size = rep->size;
 546                              _copy(newRep->data, rep->data, rep->size + 1);
 547                              StringRep::unref(rep);
 548                              rep = newRep;
 549                          }
 550                      }
 551 david.dillard  1.105 
 552 mike           1.112 StringRep* StringRep::create(const Uint16* data, size_t size)
 553                      {
 554                          StringRep* rep = StringRep::alloc(size);
 555                          rep->size = size;
 556                          _copy(rep->data, data, size);
 557                          rep->data[size] = '\0';
 558                          return rep;
 559                      }
 560 chuck          1.102 
 561 mike           1.112 StringRep* StringRep::copyOnWrite(StringRep* rep)
 562                      {
 563                          // Return a new copy of rep. Release rep.
 564 chuck          1.102 
 565 mike           1.112     StringRep* newRep = StringRep::alloc(rep->size);
 566                          newRep->size = rep->size;
 567                          _copy(newRep->data, rep->data, rep->size);
 568                          newRep->data[newRep->size] = '\0';
 569                          StringRep::unref(rep);
 570                          return newRep;
 571 chuck          1.102 }
 572                      
 573 mike           1.112 StringRep* StringRep::create(const char* data, size_t size)
 574 kumpf          1.43  {
 575 mike           1.112     StringRep* rep = StringRep::alloc(size);
 576                          size_t utf8_error_index;
 577                          rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index);
 578                      
 579                          if (rep->size == size_t(-1))
 580                          {
 581                              StringRep::free(rep);
 582 a.dunfey       1.125         _StringThrowBadUTF8((Uint32)utf8_error_index);
 583 mike           1.112     }
 584 kumpf          1.43  
 585 mike           1.112     rep->data[rep->size] = '\0';
 586 kumpf          1.43  
 587 mike           1.112     return rep;
 588 mike           1.27  }
 589                      
 590 mike           1.112 Uint32 StringRep::length(const Uint16* str)
 591 mike           1.27  {
 592 mike           1.112     // Note: We could unroll this but it is rarely called.
 593                      
 594                          const Uint16* end = (Uint16*)str;
 595                      
 596                          while (*end++)
 597                              ;
 598                      
 599 a.dunfey       1.125     return (Uint32)(end - str - 1);
 600 kumpf          1.39  }
 601 tony           1.66  
 602 mike           1.112 //==============================================================================
 603                      //
 604                      // class String
 605                      //
 606                      //==============================================================================
 607                      
                           // Definition of the static String::EMPTY constant, default-constructed.
                           // NOTE(review): presumably the empty string -- the default constructor
                           // is not visible in this part of the file; confirm.
  608                      const String String::EMPTY;
 609 mike           1.27  
 610 kumpf          1.39  String::String(const String& str, Uint32 n)
 611                      {
 612 mike           1.112     _checkBounds(n, str._rep->size);
 613                          _rep = StringRep::create(str._rep->data, n);
 614 kumpf          1.39  }
 615                      
 616                      String::String(const Char16* str)
 617                      {
 618 mike           1.112     _checkNullPointer(str);
 619                          _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
 620 mike           1.27  }
 621                      
 622 kumpf          1.39  String::String(const Char16* str, Uint32 n)
 623                      {
 624 mike           1.112     _checkNullPointer(str);
 625                          _rep = StringRep::create((Uint16*)str, n);
 626 kumpf          1.39  }
 627                      
 628                      String::String(const char* str)
 629 mike           1.27  {
 630 mike           1.112     _checkNullPointer(str);
 631 david.dillard  1.105 
 632 mike           1.112     // Set this just in case create() throws an exception.
 633                          _rep = &StringRep::_emptyRep;
 634                          _rep = StringRep::create(str, strlen(str));
 635 mike           1.27  }
 636                      
 637 kumpf          1.39  String::String(const char* str, Uint32 n)
 638 mike           1.27  {
 639 mike           1.112     _checkNullPointer(str);
 640 david.dillard  1.105 
 641 mike           1.112     // Set this just in case create() throws an exception.
 642                          _rep = &StringRep::_emptyRep;
 643                          _rep = StringRep::create(str, n);
 644 kumpf          1.39  }
 645 mike           1.27  
 646 mike           1.112 String::String(const String& s1, const String& s2)
 647 kumpf          1.39  {
 648 mike           1.112     size_t n1 = s1._rep->size;
 649                          size_t n2 = s2._rep->size;
 650                          size_t n = n1 + n2;
 651                          _rep = StringRep::alloc(n);
 652                          _copy(_rep->data, s1._rep->data, n1);
 653                          _copy(_rep->data + n1, s2._rep->data, n2);
 654                          _rep->size = n;
 655                          _rep->data[n] = '\0';
 656 mike           1.27  }
 657                      
 658 mike           1.112 String::String(const String& s1, const char* s2)
 659 mike           1.27  {
 660 mike           1.112     _checkNullPointer(s2);
 661                          size_t n1 = s1._rep->size;
 662                          size_t n2 = strlen(s2);
 663                          _rep = StringRep::alloc(n1 + n2);
 664                          _copy(_rep->data, s1._rep->data, n1);
 665                          size_t utf8_error_index;
 666                          size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index);
 667                      
 668                          if (tmp == size_t(-1))
 669 kumpf          1.82      {
 670 mike           1.112         StringRep::free(_rep);
 671                              _rep = &StringRep::_emptyRep;
 672 a.dunfey       1.125         _StringThrowBadUTF8((Uint32)utf8_error_index);
 673 kumpf          1.82      }
 674 mike           1.112 
 675                          _rep->size = n1 + tmp;
 676                          _rep->data[_rep->size] = '\0';
 677 mike           1.27  }
 678                      
 679 mike           1.112 String::String(const char* s1, const String& s2)
 680 mike           1.27  {
 681 mike           1.112     _checkNullPointer(s1);
 682                          size_t n1 = strlen(s1);
 683                          size_t n2 = s2._rep->size;
 684                          _rep = StringRep::alloc(n1 + n2);
 685                          size_t utf8_error_index;
 686                          size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index);
 687                      
 688                          if (tmp ==  size_t(-1))
 689                          {
 690                              StringRep::free(_rep);
 691                              _rep = &StringRep::_emptyRep;
 692 a.dunfey       1.125         _StringThrowBadUTF8((Uint32)utf8_error_index);
 693 mike           1.112     }
 694                      
 695                          _rep->size = n2 + tmp;
 696                          _copy(_rep->data + n1, s2._rep->data, n2);
 697                          _rep->data[_rep->size] = '\0';
 698 mike           1.27  }
 699                      
 700 mike           1.112 String& String::assign(const String& str)
 701 mike           1.27  {
 702 mike           1.112     if (_rep != str._rep)
 703 david.dillard  1.105     {
 704 mike           1.112         StringRep::unref(_rep);
 705                              StringRep::ref(_rep = str._rep);
 706 david.dillard  1.105     }
 707                      
 708 mike           1.27      return *this;
 709                      }
 710                      
 711                      String& String::assign(const Char16* str, Uint32 n)
 712                      {
 713 mike           1.112     _checkNullPointer(str);
 714                      
 715 mike           1.114     if (n > _rep->cap || _rep->refs.get() != 1)
 716 david.dillard  1.105     {
 717 mike           1.112         StringRep::unref(_rep);
 718                              _rep = StringRep::alloc(n);
 719 david.dillard  1.105     }
 720                      
 721 mike           1.112     _rep->size = n;
 722                          _copy(_rep->data, (Uint16*)str, n);
 723                          _rep->data[n] = '\0';
 724                      
 725 mike           1.27      return *this;
 726                      }
 727                      
 728 mike           1.112 String& String::assign(const char* str, Uint32 n)
 729 chuck          1.102 {
 730 mike           1.112     _checkNullPointer(str);
 731                      
 732 mike           1.114     if (n > _rep->cap || _rep->refs.get() != 1)
 733 david.dillard  1.105     {
 734 mike           1.112         StringRep::unref(_rep);
 735                              _rep = StringRep::alloc(n);
 736 david.dillard  1.105     }
 737                      
 738 mike           1.112     size_t utf8_error_index;
 739                          _rep->size = _convert(_rep->data, str, n, utf8_error_index);
 740 chuck          1.102 
 741 mike           1.112     if (_rep->size ==  size_t(-1))
 742 david.dillard  1.105     {
 743 mike           1.112         StringRep::free(_rep);
 744                              _rep = &StringRep::_emptyRep;
 745 a.dunfey       1.125         _StringThrowBadUTF8((Uint32)utf8_error_index);
 746 david.dillard  1.105     }
 747 mike           1.112 
 748                          _rep->data[_rep->size] = 0;
 749 david.dillard  1.105 
 750 mike           1.27      return *this;
 751                      }
 752                      
 753 kumpf          1.39  void String::clear()
 754                      {
 755 mike           1.112     if (_rep->size)
 756                          {
 757 mike           1.114         if (_rep->refs.get() == 1)
 758 mike           1.112         {
 759                                  _rep->size = 0;
 760                                  _rep->data[0] = '\0';
 761                              }
 762                              else
 763                              {
 764                                  StringRep::unref(_rep);
 765                                  _rep = &StringRep::_emptyRep;
 766                              }
 767                          }
 768 kumpf          1.39  }
 769                      
 770 mike           1.112 void String::reserveCapacity(Uint32 cap)
 771 kumpf          1.39  {
 772 mike           1.112     _reserve(_rep, cap);
 773 kumpf          1.39  }
 774                      
 775 mike           1.112 CString String::getCString() const
 776                      {
 777 david.dillard  1.116     // A UTF8 string can have three times as many characters as its UTF16
 778                          // counterpart, so we allocate extra memory for the worst case. In the
 779 mike           1.112     // best case, we may need only one third of the memory allocated. But
 780 david.dillard  1.116     // downsizing the string afterwards is expensive and unecessary since
 781                          // CString objects are usually short-lived (disappearing after only a few
 782 mike           1.112     // instructions). CString objects are typically created on the stack as
 783                          // means to obtain a char* pointer.
 784                      
 785                      #ifdef PEGASUS_STRING_NO_UTF8
 786                          char* str = (char*)operator new(_rep->size + 1);
 787                          _copy(str, _rep->data, _rep->size);
 788                          str[_rep->size] = '\0';
 789                          return CString(str);
 790 gs.keenan      1.110 #else
 791 a.dunfey       1.125     Uint32 n = (Uint32)(3 * _rep->size);
 792 mike           1.112     char* str = (char*)operator new(n + 1);
 793                          size_t size = _copyToUTF8(str, _rep->data, _rep->size);
 794                          str[size] = '\0';
 795                          return CString(str);
 796 gs.keenan      1.110 #endif
 797 kumpf          1.39  }
 798                      
 799 mike           1.112 String& String::append(const Char16* str, Uint32 n)
 800 kumpf          1.39  {
 801 mike           1.112     _checkNullPointer(str);
 802                      
 803                          size_t oldSize = _rep->size;
 804                          size_t newSize = oldSize + n;
 805 a.dunfey       1.125     _reserve(_rep, (Uint32)newSize);
 806 mike           1.112     _copy(_rep->data + oldSize, (Uint16*)str, n);
 807                          _rep->size = newSize;
 808                          _rep->data[newSize] = '\0';
 809                      
 810                          return *this;
 811 kumpf          1.39  }
 812                      
 813 mike           1.112 String& String::append(const String& str)
 814 mike           1.27  {
 815 a.dunfey       1.125     return append((Char16*)(&(str._rep->data[0])), (Uint32)str._rep->size);
 816 mike           1.27  }
 817                      
 818 mike           1.112 String& String::append(const char* str, Uint32 size)
 819 mike           1.27  {
 820 mike           1.112     _checkNullPointer(str);
 821                      
 822                          size_t oldSize = _rep->size;
 823                          size_t cap = oldSize + size;
 824                      
 825 a.dunfey       1.125     _reserve(_rep, (Uint32)cap);
 826 mike           1.112     size_t utf8_error_index;
 827                          size_t tmp = _convert(
 828                              (Uint16*)_rep->data + oldSize, str, size, utf8_error_index);
 829                      
 830                          if (tmp ==  size_t(-1))
 831                          {
 832                              StringRep::free(_rep);
 833                              _rep = &StringRep::_emptyRep;
 834 a.dunfey       1.125         _StringThrowBadUTF8((Uint32)utf8_error_index);
 835 mike           1.112     }
 836 mike           1.27  
 837 mike           1.112     _rep->size += tmp;
 838                          _rep->data[_rep->size] = '\0';
 839 mike           1.27  
 840 kumpf          1.39      return *this;
 841                      }
 842                      
 843 mike           1.112 void String::remove(Uint32 index, Uint32 n)
 844 mike           1.27  {
 845 mike           1.112     if (n == PEG_NOT_FOUND)
 846 a.dunfey       1.125         n = (Uint32)(_rep->size - index);
 847 mike           1.112 
 848                          _checkBounds(index + n, _rep->size);
 849                      
 850 mike           1.114     if (_rep->refs.get() != 1)
 851 mike           1.112         _rep = StringRep::copyOnWrite(_rep);
 852 mike           1.27  
 853 jim.wunderlich 1.115     PEGASUS_ASSERT(index + n <= _rep->size);
 854 mike           1.27  
 855 mike           1.112     size_t rem = _rep->size - (index + n);
 856                          Uint16* data = _rep->data;
 857 mike           1.27  
 858 mike           1.112     if (rem)
 859                              memmove(data + index, data + index + n, rem * sizeof(Uint16));
 860 mike           1.27  
 861 mike           1.112     _rep->size -= n;
 862                          data[_rep->size] = '\0';
 863 mike           1.27  }
 864                      
 865 mike           1.112 String String::subString(Uint32 index, Uint32 n) const
 866 mike           1.27  {
 867 mike           1.112     // Note: this implementation is very permissive but used for
 868                          // backwards compatibility.
 869                      
 870                          if (index < _rep->size)
 871 mike           1.27      {
 872 mike           1.112         if (n == PEG_NOT_FOUND || n > _rep->size - index)
 873 a.dunfey       1.125             n = (Uint32)(_rep->size - index);
 874 mike           1.27  
 875 w.otsuka       1.121         return String((Char16*)(_rep->data + index), n);
 876 mike           1.27      }
 877 david.dillard  1.105 
 878                          return String();
 879 mike           1.27  }
 880                      
 881                      Uint32 String::find(Char16 c) const
 882                      {
 883 mike           1.112     Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
 884 mike           1.27  
 885 mike           1.112     if (p)
 886 david.dillard  1.116         return static_cast<Uint32>(p - _rep->data);
 887 mike           1.27  
 888                          return PEG_NOT_FOUND;
 889                      }
 890                      
 891 kumpf          1.53  Uint32 String::find(Uint32 index, Char16 c) const
 892 mike           1.30  {
 893 mike           1.112     _checkBounds(index, _rep->size);
 894                      
 895                          if (index >= _rep->size)
 896                              return PEG_NOT_FOUND;
 897                      
 898                          Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
 899 mike           1.30  
 900 mike           1.112     if (p)
 901 david.dillard  1.116         return static_cast<Uint32>(p - _rep->data);
 902 mike           1.30  
 903                          return PEG_NOT_FOUND;
 904                      }
 905                      
 906 mike           1.112 Uint32 StringFindAux(
 907                          const StringRep* _rep, const Char16* s, Uint32 n)
 908 mike           1.27  {
 909 mike           1.112     _checkNullPointer(s);
 910 mike           1.27  
 911 mike           1.112     const Uint16* data = _rep->data;
 912                          size_t rem = _rep->size;
 913                      
 914                          while (n <= rem)
 915 mike           1.30      {
 916 mike           1.112         Uint16* p = (Uint16*)_find(data, rem, s[0]);
 917                      
 918                              if (!p)
 919                                  break;
 920 mike           1.30  
 921 mike           1.112         if (memcmp(p, s, n * sizeof(Uint16)) == 0)
 922 david.dillard  1.116             return static_cast<Uint32>(p - _rep->data);
 923 david.dillard  1.105 
 924 mike           1.112         p++;
 925                              rem -= p - data;
 926                              data = p;
 927 mike           1.27      }
 928 mike           1.112 
 929 mike           1.27      return PEG_NOT_FOUND;
 930                      }
 931                      
 932 mike           1.112 Uint32 String::find(const char* s) const
 933                      {
 934                          _checkNullPointer(s);
 935                      
 936                          // Note: could optimize away creation of temporary, but this is rarely
 937                          // called.
 938                          return find(String(s));
 939                      }
 940                      
 941 mike           1.27  Uint32 String::reverseFind(Char16 c) const
 942                      {
 943 mike           1.112     Uint16 x = c;
 944                          Uint16* p = _rep->data;
 945                          Uint16* q = _rep->data + _rep->size;
 946 mike           1.27  
 947 mike           1.112     while (q != p)
 948 mike           1.27      {
 949 mike           1.112         if (*--q == x)
 950 david.dillard  1.116             return static_cast<Uint32>(q - p);
 951 mike           1.27      }
 952                      
 953                          return PEG_NOT_FOUND;
 954                      }
 955                      
 956                      void String::toLower()
 957                      {
 958 david          1.69  #ifdef PEGASUS_HAS_ICU
 959 mike           1.112 
 960 yi.zhou        1.108     if (InitializeICU::initICUSuccessful())
 961 david          1.90      {
 962 mike           1.114         if (_rep->refs.get() != 1)
 963 mike           1.112             _rep = StringRep::copyOnWrite(_rep);
 964                      
 965 yi.zhou        1.108         // This will do a locale-insensitive, but context-sensitive convert.
 966 david.dillard  1.116         // Since context-sensitive casing looks at adjacent chars, this
 967                              // prevents optimizations where the us-ascii is converted before
 968 mike           1.112         // calling ICU.
 969 yi.zhou        1.108         // The string may shrink or expand after the convert.
 970                      
 971 mike           1.112         //// First calculate size of resulting string. u_strToLower() returns
 972                              //// only the size when zero is passed as the destination size argument.
 973                      
 974 yi.zhou        1.108         UErrorCode err = U_ZERO_ERROR;
 975                      
 976 mike           1.112         int32_t newSize = u_strToLower(
 977                                  NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
 978 david.dillard  1.116 
 979 mike           1.112         err = U_ZERO_ERROR;
 980                      
 981                              //// Reserve enough space for the result.
 982                      
 983                              if ((Uint32)newSize > _rep->cap)
 984                                  _reserve(_rep, newSize);
 985                      
 986                              //// Perform the conversion (overlapping buffers are allowed).
 987 chuck          1.99  
 988 mike           1.112         u_strToLower((UChar*)_rep->data, newSize,
 989                                  (UChar*)_rep->data, _rep->size, NULL, &err);
 990 yi.zhou        1.108 
 991 mike           1.112         _rep->size = newSize;
 992                              return;
 993 david          1.90      }
 994 mike           1.112 
 995                      #endif /* PEGASUS_HAS_ICU */
 996                      
 997 mike           1.114     if (_rep->refs.get() != 1)
 998 mike           1.112         _rep = StringRep::copyOnWrite(_rep);
 999                      
1000                          Uint16* p = _rep->data;
1001                          size_t n = _rep->size;
1002                      
1003                          for (; n--; p++)
1004 david          1.90      {
1005 mike           1.112         if (!(*p & 0xFF00))
1006                                  *p = _toLower(*p);
1007 mike           1.27      }
1008 kumpf          1.39  }
1009                      
1010 chuck          1.99  void String::toUpper()
1011 david          1.90  {
1012                      #ifdef PEGASUS_HAS_ICU
1013 mike           1.112 
1014 yi.zhou        1.108     if (InitializeICU::initICUSuccessful())
1015 chuck          1.99      {
1016 mike           1.114         if (_rep->refs.get() != 1)
1017 mike           1.112             _rep = StringRep::copyOnWrite(_rep);
1018                      
1019 yi.zhou        1.108         // This will do a locale-insensitive, but context-sensitive convert.
1020 david.dillard  1.116         // Since context-sensitive casing looks at adjacent chars, this
1021                              // prevents optimizations where the us-ascii is converted before
1022 mike           1.112         // calling ICU.
1023 yi.zhou        1.108         // The string may shrink or expand after the convert.
1024                      
1025 mike           1.112         //// First calculate size of resulting string. u_strToUpper() returns
1026                              //// only the size when zero is passed as the destination size argument.
1027                      
1028 yi.zhou        1.108         UErrorCode err = U_ZERO_ERROR;
1029                      
1030 mike           1.112         int32_t newSize = u_strToUpper(
1031                                  NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
1032                      
1033                              err = U_ZERO_ERROR;
1034                      
1035                              //// Reserve enough space for the result.
1036                      
1037                              if ((Uint32)newSize > _rep->cap)
1038                                  _reserve(_rep, newSize);
1039                      
1040                              //// Perform the conversion (overlapping buffers are allowed).
1041                      
1042                              u_strToUpper((UChar*)_rep->data, newSize,
1043                                  (UChar*)_rep->data, _rep->size, NULL, &err);
1044 chuck          1.99  
1045 mike           1.112         _rep->size = newSize;
1046 yi.zhou        1.108 
1047 mike           1.112         return;
1048 david          1.91      }
1049 mike           1.112 
1050                      #endif /* PEGASUS_HAS_ICU */
1051                      
1052 mike           1.114     if (_rep->refs.get() != 1)
1053 mike           1.112         _rep = StringRep::copyOnWrite(_rep);
1054                      
1055                          Uint16* p = _rep->data;
1056                          size_t n = _rep->size;
1057                      
1058                          for (; n--; p++)
1059                              *p = _toUpper(*p);
1060 david          1.90  }
1061                      
1062 kumpf          1.43  int String::compare(const String& s1, const String& s2, Uint32 n)
1063 kumpf          1.39  {
1064 kumpf          1.118     const Uint16* p1 = s1._rep->data;
1065                          const Uint16* p2 = s2._rep->data;
1066 mike           1.27  
1067 kumpf          1.118     while (n--)
1068                          {
1069                              int r = *p1++ - *p2++;
1070                              if (r)
1071                              {
1072                                  return r;
1073                              }
1074                              else if (!p1[-1])
1075                              {
1076                                  // We must have encountered a null terminator in both s1 and s2
1077                                  return 0;
1078                              }
1079                          }
1080                          return 0;
1081 mike           1.27  }
1082                      
1083 kumpf          1.43  int String::compare(const String& s1, const String& s2)
1084 mike           1.30  {
1085 mike           1.112     return _compare(s1._rep->data, s2._rep->data);
1086                      }
1087 kumpf          1.43  
1088 mike           1.112 int String::compare(const String& s1, const char* s2)
1089                      {
1090                          _checkNullPointer(s2);
1091 mike           1.30  
1092 mike           1.112 #ifdef PEGASUS_STRING_NO_UTF8
1093                          return _compareNoUTF8(s1._rep->data, s2);
1094                      #else
1095                          // ATTN: optimize this!
1096                          return String::compare(s1, String(s2));
1097                      #endif
1098 mike           1.30  }
1099                      
1100 mike           1.112 int String::compareNoCase(const String& str1, const String& str2)
1101 kumpf          1.40  {
1102 david          1.69  #ifdef PEGASUS_HAS_ICU
1103 mike           1.112 
1104 yi.zhou        1.108     if (InitializeICU::initICUSuccessful())
1105                          {
1106 mike           1.112         return  u_strcasecmp(
1107 dave.sudlik    1.124             (const UChar*)str1._rep->data,
1108                                  (const UChar*)str2._rep->data,
1109                                  U_FOLD_CASE_DEFAULT
1110                                  );
1111 yi.zhou        1.108     }
1112 kumpf          1.40  
1113 mike           1.112 #endif /* PEGASUS_HAS_ICU */
1114                      
1115                          const Uint16* s1 = str1._rep->data;
1116                          const Uint16* s2 = str2._rep->data;
1117                      
1118                          while (*s1 && *s2)
1119 kumpf          1.40      {
1120 mike           1.112         int r = _toLower(*s1++) - _toLower(*s2++);
1121 kumpf          1.40  
1122 david.dillard  1.105         if (r)
1123                                  return r;
1124 kumpf          1.40      }
1125                      
1126 mike           1.112     if (*s2)
1127 david.dillard  1.105         return -1;
1128 mike           1.112     else if (*s1)
1129 david.dillard  1.105         return 1;
1130 kumpf          1.40  
1131                          return 0;
1132                      }
1133                      
1134 mike           1.112 Boolean StringEqualNoCase(const String& s1, const String& s2)
1135 mike           1.27  {
1136 mike           1.112 #ifdef PEGASUS_HAS_ICU
1137                      
1138                          return String::compareNoCase(s1, s2) == 0;
1139                      
1140                      #else /* PEGASUS_HAS_ICU */
1141 mike           1.27  
1142 mike           1.112     // The following employs loop unrolling for efficiency. Please do not
1143                          // eliminate.
1144 kumpf          1.39  
1145 mike           1.112     Uint16* p = (Uint16*)s1.getChar16Data();
1146                          Uint16* q = (Uint16*)s2.getChar16Data();
1147                          Uint32 n = s2.size();
1148                      
1149                          while (n >= 8)
1150                          {
1151                              if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1152                                  ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1153                                  ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1154                                  ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) ||
1155                                  ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) ||
1156                                  ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) ||
1157                                  ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) ||
1158                                  ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7]))))
1159                              {
1160                                  return false;
1161                              }
1162 kumpf          1.39  
1163 mike           1.112         n -= 8;
1164                              p += 8;
1165                              q += 8;
1166                          }
1167 mike           1.27  
1168 mike           1.112     while (n >= 4)
1169 kumpf          1.39      {
1170 mike           1.112         if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1171                                  ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1172                                  ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1173                                  ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
1174 david.dillard  1.105         {
1175 mike           1.112             return false;
1176 david.dillard  1.105         }
1177 mike           1.112 
1178                              n -= 4;
1179                              p += 4;
1180                              q += 4;
1181                          }
1182                      
1183                          while (n--)
1184                          {
1185                              if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
1186 david.dillard  1.105             return false;
1187 mike           1.112 
1188                              p++;
1189                              q++;
1190 kumpf          1.39      }
1191 mike           1.28  
1192 kumpf          1.39      return true;
1193 mike           1.112 
1194                      #endif /* PEGASUS_HAS_ICU */
1195 david          1.69  }
1196                      
1197 mike           1.112 Boolean String::equalNoCase(const String& s1, const char* s2)
1198 david          1.69  {
1199 mike           1.112     _checkNullPointer(s2);
1200 david          1.69  
1201 mike           1.112 #if defined(PEGASUS_HAS_ICU)
1202 david          1.69  
1203 mike           1.112     return String::equalNoCase(s1, String(s2));
1204 david          1.69  
1205 mike           1.112 #elif defined(PEGASUS_STRING_NO_UTF8)
1206 david          1.69  
1207 mike           1.112     const Uint16* p1 = (Uint16*)s1._rep->data;
1208                          const char* p2 = s2;
1209                          size_t n = s1._rep->size;
1210 david.dillard  1.105 
1211 mike           1.112     while (n--)
1212                          {
1213                              if (!*p2)
1214                                  return false;
1215 david          1.71  
1216 mike           1.112         if (_toUpper(*p1++) != _toUpperTable[int(*p2++)])
1217                                  return false;
1218                          }
1219 kumpf          1.42  
1220 mike           1.112     if (*p2)
1221                              return false;
1222 david.dillard  1.116 
1223 mike           1.112     return true;
1224 karl           1.36  
1225 mike           1.112 #else /* PEGASUS_HAS_ICU */
1226 david.dillard  1.105 
1227 mike           1.112     // ATTN: optimize this!
1228                          return String::equalNoCase(s1, String(s2));
1229 david.dillard  1.105 
1230 mike           1.112 #endif /* PEGASUS_HAS_ICU */
1231                      }
1232 chuck          1.78  
1233 mike           1.112 Boolean String::equal(const String& s1, const String& s2)
1234 karl           1.36  {
1235 david.dillard  1.116     return s1._rep->size == s2._rep->size && memcmp(s1._rep->data,
1236 mike           1.112         s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;
1237 karl           1.36  }
1238                      
1239 mike           1.112 Boolean String::equal(const String& s1, const char* s2)
1240                      {
1241                      #ifdef PEGASUS_STRING_NO_UTF8
1242 kumpf          1.35  
1243 mike           1.112     _checkNullPointer(s2);
1244 kumpf          1.39  
1245 mike           1.112     const Uint16* p = (Uint16*)s1._rep->data;
1246                          const char* q = s2;
1247 kumpf          1.39  
1248 mike           1.112     while (*p && *q)
1249                          {
1250                              if (*p++ != Uint16(*q++))
1251                                  return false;
1252                          }
1253 kumpf          1.39  
1254 mike           1.112     return !(*p || *q);
1255 kumpf          1.39  
1256 mike           1.112 #else /* PEGASUS_STRING_NO_UTF8 */
1257 kumpf          1.39  
1258 mike           1.112     return String::equal(s1, String(s2));
1259 kumpf          1.39  
1260 mike           1.112 #endif /* PEGASUS_STRING_NO_UTF8 */
1261 kumpf          1.39  }
1262                      
1263 kumpf          1.47  PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
1264 kumpf          1.39  {
1265 mike           1.112 #if defined(PEGASUS_HAS_ICU)
1266 david          1.69  
1267 yi.zhou        1.108     if (InitializeICU::initICUSuccessful())
1268                          {
1269 david.dillard  1.105         char *buf = NULL;
1270                              const int size = str.size() * 6;
1271 mike           1.112         UnicodeString UniStr(
1272                                  (const UChar *)str.getChar16Data(), (int32_t)str.size());
1273 david.dillard  1.105         Uint32 bufsize = UniStr.extract(0,size,buf);
1274                              buf = new char[bufsize+1];
1275                              UniStr.extract(0,bufsize,buf);
1276                              os << buf;
1277                              os.flush();
1278                              delete [] buf;
1279 david.dillard  1.116         return os;
1280 yi.zhou        1.108     }
1281 mike           1.112 
1282 david.dillard  1.116 #endif  // PEGASUS_HAS_ICU
1283 mike           1.112 
1284                          for (Uint32 i = 0, n = str.size(); i < n; i++)
1285 yi.zhou        1.108     {
1286 mike           1.112         Uint16 code = str[i];
1287 david.dillard  1.105 
1288 mike           1.112         if (code > 0 && !(code & 0xFF00))
1289                                      os << char(code);
1290                              else
1291                                  {
1292                                  // Print in hex format:
1293                                  char buffer[8];
1294                                  sprintf(buffer, "\\x%04X", code);
1295                                  os << buffer;
1296 david.dillard  1.105         }
1297 yi.zhou        1.108     }
1298 kumpf          1.39  
1299                          return os;
1300                      }
1301                      
1302 mike           1.112 void StringAppendCharAux(StringRep*& _rep)
1303 kumpf          1.39  {
1304 mike           1.112     StringRep* tmp;
1305                      
1306                          if (_rep->cap)
1307                          {
1308                              tmp = StringRep::alloc(2 * _rep->cap);
1309                              tmp->size = _rep->size;
1310                              _copy(tmp->data, _rep->data, _rep->size);
1311                          }
1312                          else
1313                          {
1314                              tmp = StringRep::alloc(8);
1315                              tmp->size = 0;
1316                          }
1317                      
1318                          StringRep::unref(_rep);
1319                          _rep = tmp;
1320 kumpf          1.39  }
1321                      
1322 thilo.boehm    1.128 void AssignASCII(String& s, const char* str, Uint32 n)
1323                      {
1324                          class StringLayout
1325                          {
1326                          public:
1327                              StringRep* rep;
1328                          };
1329                      
1330                          StringLayout* that = (StringLayout*)&s;
1331                      
1332                          _checkNullPointer(str);
1333                      
1334                          if (n > that->rep->cap || that->rep->refs.get() != 1)
1335                          {
1336                              StringRep::unref(that->rep);
1337                              that->rep = StringRep::alloc(n);
1338                          }
1339                      
1340                          _copy(that->rep->data, str, n);
1341                          that->rep->size = n;
1342                          that->rep->data[that->rep->size] = 0;
1343 thilo.boehm    1.128 }
1344                      
1345 mike           1.112 PEGASUS_NAMESPACE_END
1346                      
1347                      /*
1348                      ================================================================================
1349                      
1350                      String optimizations:
1351                      
1352                          1.  Added mechanism allowing certain functions to be inlined only when
1353                              used by internal Pegasus modules. External modules (i.e., providers)
1354                              link to a non-inline version, which allows for binary compatibility.
1355                      
1356                          2.  Implemented copy-on-write with atomic increment/decrement. This
1357                              yielded a 10% improvement for the 'gc' benchmark and an 11% improvement
1358                              for the 'ni1000' benchmark.
1359                      
1360                          3.  Employed loop unrolling in several places. For example, see:
1361                      
1362                                  static Uint16* _find(const Uint16* s, size_t n, Uint16 c);
1363                      
1364                          4.  Used the "empty-rep" optimization (described in whitepaper from the
1365                              GCC Developers Summit). This reduced default construction to a simple
1366 mike           1.112         pointer assignment.
1367                      
1368                                  inline String::String() : _rep(&_emptyRep) { }
1369                      
1370                          5.  Implemented Uint16 versions of toupper() and tolower() using tables.
1371                              For example:
1372                      
1373                                  static const char _upper[] =
1374                                  {
1375                                      0,1,2,...255
1376                                  };
1377                      
1378                                  inline Uint16 _toUpper(Uint16 x)
1379                                  {
1380                                      return (x & 0xFF00) ? x : _upper[x];
1381                                  }
1382                      
1383 david.dillard  1.116         This outperforms the system implementation by avoiding an anding
1384 mike           1.112         operation.
1385                      
1386 david.dillard  1.116     6.  Implemented char* version of the following member functions to
1387                              eliminate unnecessary creation of anonymous string objects
1388 mike           1.112         (temporaries).
1389                      
1390                                  String(const String& s1, const char* s2);
1391                                  String(const char* s1, const String& s2);
1392                                  String& String::operator=(const char* str);
1393                                  Uint32 String::find(const char* s) const;
1394                                  bool String::equal(const String& s1, const char* s2);
1395                                  static int String::compare(const String& s1, const char* s2);
1396                                  String& String::append(const char* str);
1397                                  String& String::append(const char* str, Uint32 size);
1398                                  static bool String::equalNoCase(const String& s1, const char* s2);
1399                                  String& operator=(const char* str)
1400                                  String& String::assign(const char* str)
1401                                  String& String::append(const char* str)
1402                                  Boolean operator==(const String& s1, const char* s2)
1403                                  Boolean operator==(const char* s1, const String& s2)
1404                                  Boolean operator!=(const String& s1, const char* s2)
1405                                  Boolean operator!=(const char* s1, const String& s2)
1406                                  Boolean operator<(const String& s1, const char* s2)
1407                                  Boolean operator<(const char* s1, const String& s2)
1408                                  Boolean operator>(const String& s1, const char* s2)
1409 mike           1.112             Boolean operator>(const char* s1, const String& s2)
1410                                  Boolean operator<=(const String& s1, const char* s2)
1411                                  Boolean operator<=(const char* s1, const String& s2)
1412                                  Boolean operator>=(const String& s1, const char* s2)
1413                                  Boolean operator>=(const char* s1, const String& s2)
1414                                  String operator+(const String& s1, const char* s2)
1415                                  String operator+(const char* s1, const String& s2)
1416                      
1417 david.dillard  1.116     7.  Optimized _roundUpToPow2(), used in rounding the capacity to the next
1418 mike           1.112         power of two (algorithm from the book "Hacker's Delight").
1419                      
1420                                  static Uint32 _roundUpToPow2(Uint32 x)
1421                                  {
1422                                      if (x < 8)
1423                                          return 8;
1424                      
1425                                      x--;
1426                                      x |= (x >> 1);
1427                                      x |= (x >> 2);
1428                                      x |= (x >> 4);
1429                                      x |= (x >> 8);
1430                                      x |= (x >> 16);
1431                                      x++;
1432                      
1433                                      return x;
1434                                  }
1435                      
1436                          8.  Implemented "concatenating constructors" to eliminate temporaries
1437 david.dillard  1.116         created by operator+(). This scheme employs the "return-value
1438 mike           1.112         optimization" described by Stan Lippman.
1439                      
1440                                  inline String operator+(const String& s1, const String& s2)
1441                                  {
1442                                      return String(s1, s2, 0);
1443                                  }
1444                      
1445                          9.  Experimented to find the optimal initial size for a short string.
1446                              Eight seems to offer the best tradeoff between space and time.
1447                      
1448                          10. Inlined all members of the Char16 class.
1449                      
1450                          11. Used Uint16 internally in the String class. This showed no improvement
1451                              since Char16 was already fully inlined and was essentially reduced to
1452                              Uint16 in any case.
1453                      
1454                          12. Implemented conditional logic (#if) allowing error checking logic to
1455 david.dillard  1.116         be excluded to better performance. Examples include bounds checking
1456 mike           1.112         and null-pointer checking.
1457                      
1458                          13. Used memcpy() and memcmp() where possible. These are implemented using
1459                              the rep family of instructions under Intel and are much faster.
1460                      
1461 david.dillard  1.116     14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
1462 mike           1.112         copy routine overhead.
1463                      
1464                          15. Added ASCII7 form of the constructor and assign().
1465                      
1466                                  String s("hello world", String::ASCII7);
1467                      
1468                                  s.assignASCII7("hello world");
1469                      
1470                              This avoids slower UTF8 processing when not needed.
1471                      
1472                      ================================================================================
1473                      */

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2