(file) Return to String.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 karl  1.119 //%2006////////////////////////////////////////////////////////////////////////
   2 mike  1.27  //
   3 karl  1.97  // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
   4             // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
   5             // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
   6 karl  1.85  // IBM Corp.; EMC Corporation, The Open Group.
   7 karl  1.97  // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
   8             // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
   9 karl  1.98  // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
  10             // EMC Corporation; VERITAS Software Corporation; The Open Group.
  11 karl  1.119 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
  12             // EMC Corporation; Symantec Corporation; The Open Group.
  13 mike  1.27  //
  14             // Permission is hereby granted, free of charge, to any person obtaining a copy
  15 kumpf 1.41  // of this software and associated documentation files (the "Software"), to
  16             // deal in the Software without restriction, including without limitation the
  17             // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  18 mike  1.27  // sell copies of the Software, and to permit persons to whom the Software is
  19             // furnished to do so, subject to the following conditions:
  20 karl  1.119 // 
  21 kumpf 1.41  // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
  22 mike  1.27  // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
  23             // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
  24 kumpf 1.41  // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
  25             // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  26             // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  27 mike  1.27  // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  28             // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  29             //
  30             //==============================================================================
  31             //
  32 mike  1.112 // Author: Mike Brasher (mbrasher@austin.rr.com)
  33 mike  1.27  //
  34 david.dillard 1.116 // Modified By:
  35 mike          1.112 //     Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
  36                     //     Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297
  37 david.dillard 1.116 //     David Dillard, Symantec Corp. (david_dillard@symantec.com)
  38 mike          1.112 //     Mike Brasher (mike-brasher@austin.rr.com)
  39 mike          1.27  //
  40                     //%/////////////////////////////////////////////////////////////////////////////
  41                     
  42 jim.wunderlich 1.115 #include <Pegasus/Common/PegasusAssert.h>
  43 mike           1.113 #include <cstring>
  44 kumpf          1.48  #include "InternalException.h"
  45 david          1.69  #include "CommonUTF.h"
  46 mike           1.112 #include "MessageLoader.h"
  47                      #include "StringRep.h"
  48 david          1.69  
  49                      #ifdef PEGASUS_HAS_ICU
  50 chuck          1.99  #include <unicode/ustring.h>
  51                      #include <unicode/uchar.h>
  52 david          1.69  #endif
  53                      
  54 mike           1.112 PEGASUS_NAMESPACE_BEGIN
  55 mike           1.28  
  56 mike           1.112 //==============================================================================
  57                      //
  58                      // Compile-time macros (undefined by default).
  59                      //
  60                      //     PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
  61                      //
  62                      //==============================================================================
  63 mike           1.27  
  64 mike           1.112 //==============================================================================
  65 kumpf          1.39  //
  66 mike           1.112 // File-scope definitions:
  67 kumpf          1.54  //
  68 mike           1.112 //==============================================================================
  69                      
  70                      // Note: this table is much faster than the system toupper(). Please do not
  71                      // change.
  72 kumpf          1.54  
  73 david.dillard  1.116 const Uint8 _toUpperTable[256] =
  74 kumpf          1.54  {
  75 mike           1.112     0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
  76                          0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
  77                          0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
  78                          0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
  79                          0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
  80                          0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
  81                          0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
  82                          0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
  83                          0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
  84                          0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
  85                          0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
  86                          0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
  87                          0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
  88                          0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
  89                          0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
  90                          0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
  91                          0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
  92                          0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
  93                          0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
  94                          0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
  95                          0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
  96 mike           1.112     0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
  97                          0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
  98                          0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
  99                          0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
 100                          0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
 101                          0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
 102                          0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
 103                          0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
 104                          0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
 105                          0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
 106                          0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
 107                      };
 108                      
 109                      // Note: this table is much faster than the system tulower(). Please do not
 110                      // change.
 111                      
 112 david.dillard  1.116 const Uint8 _toLowerTable[256] =
 113 mike           1.112 {
 114                          0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
 115                          0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
 116                          0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
 117                          0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
 118                          0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
 119                          0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
 120                          0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
 121                          0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
 122                          0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
 123                          0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
 124                          0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
 125                          0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
 126                          0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
 127                          0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
 128                          0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
 129                          0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
 130                          0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
 131                          0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
 132                          0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
 133                          0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
 134 mike           1.112     0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
 135                          0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
 136                          0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
 137                          0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
 138                          0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
 139                          0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
 140                          0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
 141                          0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
 142                          0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
 143                          0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
 144                          0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
 145                          0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
 146                      };
 147                      
 148                      // Converts 16-bit characters to upper case. This routine is faster than the
 149                      // system toupper(). Please do not change.
 150                      inline Uint16 _toUpper(Uint16 x)
 151                      {
 152                          return (x & 0xFF00) ? x : _toUpperTable[x];
 153 kumpf          1.54  }
 154                      
 155 mike           1.112 // Converts 16-bit characters to lower case. This routine is faster than the
 156                      // system toupper(). Please do not change.
 157                      inline Uint16 _toLower(Uint16 x)
 158 kumpf          1.54  {
 159 mike           1.112     return (x & 0xFF00) ? x : _toLowerTable[x];
 160                      }
 161                      
 162                      // Rounds x up to the nearest power of two (or just returns 8 if x < 8).
 163                      static Uint32 _roundUpToPow2(Uint32 x)
 164                      {
 165 dave.sudlik    1.120     // Check for potential overflow in x
 166                          PEGASUS_CHECK_CAPACITY_OVERFLOW(x);
 167 mike           1.112 
 168                          if (x < 8)
 169                              return 8;
 170                      
 171                          x--;
 172                          x |= (x >> 1);
 173                          x |= (x >> 2);
 174                          x |= (x >> 4);
 175                          x |= (x >> 8);
 176                          x |= (x >> 16);
 177                          x++;
 178                      
 179                          return x;
 180                      }
 181                      
 182                      template<class P, class Q>
 183                      static void _copy(P* p, const Q* q, size_t n)
 184                      {
 185                          // The following employs loop unrolling for efficiency. Please do not
 186                          // eliminate.
 187                      
 188 mike           1.112     while (n >= 8)
 189                          {
 190                              p[0] = q[0];
 191                              p[1] = q[1];
 192                              p[2] = q[2];
 193                              p[3] = q[3];
 194                              p[4] = q[4];
 195                              p[5] = q[5];
 196                              p[6] = q[6];
 197                              p[7] = q[7];
 198                              p += 8;
 199                              q += 8;
 200                              n -= 8;
 201                          }
 202                      
 203                          while (n >= 4)
 204                          {
 205                              p[0] = q[0];
 206                              p[1] = q[1];
 207                              p[2] = q[2];
 208                              p[3] = q[3];
 209 mike           1.112         p += 4;
 210                              q += 4;
 211                              n -= 4;
 212                          }
 213                      
 214                          while (n--)
 215                              *p++ = *q++;
 216                      }
 217                      
 218                      static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
 219                      {
 220                          // The following employs loop unrolling for efficiency. Please do not
 221                          // eliminate.
 222                      
 223                          while (n >= 4)
 224                          {
 225                              if (s[0] == c)
 226                                  return (Uint16*)s;
 227                              if (s[1] == c)
 228                                  return (Uint16*)&s[1];
 229                              if (s[2] == c)
 230 mike           1.112             return (Uint16*)&s[2];
 231                              if (s[3] == c)
 232                                  return (Uint16*)&s[3];
 233 kumpf          1.82  
 234 mike           1.112         n -= 4;
 235                              s += 4;
 236                          }
 237                      
 238                          if (n)
 239                          {
 240                              if (*s == c)
 241                                  return (Uint16*)s;
 242                              s++;
 243                              n--;
 244                          }
 245                      
 246                          if (n)
 247                          {
 248                              if (*s == c)
 249                                  return (Uint16*)s;
 250                              s++;
 251                              n--;
 252                          }
 253                      
 254                          if (n && *s == c)
 255 mike           1.112         return (Uint16*)s;
 256                      
 257                          // Not found!
 258                          return 0;
 259                      }
 260                      
 261                      static int _compare(const Uint16* s1, const Uint16* s2)
 262                      {
 263                          while (*s1 && *s2)
 264                          {
 265                              int r = *s1++ - *s2++;
 266                      
 267                              if (r)
 268                                  return r;
 269                          }
 270                      
 271                          if (*s2)
 272                              return -1;
 273                          else if (*s1)
 274                              return 1;
 275                      
 276 mike           1.112     return 0;
 277                      }
 278                      
 279                      static int _compareNoUTF8(const Uint16* s1, const char* s2)
 280                      {
 281                          Uint16 c1;
 282                          Uint16 c2;
 283                      
 284                          do
 285                          {
 286                              c1 = *s1++;
 287                              c2 = *s2++;
 288                      
 289                              if (c1 == 0)
 290                                  return c1 - c2;
 291                          }
 292                          while (c1 == c2);
 293                      
 294                          return c1 - c2;
 295                      }
 296                      
 297 mike           1.112 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)
 298                      {
 299                          memcpy(s1, s2, n * sizeof(Uint16));
 300                      }
 301                      
 302                      void StringThrowOutOfBounds()
 303                      {
 304                          throw IndexOutOfBoundsException();
 305                      }
 306                      
 307                      inline void _checkNullPointer(const void* ptr)
 308                      {
 309                          if (!ptr)
 310                              throw NullPointer();
 311                      }
 312                      
 313                      static void _StringThrowBadUTF8(Uint32 index)
 314                      {
 315                          MessageLoaderParms parms(
 316                              "Common.String.BAD_UTF8",
 317                              "The byte sequence starting at index $0 "
 318 mike           1.112         "is not valid UTF-8 encoding.",
 319                              index);
 320                          throw Exception(parms);
 321                      }
 322                      
 323                      static size_t _copyFromUTF8(
 324 david.dillard  1.116     Uint16* dest,
 325                          const char* src,
 326 mike           1.112     size_t n,
 327                          size_t& utf8_error_index)
 328                      {
 329                          Uint16* p = dest;
 330                          const Uint8* q = (const Uint8*)src;
 331                      
 332                          // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later).
 333                          // Use loop-unrolling.
 334                      
 335                          while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0)
 336                          {
 337                              p[0] = q[0];
 338                              p[1] = q[1];
 339                              p[2] = q[2];
 340                              p[3] = q[3];
 341                              p[4] = q[4];
 342                              p[5] = q[5];
 343                              p[6] = q[6];
 344                              p[7] = q[7];
 345                              p += 8;
 346                              q += 8;
 347 mike           1.112         n -= 8;
 348                          }
 349                      
 350                          while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0)
 351                          {
 352                              p[0] = q[0];
 353                              p[1] = q[1];
 354                              p[2] = q[2];
 355                              p[3] = q[3];
 356                              p += 4;
 357                              q += 4;
 358                              n -= 4;
 359                          }
 360                      
 361                          switch (n)
 362                          {
 363                              case 0:
 364                                  return p - dest;
 365                              case 1:
 366                                  if (q[0] < 128)
 367                                  {
 368 mike           1.112                 p[0] = q[0];
 369                                      return p + 1 - dest;
 370                                  }
 371                                  break;
 372                              case 2:
 373                                  if (((q[0]|q[1]) & 0x80) == 0)
 374                                  {
 375                                      p[0] = q[0];
 376                                      p[1] = q[1];
 377                                      return p + 2 - dest;
 378                                  }
 379                                  break;
 380                              case 3:
 381                                  if (((q[0]|q[1]|q[2]) & 0x80) == 0)
 382                                  {
 383                                      p[0] = q[0];
 384                                      p[1] = q[1];
 385                                      p[2] = q[2];
 386                                      return p + 3 - dest;
 387                                  }
 388                                  break;
 389 mike           1.112     }
 390                      
 391                          // Process remaining characters.
 392                      
 393                          while (n)
 394                          {
 395                              // Optimize for 7-bit ASCII case.
 396                      
 397                              if (*q < 128)
 398                              {
 399                                  *p++ = *q++;
 400                                  n--;
 401                              }
 402                              else
 403                              {
 404                                  Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1;
 405                      
 406                                  if (c > n || !isValid_U8(q, c) ||
 407                                      UTF8toUTF16(&q, q + c, &p, p + n) != 0)
 408                                  {
 409                                      utf8_error_index = q - (const Uint8*)src;
 410 mike           1.112                 return size_t(-1);
 411                                  }
 412                      
 413                                  n -= c;
 414                              }
 415                          }
 416                      
 417                          return p - dest;
 418                      }
 419                      
 420 david.dillard  1.116 // Note: dest must be at least three times src (plus an extra byte for
 421 mike           1.112 // terminator).
 422                      static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n)
 423                      {
 424                          // The following employs loop unrolling for efficiency. Please do not
 425                          // eliminate.
 426                      
 427                          const Uint16* q = src;
 428                          Uint8* p = (Uint8*)dest;
 429                      
 430                          while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
 431 kumpf          1.82      {
 432 mike           1.112         p[0] = q[0];
 433                              p[1] = q[1];
 434                              p[2] = q[2];
 435                              p[3] = q[3];
 436                              p += 4;
 437                              q += 4;
 438                              n -= 4;
 439 kumpf          1.82      }
 440 mike           1.112 
 441                          switch (n)
 442                          {
 443                              case 0:
 444                                  return p - (Uint8*)dest;
 445                              case 1:
 446                                  if (q[0] < 128)
 447                                  {
 448                                      p[0] = q[0];
 449                                      return p + 1 - (Uint8*)dest;
 450                                  }
 451                                  break;
 452                              case 2:
 453                                  if (q[0] < 128 && q[1] < 128)
 454                                  {
 455                                      p[0] = q[0];
 456                                      p[1] = q[1];
 457                                      return p + 2 - (Uint8*)dest;
 458                                  }
 459                                  break;
 460                              case 3:
 461 mike           1.112             if (q[0] < 128 && q[1] < 128 && q[2] < 128)
 462                                  {
 463                                      p[0] = q[0];
 464                                      p[1] = q[1];
 465                                      p[2] = q[2];
 466                                      return p + 3 - (Uint8*)dest;
 467                                  }
 468                                  break;
 469                          }
 470                      
 471                          // If this line was reached, there must be characters greater than 128.
 472                      
 473                          UTF16toUTF8(&q, q + n, &p, p + 3 * n);
 474                      
 475                          return p - (Uint8*)dest;
 476 kumpf          1.54  }
 477                      
 478 mike           1.112 static inline size_t _convert(
 479                          Uint16* p, const char* q, size_t n, size_t& utf8_error_index)
 480 kumpf          1.54  {
 481 mike           1.112 #ifdef PEGASUS_STRING_NO_UTF8
 482                          _copy(p, q, n);
 483                          return n;
 484                      #else
 485                          return _copyFromUTF8(p, q, n, utf8_error_index);
 486                      #endif
 487 kumpf          1.54  }
 488                      
 489 mike           1.112 //==============================================================================
 490                      //
 491                      // class CString
 492                      //
 493                      //==============================================================================
 494                      
 495                      CString::CString(const CString& cstr) : _rep(0)
 496 kumpf          1.54  {
 497 mike           1.112     if (cstr._rep)
 498 kumpf          1.82      {
 499 mike           1.112         size_t n = strlen(cstr._rep) + 1;
 500                              _rep = (char*)operator new(n);
 501                              memcpy(_rep, cstr._rep, n);
 502 kumpf          1.82      }
 503 kumpf          1.54  }
 504                      
 505 kumpf          1.56  CString& CString::operator=(const CString& cstr)
 506                      {
 507 kumpf          1.82      if (&cstr != this)
 508 kumpf          1.81      {
 509 kumpf          1.82          if (_rep)
 510                              {
 511 mike           1.112             operator delete(_rep);
 512 kumpf          1.82              _rep = 0;
 513                              }
 514 mike           1.112 
 515 kumpf          1.82          if (cstr._rep)
 516                              {
 517 mike           1.112             size_t n = strlen(cstr._rep) + 1;
 518                                  _rep = (char*)operator new(n);
 519                                  memcpy(_rep, cstr._rep, n);
 520 kumpf          1.82          }
 521 kumpf          1.81      }
 522 mike           1.112 
 523 kumpf          1.56      return *this;
 524                      }
 525                      
 526 mike           1.112 //==============================================================================
 527 kumpf          1.54  //
 528 mike           1.112 // class StringRep
 529 kumpf          1.39  //
 530 mike           1.112 //==============================================================================
 531 kumpf          1.39  
// Definition of the static StringRep::_emptyRep member. The String(const
// char*) constructors in this file point _rep at it as a placeholder before
// calling StringRep::create().
StringRep StringRep::_emptyRep;
 533 mike           1.27  
 534 mike           1.112 inline StringRep* StringRep::alloc(size_t cap)
 535 mike           1.27  {
 536 dave.sudlik    1.120     // Check for potential overflow in cap
 537                          PEGASUS_CHECK_CAPACITY_OVERFLOW(cap);
 538 mike           1.27  
 539 mike           1.112     StringRep* rep = (StringRep*)::operator new(
 540                              sizeof(StringRep) + cap * sizeof(Uint16));
 541                          rep->cap = cap;
 542                          new(&rep->refs) AtomicInt(1);
 543                      
 544                          return rep;
 545 mike           1.27  }
 546                      
 547 mike           1.112 static inline void _reserve(StringRep*& rep, Uint32 cap)
 548 chuck          1.102 {
 549 mike           1.114     if (cap > rep->cap || rep->refs.get() != 1)
 550 chuck          1.102     {
 551 mike           1.112         size_t n = _roundUpToPow2(cap);
 552                              StringRep* newRep = StringRep::alloc(n);
 553                              newRep->size = rep->size;
 554                              _copy(newRep->data, rep->data, rep->size + 1);
 555                              StringRep::unref(rep);
 556                              rep = newRep;
 557                          }
 558                      }
 559 david.dillard  1.105 
 560 mike           1.112 StringRep* StringRep::create(const Uint16* data, size_t size)
 561                      {
 562                          StringRep* rep = StringRep::alloc(size);
 563                          rep->size = size;
 564                          _copy(rep->data, data, size);
 565                          rep->data[size] = '\0';
 566                          return rep;
 567                      }
 568 chuck          1.102 
 569 mike           1.112 StringRep* StringRep::copyOnWrite(StringRep* rep)
 570                      {
 571                          // Return a new copy of rep. Release rep.
 572 chuck          1.102 
 573 mike           1.112     StringRep* newRep = StringRep::alloc(rep->size);
 574                          newRep->size = rep->size;
 575                          _copy(newRep->data, rep->data, rep->size);
 576                          newRep->data[newRep->size] = '\0';
 577                          StringRep::unref(rep);
 578                          return newRep;
 579 chuck          1.102 }
 580                      
 581 mike           1.112 StringRep* StringRep::create(const char* data, size_t size)
 582 kumpf          1.43  {
 583 mike           1.112     StringRep* rep = StringRep::alloc(size);
 584                          size_t utf8_error_index;
 585                          rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index);
 586                      
 587                          if (rep->size == size_t(-1))
 588                          {
 589                              StringRep::free(rep);
 590                              _StringThrowBadUTF8(utf8_error_index);
 591                          }
 592 kumpf          1.43  
 593 mike           1.112     rep->data[rep->size] = '\0';
 594 kumpf          1.43  
 595 mike           1.112     return rep;
 596 mike           1.27  }
 597                      
 598 mike           1.112 Uint32 StringRep::length(const Uint16* str)
 599 mike           1.27  {
 600 mike           1.112     // Note: We could unroll this but it is rarely called.
 601                      
 602                          const Uint16* end = (Uint16*)str;
 603                      
 604                          while (*end++)
 605                              ;
 606                      
 607                          return end - str - 1;
 608 kumpf          1.39  }
 609 tony           1.66  
 610 mike           1.112 //==============================================================================
 611                      //
 612                      // class String
 613                      //
 614                      //==============================================================================
 615                      
// Definition of the static constant String::EMPTY; it is default-constructed.
const String String::EMPTY;
 617 mike           1.27  
 618 kumpf          1.39  String::String(const String& str, Uint32 n)
 619                      {
 620 mike           1.112     _checkBounds(n, str._rep->size);
 621                          _rep = StringRep::create(str._rep->data, n);
 622 kumpf          1.39  }
 623                      
 624                      String::String(const Char16* str)
 625                      {
 626 mike           1.112     _checkNullPointer(str);
 627                          _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
 628 mike           1.27  }
 629                      
 630 kumpf          1.39  String::String(const Char16* str, Uint32 n)
 631                      {
 632 mike           1.112     _checkNullPointer(str);
 633                          _rep = StringRep::create((Uint16*)str, n);
 634 kumpf          1.39  }
 635                      
 636                      String::String(const char* str)
 637 mike           1.27  {
 638 mike           1.112     _checkNullPointer(str);
 639 david.dillard  1.105 
 640 mike           1.112     // Set this just in case create() throws an exception.
 641                          _rep = &StringRep::_emptyRep;
 642                          _rep = StringRep::create(str, strlen(str));
 643 mike           1.27  }
 644                      
 645 kumpf          1.39  String::String(const char* str, Uint32 n)
 646 mike           1.27  {
 647 mike           1.112     _checkNullPointer(str);
 648 david.dillard  1.105 
 649 mike           1.112     // Set this just in case create() throws an exception.
 650                          _rep = &StringRep::_emptyRep;
 651                          _rep = StringRep::create(str, n);
 652 kumpf          1.39  }
 653 mike           1.27  
 654 mike           1.112 String::String(const String& s1, const String& s2)
 655 kumpf          1.39  {
 656 mike           1.112     size_t n1 = s1._rep->size;
 657                          size_t n2 = s2._rep->size;
 658                          size_t n = n1 + n2;
 659                          _rep = StringRep::alloc(n);
 660                          _copy(_rep->data, s1._rep->data, n1);
 661                          _copy(_rep->data + n1, s2._rep->data, n2);
 662                          _rep->size = n;
 663                          _rep->data[n] = '\0';
 664 mike           1.27  }
 665                      
 666 mike           1.112 String::String(const String& s1, const char* s2)
 667 mike           1.27  {
 668 mike           1.112     _checkNullPointer(s2);
 669                          size_t n1 = s1._rep->size;
 670                          size_t n2 = strlen(s2);
 671                          _rep = StringRep::alloc(n1 + n2);
 672                          _copy(_rep->data, s1._rep->data, n1);
 673                          size_t utf8_error_index;
 674                          size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index);
 675                      
 676                          if (tmp == size_t(-1))
 677 kumpf          1.82      {
 678 mike           1.112         StringRep::free(_rep);
 679                              _rep = &StringRep::_emptyRep;
 680                              _StringThrowBadUTF8(utf8_error_index);
 681 kumpf          1.82      }
 682 mike           1.112 
 683                          _rep->size = n1 + tmp;
 684                          _rep->data[_rep->size] = '\0';
 685 mike           1.27  }
 686                      
 687 mike           1.112 String::String(const char* s1, const String& s2)
 688 mike           1.27  {
 689 mike           1.112     _checkNullPointer(s1);
 690                          size_t n1 = strlen(s1);
 691                          size_t n2 = s2._rep->size;
 692                          _rep = StringRep::alloc(n1 + n2);
 693                          size_t utf8_error_index;
 694                          size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index);
 695                      
 696                          if (tmp ==  size_t(-1))
 697                          {
 698                              StringRep::free(_rep);
 699                              _rep = &StringRep::_emptyRep;
 700                              _StringThrowBadUTF8(utf8_error_index);
 701                          }
 702                      
 703                          _rep->size = n2 + tmp;
 704                          _copy(_rep->data + n1, s2._rep->data, n2);
 705                          _rep->data[_rep->size] = '\0';
 706 mike           1.27  }
 707                      
 708 mike           1.112 String& String::assign(const String& str)
 709 mike           1.27  {
 710 mike           1.112     if (_rep != str._rep)
 711 david.dillard  1.105     {
 712 mike           1.112         StringRep::unref(_rep);
 713                              StringRep::ref(_rep = str._rep);
 714 david.dillard  1.105     }
 715                      
 716 mike           1.27      return *this;
 717                      }
 718                      
 719                      String& String::assign(const Char16* str, Uint32 n)
 720                      {
 721 mike           1.112     _checkNullPointer(str);
 722                      
 723 mike           1.114     if (n > _rep->cap || _rep->refs.get() != 1)
 724 david.dillard  1.105     {
 725 mike           1.112         StringRep::unref(_rep);
 726                              _rep = StringRep::alloc(n);
 727 david.dillard  1.105     }
 728                      
 729 mike           1.112     _rep->size = n;
 730                          _copy(_rep->data, (Uint16*)str, n);
 731                          _rep->data[n] = '\0';
 732                      
 733 mike           1.27      return *this;
 734                      }
 735                      
 736 mike           1.112 String& String::assign(const char* str, Uint32 n)
 737 chuck          1.102 {
 738 mike           1.112     _checkNullPointer(str);
 739                      
 740 mike           1.114     if (n > _rep->cap || _rep->refs.get() != 1)
 741 david.dillard  1.105     {
 742 mike           1.112         StringRep::unref(_rep);
 743                              _rep = StringRep::alloc(n);
 744 david.dillard  1.105     }
 745                      
 746 mike           1.112     size_t utf8_error_index;
 747                          _rep->size = _convert(_rep->data, str, n, utf8_error_index);
 748 chuck          1.102 
 749 mike           1.112     if (_rep->size ==  size_t(-1))
 750 david.dillard  1.105     {
 751 mike           1.112         StringRep::free(_rep);
 752                              _rep = &StringRep::_emptyRep;
 753                              _StringThrowBadUTF8(utf8_error_index);
 754 david.dillard  1.105     }
 755 mike           1.112 
 756                          _rep->data[_rep->size] = 0;
 757 david.dillard  1.105 
 758 mike           1.27      return *this;
 759                      }
 760                      
 761 kumpf          1.39  void String::clear()
 762                      {
 763 mike           1.112     if (_rep->size)
 764                          {
 765 mike           1.114         if (_rep->refs.get() == 1)
 766 mike           1.112         {
 767                                  _rep->size = 0;
 768                                  _rep->data[0] = '\0';
 769                              }
 770                              else
 771                              {
 772                                  StringRep::unref(_rep);
 773                                  _rep = &StringRep::_emptyRep;
 774                              }
 775                          }
 776 kumpf          1.39  }
 777                      
 778 mike           1.112 void String::reserveCapacity(Uint32 cap)
 779 kumpf          1.39  {
 780 mike           1.112     _reserve(_rep, cap);
 781 kumpf          1.39  }
 782                      
 783 mike           1.112 CString String::getCString() const
 784                      {
 785 david.dillard  1.116     // A UTF8 string can have three times as many characters as its UTF16
 786                          // counterpart, so we allocate extra memory for the worst case. In the
 787 mike           1.112     // best case, we may need only one third of the memory allocated. But
 788 david.dillard  1.116     // downsizing the string afterwards is expensive and unecessary since
 789                          // CString objects are usually short-lived (disappearing after only a few
 790 mike           1.112     // instructions). CString objects are typically created on the stack as
 791                          // means to obtain a char* pointer.
 792                      
 793                      #ifdef PEGASUS_STRING_NO_UTF8
 794                          char* str = (char*)operator new(_rep->size + 1);
 795                          _copy(str, _rep->data, _rep->size);
 796                          str[_rep->size] = '\0';
 797                          return CString(str);
 798 gs.keenan      1.110 #else
 799 mike           1.112     Uint32 n = 3 * _rep->size;
 800                          char* str = (char*)operator new(n + 1);
 801                          size_t size = _copyToUTF8(str, _rep->data, _rep->size);
 802                          str[size] = '\0';
 803                          return CString(str);
 804 gs.keenan      1.110 #endif
 805 kumpf          1.39  }
 806                      
 807 mike           1.112 String& String::append(const Char16* str, Uint32 n)
 808 kumpf          1.39  {
 809 mike           1.112     _checkNullPointer(str);
 810                      
 811                          size_t oldSize = _rep->size;
 812                          size_t newSize = oldSize + n;
 813                          _reserve(_rep, newSize);
 814                          _copy(_rep->data + oldSize, (Uint16*)str, n);
 815                          _rep->size = newSize;
 816                          _rep->data[newSize] = '\0';
 817                      
 818                          return *this;
 819 kumpf          1.39  }
 820                      
 821 mike           1.112 String& String::append(const String& str)
 822 mike           1.27  {
 823 w.otsuka       1.121     return append((Char16*)(&(str._rep->data[0])), str._rep->size);
 824 mike           1.27  }
 825                      
 826 mike           1.112 String& String::append(const char* str, Uint32 size)
 827 mike           1.27  {
 828 mike           1.112     _checkNullPointer(str);
 829                      
 830                          size_t oldSize = _rep->size;
 831                          size_t cap = oldSize + size;
 832                      
 833                          _reserve(_rep, cap);
 834                          size_t utf8_error_index;
 835                          size_t tmp = _convert(
 836                              (Uint16*)_rep->data + oldSize, str, size, utf8_error_index);
 837                      
 838                          if (tmp ==  size_t(-1))
 839                          {
 840                              StringRep::free(_rep);
 841                              _rep = &StringRep::_emptyRep;
 842                              _StringThrowBadUTF8(utf8_error_index);
 843                          }
 844 mike           1.27  
 845 mike           1.112     _rep->size += tmp;
 846                          _rep->data[_rep->size] = '\0';
 847 mike           1.27  
 848 kumpf          1.39      return *this;
 849                      }
 850                      
 851 mike           1.112 void String::remove(Uint32 index, Uint32 n)
 852 mike           1.27  {
 853 mike           1.112     if (n == PEG_NOT_FOUND)
 854                              n = _rep->size - index;
 855                      
 856                          _checkBounds(index + n, _rep->size);
 857                      
 858 mike           1.114     if (_rep->refs.get() != 1)
 859 mike           1.112         _rep = StringRep::copyOnWrite(_rep);
 860 mike           1.27  
 861 jim.wunderlich 1.115     PEGASUS_ASSERT(index + n <= _rep->size);
 862 mike           1.27  
 863 mike           1.112     size_t rem = _rep->size - (index + n);
 864                          Uint16* data = _rep->data;
 865 mike           1.27  
 866 mike           1.112     if (rem)
 867                              memmove(data + index, data + index + n, rem * sizeof(Uint16));
 868 mike           1.27  
 869 mike           1.112     _rep->size -= n;
 870                          data[_rep->size] = '\0';
 871 mike           1.27  }
 872                      
 873 mike           1.112 String String::subString(Uint32 index, Uint32 n) const
 874 mike           1.27  {
 875 mike           1.112     // Note: this implementation is very permissive but used for
 876                          // backwards compatibility.
 877                      
 878                          if (index < _rep->size)
 879 mike           1.27      {
 880 mike           1.112         if (n == PEG_NOT_FOUND || n > _rep->size - index)
 881                                  n = _rep->size - index;
 882 mike           1.27  
 883 w.otsuka       1.121         return String((Char16*)(_rep->data + index), n);
 884 mike           1.27      }
 885 david.dillard  1.105 
 886                          return String();
 887 mike           1.27  }
 888                      
 889                      Uint32 String::find(Char16 c) const
 890                      {
 891 mike           1.112     Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
 892 mike           1.27  
 893 mike           1.112     if (p)
 894 david.dillard  1.116         return static_cast<Uint32>(p - _rep->data);
 895 mike           1.27  
 896                          return PEG_NOT_FOUND;
 897                      }
 898                      
 899 kumpf          1.53  Uint32 String::find(Uint32 index, Char16 c) const
 900 mike           1.30  {
 901 mike           1.112     _checkBounds(index, _rep->size);
 902                      
 903                          if (index >= _rep->size)
 904                              return PEG_NOT_FOUND;
 905                      
 906                          Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
 907 mike           1.30  
 908 mike           1.112     if (p)
 909 david.dillard  1.116         return static_cast<Uint32>(p - _rep->data);
 910 mike           1.30  
 911                          return PEG_NOT_FOUND;
 912                      }
 913                      
 914 mike           1.112 Uint32 StringFindAux(
 915                          const StringRep* _rep, const Char16* s, Uint32 n)
 916 mike           1.27  {
 917 mike           1.112     _checkNullPointer(s);
 918 mike           1.27  
 919 mike           1.112     const Uint16* data = _rep->data;
 920                          size_t rem = _rep->size;
 921                      
 922                          while (n <= rem)
 923 mike           1.30      {
 924 mike           1.112         Uint16* p = (Uint16*)_find(data, rem, s[0]);
 925                      
 926                              if (!p)
 927                                  break;
 928 mike           1.30  
 929 mike           1.112         if (memcmp(p, s, n * sizeof(Uint16)) == 0)
 930 david.dillard  1.116             return static_cast<Uint32>(p - _rep->data);
 931 david.dillard  1.105 
 932 mike           1.112         p++;
 933                              rem -= p - data;
 934                              data = p;
 935 mike           1.27      }
 936 mike           1.112 
 937 mike           1.27      return PEG_NOT_FOUND;
 938                      }
 939                      
 940 mike           1.112 Uint32 String::find(const char* s) const
 941                      {
 942                          _checkNullPointer(s);
 943                      
 944                          // Note: could optimize away creation of temporary, but this is rarely
 945                          // called.
 946                          return find(String(s));
 947                      }
 948                      
 949 mike           1.27  Uint32 String::reverseFind(Char16 c) const
 950                      {
 951 mike           1.112     Uint16 x = c;
 952                          Uint16* p = _rep->data;
 953                          Uint16* q = _rep->data + _rep->size;
 954 mike           1.27  
 955 mike           1.112     while (q != p)
 956 mike           1.27      {
 957 mike           1.112         if (*--q == x)
 958 david.dillard  1.116             return static_cast<Uint32>(q - p);
 959 mike           1.27      }
 960                      
 961                          return PEG_NOT_FOUND;
 962                      }
 963                      
 964                      void String::toLower()
 965                      {
 966 david          1.69  #ifdef PEGASUS_HAS_ICU
 967 mike           1.112 
 968 yi.zhou        1.108     if (InitializeICU::initICUSuccessful())
 969 david          1.90      {
 970 mike           1.114         if (_rep->refs.get() != 1)
 971 mike           1.112             _rep = StringRep::copyOnWrite(_rep);
 972                      
 973 yi.zhou        1.108         // This will do a locale-insensitive, but context-sensitive convert.
 974 david.dillard  1.116         // Since context-sensitive casing looks at adjacent chars, this
 975                              // prevents optimizations where the us-ascii is converted before
 976 mike           1.112         // calling ICU.
 977 yi.zhou        1.108         // The string may shrink or expand after the convert.
 978                      
 979 mike           1.112         //// First calculate size of resulting string. u_strToLower() returns
 980                              //// only the size when zero is passed as the destination size argument.
 981                      
 982 yi.zhou        1.108         UErrorCode err = U_ZERO_ERROR;
 983                      
 984 mike           1.112         int32_t newSize = u_strToLower(
 985                                  NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
 986 david.dillard  1.116 
 987 mike           1.112         err = U_ZERO_ERROR;
 988                      
 989                              //// Reserve enough space for the result.
 990                      
 991                              if ((Uint32)newSize > _rep->cap)
 992                                  _reserve(_rep, newSize);
 993                      
 994                              //// Perform the conversion (overlapping buffers are allowed).
 995 chuck          1.99  
 996 mike           1.112         u_strToLower((UChar*)_rep->data, newSize,
 997                                  (UChar*)_rep->data, _rep->size, NULL, &err);
 998 yi.zhou        1.108 
 999 mike           1.112         _rep->size = newSize;
1000                              return;
1001 david          1.90      }
1002 mike           1.112 
1003                      #endif /* PEGASUS_HAS_ICU */
1004                      
1005 mike           1.114     if (_rep->refs.get() != 1)
1006 mike           1.112         _rep = StringRep::copyOnWrite(_rep);
1007                      
1008                          Uint16* p = _rep->data;
1009                          size_t n = _rep->size;
1010                      
1011                          for (; n--; p++)
1012 david          1.90      {
1013 mike           1.112         if (!(*p & 0xFF00))
1014                                  *p = _toLower(*p);
1015 mike           1.27      }
1016 kumpf          1.39  }
1017                      
1018 chuck          1.99  void String::toUpper()
1019 david          1.90  {
1020                      #ifdef PEGASUS_HAS_ICU
1021 mike           1.112 
1022 yi.zhou        1.108     if (InitializeICU::initICUSuccessful())
1023 chuck          1.99      {
1024 mike           1.114         if (_rep->refs.get() != 1)
1025 mike           1.112             _rep = StringRep::copyOnWrite(_rep);
1026                      
1027 yi.zhou        1.108         // This will do a locale-insensitive, but context-sensitive convert.
1028 david.dillard  1.116         // Since context-sensitive casing looks at adjacent chars, this
1029                              // prevents optimizations where the us-ascii is converted before
1030 mike           1.112         // calling ICU.
1031 yi.zhou        1.108         // The string may shrink or expand after the convert.
1032                      
1033 mike           1.112         //// First calculate size of resulting string. u_strToUpper() returns
1034                              //// only the size when zero is passed as the destination size argument.
1035                      
1036 yi.zhou        1.108         UErrorCode err = U_ZERO_ERROR;
1037                      
1038 mike           1.112         int32_t newSize = u_strToUpper(
1039                                  NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
1040                      
1041                              err = U_ZERO_ERROR;
1042                      
1043                              //// Reserve enough space for the result.
1044                      
1045                              if ((Uint32)newSize > _rep->cap)
1046                                  _reserve(_rep, newSize);
1047                      
1048                              //// Perform the conversion (overlapping buffers are allowed).
1049                      
1050                              u_strToUpper((UChar*)_rep->data, newSize,
1051                                  (UChar*)_rep->data, _rep->size, NULL, &err);
1052 chuck          1.99  
1053 mike           1.112         _rep->size = newSize;
1054 yi.zhou        1.108 
1055 mike           1.112         return;
1056 david          1.91      }
1057 mike           1.112 
1058                      #endif /* PEGASUS_HAS_ICU */
1059                      
1060 mike           1.114     if (_rep->refs.get() != 1)
1061 mike           1.112         _rep = StringRep::copyOnWrite(_rep);
1062                      
1063                          Uint16* p = _rep->data;
1064                          size_t n = _rep->size;
1065                      
1066                          for (; n--; p++)
1067                              *p = _toUpper(*p);
1068 david          1.90  }
1069                      
1070 kumpf          1.43  int String::compare(const String& s1, const String& s2, Uint32 n)
1071 kumpf          1.39  {
1072 kumpf          1.118     const Uint16* p1 = s1._rep->data;
1073                          const Uint16* p2 = s2._rep->data;
1074 mike           1.27  
1075 kumpf          1.118     while (n--)
1076                          {
1077                              int r = *p1++ - *p2++;
1078                              if (r)
1079                              {
1080                                  return r;
1081                              }
1082                              else if (!p1[-1])
1083                              {
1084                                  // We must have encountered a null terminator in both s1 and s2
1085                                  return 0;
1086                              }
1087                          }
1088                          return 0;
1089 mike           1.27  }
1090                      
1091 kumpf          1.43  int String::compare(const String& s1, const String& s2)
1092 mike           1.30  {
1093 mike           1.112     return _compare(s1._rep->data, s2._rep->data);
1094                      }
1095 kumpf          1.43  
1096 mike           1.112 int String::compare(const String& s1, const char* s2)
1097                      {
1098                          _checkNullPointer(s2);
1099 mike           1.30  
1100 mike           1.112 #ifdef PEGASUS_STRING_NO_UTF8
1101                          return _compareNoUTF8(s1._rep->data, s2);
1102                      #else
1103                          // ATTN: optimize this!
1104                          return String::compare(s1, String(s2));
1105                      #endif
1106 mike           1.30  }
1107                      
1108 mike           1.112 int String::compareNoCase(const String& str1, const String& str2)
1109 kumpf          1.40  {
1110 david          1.69  #ifdef PEGASUS_HAS_ICU
1111 mike           1.112 
1112 yi.zhou        1.108     if (InitializeICU::initICUSuccessful())
1113                          {
1114 mike           1.112         return  u_strcasecmp(
1115                                  str1._rep->data, str2._rep->data, U_FOLD_CASE_DEFAULT);
1116 yi.zhou        1.108     }
1117 kumpf          1.40  
1118 mike           1.112 #endif /* PEGASUS_HAS_ICU */
1119                      
1120                          const Uint16* s1 = str1._rep->data;
1121                          const Uint16* s2 = str2._rep->data;
1122                      
1123                          while (*s1 && *s2)
1124 kumpf          1.40      {
1125 mike           1.112         int r = _toLower(*s1++) - _toLower(*s2++);
1126 kumpf          1.40  
1127 david.dillard  1.105         if (r)
1128                                  return r;
1129 kumpf          1.40      }
1130                      
1131 mike           1.112     if (*s2)
1132 david.dillard  1.105         return -1;
1133 mike           1.112     else if (*s1)
1134 david.dillard  1.105         return 1;
1135 kumpf          1.40  
1136                          return 0;
1137                      }
1138                      
1139 mike           1.112 Boolean StringEqualNoCase(const String& s1, const String& s2)
1140 mike           1.27  {
1141 mike           1.112 #ifdef PEGASUS_HAS_ICU
1142                      
1143                          return String::compareNoCase(s1, s2) == 0;
1144                      
1145                      #else /* PEGASUS_HAS_ICU */
1146 mike           1.27  
1147 mike           1.112     // The following employs loop unrolling for efficiency. Please do not
1148                          // eliminate.
1149 kumpf          1.39  
1150 mike           1.112     Uint16* p = (Uint16*)s1.getChar16Data();
1151                          Uint16* q = (Uint16*)s2.getChar16Data();
1152                          Uint32 n = s2.size();
1153                      
1154                          while (n >= 8)
1155                          {
1156                              if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1157                                  ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1158                                  ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1159                                  ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) ||
1160                                  ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) ||
1161                                  ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) ||
1162                                  ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) ||
1163                                  ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7]))))
1164                              {
1165                                  return false;
1166                              }
1167 kumpf          1.39  
1168 mike           1.112         n -= 8;
1169                              p += 8;
1170                              q += 8;
1171                          }
1172 mike           1.27  
1173 mike           1.112     while (n >= 4)
1174 kumpf          1.39      {
1175 mike           1.112         if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1176                                  ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1177                                  ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1178                                  ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
1179 david.dillard  1.105         {
1180 mike           1.112             return false;
1181 david.dillard  1.105         }
1182 mike           1.112 
1183                              n -= 4;
1184                              p += 4;
1185                              q += 4;
1186                          }
1187                      
1188                          while (n--)
1189                          {
1190                              if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
1191 david.dillard  1.105             return false;
1192 mike           1.112 
1193                              p++;
1194                              q++;
1195 kumpf          1.39      }
1196 mike           1.28  
1197 kumpf          1.39      return true;
1198 mike           1.112 
1199                      #endif /* PEGASUS_HAS_ICU */
1200 david          1.69  }
1201                      
1202 mike           1.112 Boolean String::equalNoCase(const String& s1, const char* s2)
1203 david          1.69  {
1204 mike           1.112     _checkNullPointer(s2);
1205 david          1.69  
1206 mike           1.112 #if defined(PEGASUS_HAS_ICU)
1207 david          1.69  
1208 mike           1.112     return String::equalNoCase(s1, String(s2));
1209 david          1.69  
1210 mike           1.112 #elif defined(PEGASUS_STRING_NO_UTF8)
1211 david          1.69  
1212 mike           1.112     const Uint16* p1 = (Uint16*)s1._rep->data;
1213                          const char* p2 = s2;
1214                          size_t n = s1._rep->size;
1215 david.dillard  1.105 
1216 mike           1.112     while (n--)
1217                          {
1218                              if (!*p2)
1219                                  return false;
1220 david          1.71  
1221 mike           1.112         if (_toUpper(*p1++) != _toUpperTable[int(*p2++)])
1222                                  return false;
1223                          }
1224 kumpf          1.42  
1225 mike           1.112     if (*p2)
1226                              return false;
1227 david.dillard  1.116 
1228 mike           1.112     return true;
1229 karl           1.36  
1230 mike           1.112 #else /* PEGASUS_HAS_ICU */
1231 david.dillard  1.105 
1232 mike           1.112     // ATTN: optimize this!
1233                          return String::equalNoCase(s1, String(s2));
1234 david.dillard  1.105 
1235 mike           1.112 #endif /* PEGASUS_HAS_ICU */
1236                      }
1237 chuck          1.78  
1238 mike           1.112 Boolean String::equal(const String& s1, const String& s2)
1239 karl           1.36  {
1240 david.dillard  1.116     return s1._rep->size == s2._rep->size && memcmp(s1._rep->data,
1241 mike           1.112         s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;
1242 karl           1.36  }
1243                      
1244 mike           1.112 Boolean String::equal(const String& s1, const char* s2)
1245                      {
1246                      #ifdef PEGASUS_STRING_NO_UTF8
1247 kumpf          1.35  
1248 mike           1.112     _checkNullPointer(s2);
1249 kumpf          1.39  
1250 mike           1.112     const Uint16* p = (Uint16*)s1._rep->data;
1251                          const char* q = s2;
1252 kumpf          1.39  
1253 mike           1.112     while (*p && *q)
1254                          {
1255                              if (*p++ != Uint16(*q++))
1256                                  return false;
1257                          }
1258 kumpf          1.39  
1259 mike           1.112     return !(*p || *q);
1260 kumpf          1.39  
1261 mike           1.112 #else /* PEGASUS_STRING_NO_UTF8 */
1262 kumpf          1.39  
1263 mike           1.112     return String::equal(s1, String(s2));
1264 kumpf          1.39  
1265 mike           1.112 #endif /* PEGASUS_STRING_NO_UTF8 */
1266 kumpf          1.39  }
1267                      
1268 kumpf          1.47  PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
1269 kumpf          1.39  {
                              // Writes str to os and returns os. The output strategy is selected
                              // at compile time (PEGASUS_OS_OS400, PEGASUS_HAS_ICU) and, for the
                              // ICU build, additionally at run time.
1270 mike           1.112 #if defined(PEGASUS_OS_OS400)
1271 david          1.72  
                              // Stream the C-string form of str as a const char*. The local name
                              // suggests the bytes are UTF-8 -- confirm against getCString().
1272 david          1.93      CString cstr = str.getCString();
1273 david          1.69      const char* utf8str = cstr;
1274 mike           1.112     os << utf8str;
1275                          return os;
1276 david.dillard  1.116 #else
1277 david          1.69  
1278 mike           1.112 #if defined(PEGASUS_HAS_ICU)
1279 david          1.69  
                              // Use ICU only when InitializeICU::initICUSuccessful() reports
                              // true; otherwise fall through to the escape-printing loop below.
1280 yi.zhou        1.108     if (InitializeICU::initICUSuccessful())
1281                          {
1282 david.dillard  1.105         char *buf = NULL;
1283                              const int size = str.size() * 6;
1284 mike           1.112         UnicodeString UniStr(
1285                                  (const UChar *)str.getChar16Data(), (int32_t)str.size());
                                  // The first extract() call is made with a NULL target; its
                                  // return value is then used as the buffer size for the real
                                  // extraction (presumably ICU's preflight mode -- confirm
                                  // against the ICU documentation).
1286 david.dillard  1.105         Uint32 bufsize = UniStr.extract(0,size,buf);
                                  // One byte more than the reported size is allocated
                                  // (presumably room for a terminating NUL -- confirm).
1287                              buf = new char[bufsize+1];
1288                              UniStr.extract(0,bufsize,buf);
                                  // NOTE(review): buf is released only on the normal path; if the
                                  // stream insertion or flush throws, the delete[] is skipped.
1289                              os << buf;
1290                              os.flush();
1291                              delete [] buf;
1292 david.dillard  1.116         return os;
1293 yi.zhou        1.108     }
1294 mike           1.112 
1295 david.dillard  1.116 #endif  // PEGASUS_HAS_ICU
1296 mike           1.112 
                              // Fallback: emit one element at a time. Nonzero values whose high
                              // byte is clear are written as a single char; everything else
                              // (including 0) is written as a backslash-x escape with four
                              // uppercase hex digits.
1297                          for (Uint32 i = 0, n = str.size(); i < n; i++)
1298 yi.zhou        1.108     {
1299 mike           1.112         Uint16 code = str[i];
1300 david.dillard  1.105 
1301 mike           1.112         if (code > 0 && !(code & 0xFF00))
1302                                      os << char(code);
1303                              else
1304                                  {
1305                                  // Print in hex format:
                                      // The escape needs 7 bytes (backslash, 'x', four hex
                                      // digits, NUL), so the 8-byte buffer is large enough.
1306                                  char buffer[8];
1307                                  sprintf(buffer, "\\x%04X", code);
1308                                  os << buffer;
1309 david.dillard  1.105         }
1310 yi.zhou        1.108     }
1311 kumpf          1.39  
1312                          return os;
1313 mike           1.112 #endif // PEGASUS_OS_OS400
1314 kumpf          1.39  }
1315                      
1316 mike           1.112 void StringAppendCharAux(StringRep*& _rep)
1317 kumpf          1.39  {
                              // Replaces _rep with a newly allocated StringRep and releases the
                              // caller's reference to the old one. The new pointer is returned
                              // through the reference parameter.
                              // NOTE(review): judging by the name this is the out-of-line growth
                              // path of a single-character append; callers are outside this view.
1318 mike           1.112     StringRep* tmp;
1319                      
1320                          if (_rep->cap)
1321                          {
                                  // Non-zero capacity: request twice the current capacity and
                                  // carry over the current size and elements.
1322                              tmp = StringRep::alloc(2 * _rep->cap);
1323                              tmp->size = _rep->size;
1324                              _copy(tmp->data, _rep->data, _rep->size);
1325                          }
1326                          else
1327                          {
                                  // Zero capacity: start with an allocation of 8 and no content.
1328                              tmp = StringRep::alloc(8);
1329                              tmp->size = 0;
1330                          }
1331                      
                              // Drop this reference to the old rep, then publish the new one.
1332                          StringRep::unref(_rep);
1333                          _rep = tmp;
1334 kumpf          1.39  }
1335                      
1336 mike           1.112 PEGASUS_NAMESPACE_END
1337                      
1338                      /*
1339                      ================================================================================
1340                      
1341                      String optimizations:
1342                      
1343                          1.  Added mechanism allowing certain functions to be inlined only when
1344                              used by internal Pegasus modules. External modules (i.e., providers)
1345                              link to a non-inline version, which allows for binary compatibility.
1346                      
1347                          2.  Implemented copy-on-write with atomic increment/decrement. This
1348                              yielded a 10% improvement for the 'gc' benchmark and an 11% improvement
1349                              for the 'ni1000' benchmark.
1350                      
1351                          3.  Employed loop unrolling in several places. For example, see:
1352                      
1353                                  static Uint16* _find(const Uint16* s, size_t n, Uint16 c);
1354                      
1355                          4.  Used the "empty-rep" optimization (described in whitepaper from the
1356                              GCC Developers Summit). This reduced default construction to a simple
1357 mike           1.112         pointer assignment.
1358                      
1359                                  inline String::String() : _rep(&_emptyRep) { }
1360                      
1361                          5.  Implemented Uint16 versions of toupper() and tolower() using tables.
1362                              For example:
1363                      
1364                                  static const char _upper[] =
1365                                  {
1366                                      0,1,2,...255
1367                                  };
1368                      
1369                                  inline Uint16 _toUpper(Uint16 x)
1370                                  {
1371                                      return (x & 0xFF00) ? x : _upper[x];
1372                                  }
1373                      
1374 david.dillard  1.116         This outperforms the system implementation by avoiding an anding
1375 mike           1.112         operation.
1376                      
1377 david.dillard  1.116     6.  Implemented char* version of the following member functions to
1378                              eliminate unnecessary creation of anonymous string objects
1379 mike           1.112         (temporaries).
1380                      
1381                                  String(const String& s1, const char* s2);
1382                                  String(const char* s1, const String& s2);
1383                                  String& String::operator=(const char* str);
1384                                  Uint32 String::find(const char* s) const;
1385                                  bool String::equal(const String& s1, const char* s2);
1386                                  static int String::compare(const String& s1, const char* s2);
1387                                  String& String::append(const char* str);
1388                                  String& String::append(const char* str, Uint32 size);
1389                                  static bool String::equalNoCase(const String& s1, const char* s2);
1390                                  String& operator=(const char* str)
1391                                  String& String::assign(const char* str)
1392                                  String& String::append(const char* str)
1393                                  Boolean operator==(const String& s1, const char* s2)
1394                                  Boolean operator==(const char* s1, const String& s2)
1395                                  Boolean operator!=(const String& s1, const char* s2)
1396                                  Boolean operator!=(const char* s1, const String& s2)
1397                                  Boolean operator<(const String& s1, const char* s2)
1398                                  Boolean operator<(const char* s1, const String& s2)
1399                                  Boolean operator>(const String& s1, const char* s2)
1400 mike           1.112             Boolean operator>(const char* s1, const String& s2)
1401                                  Boolean operator<=(const String& s1, const char* s2)
1402                                  Boolean operator<=(const char* s1, const String& s2)
1403                                  Boolean operator>=(const String& s1, const char* s2)
1404                                  Boolean operator>=(const char* s1, const String& s2)
1405                                  String operator+(const String& s1, const char* s2)
1406                                  String operator+(const char* s1, const String& s2)
1407                      
1408 david.dillard  1.116     7.  Optimized _roundUpToPow2(), used in rounding the capacity to the next
1409 mike           1.112         power of two (algorithm from the book "Hacker's Delight").
1410                      
1411                                  static Uint32 _roundUpToPow2(Uint32 x)
1412                                  {
1413                                      if (x < 8)
1414                                          return 8;
1415                      
1416                                      x--;
1417                                      x |= (x >> 1);
1418                                      x |= (x >> 2);
1419                                      x |= (x >> 4);
1420                                      x |= (x >> 8);
1421                                      x |= (x >> 16);
1422                                      x++;
1423                      
1424                                      return x;
1425                                  }
1426                      
1427                          8.  Implemented "concatenating constructors" to eliminate temporaries
1428 david.dillard  1.116         created by operator+(). This scheme employs the "return-value
1429 mike           1.112         optimization" described by Stan Lippman.
1430                      
1431                                  inline String operator+(const String& s1, const String& s2)
1432                                  {
1433                                      return String(s1, s2, 0);
1434                                  }
1435                      
1436                          9.  Experimented to find the optimal initial size for a short string.
1437                              Eight seems to offer the best tradeoff between space and time.
1438                      
1439                          10. Inlined all members of the Char16 class.
1440                      
1441                          11. Used Uint16 internally in the String class. This showed no improvement
1442                              since Char16 was already fully inlined and was essentially reduced to
1443                              Uint16 in any case.
1444                      
1445                          12. Implemented conditional logic (#if) allowing error checking logic to
1446 david.dillard  1.116         be excluded to better performance. Examples include bounds checking
1447 mike           1.112         and null-pointer checking.
1448                      
1449                          13. Used memcpy() and memcmp() where possible. These are implemented using
1450                              the rep family of instructions under Intel and are much faster.
1451                      
1452 david.dillard  1.116     14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
1453 mike           1.112         copy routine overhead.
1454                      
1455                          15. Added ASCII7 form of the constructor and assign().
1456                      
1457                                  String s("hello world", String::ASCII7);
1458                      
1459                                  s.assignASCII7("hello world");
1460                      
1461                              This avoids slower UTF8 processing when not needed.
1462                      
1463                      ================================================================================
1464                      
1465                      TO-DO:
1466                      
1467                          (+) [DONE] Use PEGASUS_USE_EXPERIMENTAL_INTERFACES
1468                      
1469                          (+) [DONE] Submit BUG-2754 (Windows buffer limit).
1470                      
1471                          (+) [DONE] Eliminate char versions of find() and append().
1472                      
1473                          (+) [DONE] Remove PEGASUS_MAX_PRINTABLE_CHARACTER from Config.h
1474 mike           1.112 
1475                          (+) [DONE] Change _next_pow_2() to _roundUpToPow2().
1476                      
1477                          (+) [DONE] Change '99' to '2' in StringRep constructor (comment as well).
1478                      
1479                          (+) [DONE] Comment StringRep allocation layout.
1480                      
1481                          (+) [DONE] Conceal private inline functions.
1482                      
1483                          (+) [DONE] Shorten inclusion of StringInline.h in String.h.
1484                      
1485                          (+) [DONE] Change USE_INTERNAL_INLINE TO DISABLE_INTERNAL_INLINE or get
1486                              rid of altogether.
1487                      
1488                          (+) [DONE] useCamelNotationOnAllFunctionNames.
1489                      
1490                          (+) [DONE] Check for overflow condition in StringRep::alloc().
1491                      
1492                          (+) [DONE] Remove tabs (used vim ":set expandtab" and ":retab").
1493                      
1494                          (+) [DONE] Fix throw-related memory leak.
1495 mike           1.112 
1496                          (+) [DONE] Look at PEP223 for coding security guidelines.
1497                      
1498                          (+) [DONE] Use old AtomicInt for now (new AtomicInt part of bug #4250).
1499 kumpf          1.39  
1500 mike           1.112     (+) [DONE] Removed appendASCII() and the ASCII form of the constructor.
1501 kumpf          1.39  
1502 mike           1.112     (+) DOC++ String.h - will open new bug?
1503 kumpf          1.39  
1504 mike           1.112     (+) Added PEGASUS_DISABLE_INTERNAL_INLINES macro (to permit suppression
1505                      	on certain platforms).
1506 kumpf          1.39  
1507 mike           1.112 ================================================================================
1508                      */

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2