(file) Return to String.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 karl  1.119 //%2006////////////////////////////////////////////////////////////////////////
   2 mike  1.27  //
   3 karl  1.97  // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
   4             // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
   5             // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
   6 karl  1.85  // IBM Corp.; EMC Corporation, The Open Group.
   7 karl  1.97  // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
   8             // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
   9 karl  1.98  // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
  10             // EMC Corporation; VERITAS Software Corporation; The Open Group.
  11 karl  1.119 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
  12             // EMC Corporation; Symantec Corporation; The Open Group.
  13 mike  1.27  //
  14             // Permission is hereby granted, free of charge, to any person obtaining a copy
  15 kumpf 1.41  // of this software and associated documentation files (the "Software"), to
  16             // deal in the Software without restriction, including without limitation the
  17             // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  18 mike  1.27  // sell copies of the Software, and to permit persons to whom the Software is
  19             // furnished to do so, subject to the following conditions:
  20 karl  1.119 // 
  21 kumpf 1.41  // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
  22 mike  1.27  // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
  23             // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
  24 kumpf 1.41  // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
  25             // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  26             // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  27 mike  1.27  // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  28             // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  29             //
  30             //==============================================================================
  31             //
  32 mike  1.112 // Author: Mike Brasher (mbrasher@austin.rr.com)
  33 mike  1.27  //
  34 david.dillard 1.116 // Modified By:
  35 mike          1.112 //     Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
  36                     //     Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297
  37 david.dillard 1.116 //     David Dillard, Symantec Corp. (david_dillard@symantec.com)
  38 mike          1.112 //     Mike Brasher (mike-brasher@austin.rr.com)
  39 mike          1.27  //
  40                     //%/////////////////////////////////////////////////////////////////////////////
  41                     
  42 jim.wunderlich 1.115 #include <Pegasus/Common/PegasusAssert.h>
  43 mike           1.113 #include <cstring>
  44 kumpf          1.48  #include "InternalException.h"
  45 david          1.69  #include "CommonUTF.h"
  46 mike           1.112 #include "MessageLoader.h"
  47                      #include "StringRep.h"
  48 david          1.69  
  49                      #ifdef PEGASUS_HAS_ICU
  50 chuck          1.99  #include <unicode/ustring.h>
  51                      #include <unicode/uchar.h>
  52 david          1.69  #endif
  53                      
  54 mike           1.112 PEGASUS_NAMESPACE_BEGIN
  55 mike           1.28  
  56 mike           1.112 //==============================================================================
  57                      //
  58                      // Compile-time macros (undefined by default).
  59                      //
  60                      //     PEGASUS_STRING_NO_THROW -- suppresses throwing of exceptions
  61 david.dillard  1.116 //
  62 mike           1.112 //     PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
  63                      //
  64                      //==============================================================================
  65 mike           1.27  
  66 mike           1.112 //==============================================================================
  67 kumpf          1.39  //
  68 mike           1.112 // File-scope definitions:
  69 kumpf          1.54  //
  70 mike           1.112 //==============================================================================
  71                      
  72                      // Note: this table is much faster than the system toupper(). Please do not
  73                      // change.
  74 kumpf          1.54  
  75 david.dillard  1.116 const Uint8 _toUpperTable[256] =
  76 kumpf          1.54  {
  77 mike           1.112     0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
  78                          0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
  79                          0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
  80                          0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
  81                          0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
  82                          0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
  83                          0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
  84                          0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
  85                          0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
  86                          0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
  87                          0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
  88                          0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
  89                          0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
  90                          0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
  91                          0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
  92                          0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
  93                          0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
  94                          0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
  95                          0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
  96                          0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
  97                          0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
  98 mike           1.112     0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
  99                          0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
 100                          0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
 101                          0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
 102                          0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
 103                          0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
 104                          0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
 105                          0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
 106                          0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
 107                          0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
 108                          0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
 109                      };
 110                      
 111                      // Note: this table is much faster than the system tulower(). Please do not
 112                      // change.
 113                      
 114 david.dillard  1.116 const Uint8 _toLowerTable[256] =
 115 mike           1.112 {
 116                          0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
 117                          0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
 118                          0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
 119                          0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
 120                          0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
 121                          0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
 122                          0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
 123                          0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
 124                          0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
 125                          0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
 126                          0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
 127                          0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
 128                          0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
 129                          0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
 130                          0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
 131                          0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
 132                          0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
 133                          0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
 134                          0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
 135                          0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
 136 mike           1.112     0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
 137                          0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
 138                          0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
 139                          0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
 140                          0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
 141                          0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
 142                          0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
 143                          0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
 144                          0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
 145                          0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
 146                          0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
 147                          0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
 148                      };
 149                      
 150                      // Converts 16-bit characters to upper case. This routine is faster than the
 151                      // system toupper(). Please do not change.
 152                      inline Uint16 _toUpper(Uint16 x)
 153                      {
 154                          return (x & 0xFF00) ? x : _toUpperTable[x];
 155 kumpf          1.54  }
 156                      
 157 mike           1.112 // Converts 16-bit characters to lower case. This routine is faster than the
 158                      // system toupper(). Please do not change.
 159                      inline Uint16 _toLower(Uint16 x)
 160 kumpf          1.54  {
 161 mike           1.112     return (x & 0xFF00) ? x : _toLowerTable[x];
 162                      }
 163                      
 164                      // Rounds x up to the nearest power of two (or just returns 8 if x < 8).
 165                      static Uint32 _roundUpToPow2(Uint32 x)
 166                      {
 167                      #ifndef PEGASUS_STRING_NO_THROW
 168                      
 169 dave.sudlik    1.119.2.1     // Check for potential overflow in x
 170                              PEGASUS_CHECK_CAPACITY_OVERFLOW(x);
 171 mike           1.112     
 172                          #endif
 173                          
 174                              if (x < 8)
 175                                  return 8;
 176                          
 177                              x--;
 178                              x |= (x >> 1);
 179                              x |= (x >> 2);
 180                              x |= (x >> 4);
 181                              x |= (x >> 8);
 182                              x |= (x >> 16);
 183                              x++;
 184                          
 185                              return x;
 186                          }
 187                          
 188                          template<class P, class Q>
 189                          static void _copy(P* p, const Q* q, size_t n)
 190                          {
 191                              // The following employs loop unrolling for efficiency. Please do not
 192 mike           1.112         // eliminate.
 193                          
 194                              while (n >= 8)
 195                              {
 196                                  p[0] = q[0];
 197                                  p[1] = q[1];
 198                                  p[2] = q[2];
 199                                  p[3] = q[3];
 200                                  p[4] = q[4];
 201                                  p[5] = q[5];
 202                                  p[6] = q[6];
 203                                  p[7] = q[7];
 204                                  p += 8;
 205                                  q += 8;
 206                                  n -= 8;
 207                              }
 208                          
 209                              while (n >= 4)
 210                              {
 211                                  p[0] = q[0];
 212                                  p[1] = q[1];
 213 mike           1.112             p[2] = q[2];
 214                                  p[3] = q[3];
 215                                  p += 4;
 216                                  q += 4;
 217                                  n -= 4;
 218                              }
 219                          
 220                              while (n--)
 221                                  *p++ = *q++;
 222                          }
 223                          
 224                          static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
 225                          {
 226                              // The following employs loop unrolling for efficiency. Please do not
 227                              // eliminate.
 228                          
 229                              while (n >= 4)
 230                              {
 231                                  if (s[0] == c)
 232                                      return (Uint16*)s;
 233                                  if (s[1] == c)
 234 mike           1.112                 return (Uint16*)&s[1];
 235                                  if (s[2] == c)
 236                                      return (Uint16*)&s[2];
 237                                  if (s[3] == c)
 238                                      return (Uint16*)&s[3];
 239 kumpf          1.82      
 240 mike           1.112             n -= 4;
 241                                  s += 4;
 242                              }
 243                          
 244                              if (n)
 245                              {
 246                                  if (*s == c)
 247                                      return (Uint16*)s;
 248                                  s++;
 249                                  n--;
 250                              }
 251                          
 252                              if (n)
 253                              {
 254                                  if (*s == c)
 255                                      return (Uint16*)s;
 256                                  s++;
 257                                  n--;
 258                              }
 259                          
 260                              if (n && *s == c)
 261 mike           1.112             return (Uint16*)s;
 262                          
 263                              // Not found!
 264                              return 0;
 265                          }
 266                          
 267                          static int _compare(const Uint16* s1, const Uint16* s2)
 268                          {
 269                              while (*s1 && *s2)
 270                              {
 271                                  int r = *s1++ - *s2++;
 272                          
 273                                  if (r)
 274                                      return r;
 275                              }
 276                          
 277                              if (*s2)
 278                                  return -1;
 279                              else if (*s1)
 280                                  return 1;
 281                          
 282 mike           1.112         return 0;
 283                          }
 284                          
 285                          static int _compareNoUTF8(const Uint16* s1, const char* s2)
 286                          {
 287                              Uint16 c1;
 288                              Uint16 c2;
 289                          
 290                              do
 291                              {
 292                                  c1 = *s1++;
 293                                  c2 = *s2++;
 294                          
 295                                  if (c1 == 0)
 296                                      return c1 - c2;
 297                              }
 298                              while (c1 == c2);
 299                          
 300                              return c1 - c2;
 301                          }
 302                          
 303 mike           1.112     static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)
 304                          {
 305                              memcpy(s1, s2, n * sizeof(Uint16));
 306                          }
 307                          
 308                          void StringThrowOutOfBounds()
 309                          {
 310                              throw IndexOutOfBoundsException();
 311                          }
 312                          
 313                          inline void _checkNullPointer(const void* ptr)
 314                          {
 315 kumpf          1.117     #ifndef PEGASUS_STRING_NO_THROW
 316 mike           1.112     
 317                              if (!ptr)
 318                                  throw NullPointer();
 319                          
 320                          #endif
 321                          }
 322                          
 323                          static void _StringThrowBadUTF8(Uint32 index)
 324                          {
 325                              MessageLoaderParms parms(
 326                                  "Common.String.BAD_UTF8",
 327                                  "The byte sequence starting at index $0 "
 328                                  "is not valid UTF-8 encoding.",
 329                                  index);
 330                              throw Exception(parms);
 331                          }
 332                          
 333                          static size_t _copyFromUTF8(
 334 david.dillard  1.116         Uint16* dest,
 335                              const char* src,
 336 mike           1.112         size_t n,
 337                              size_t& utf8_error_index)
 338                          {
 339                              Uint16* p = dest;
 340                              const Uint8* q = (const Uint8*)src;
 341                          
 342                              // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later).
 343                              // Use loop-unrolling.
 344                          
 345                              while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0)
 346                              {
 347                                  p[0] = q[0];
 348                                  p[1] = q[1];
 349                                  p[2] = q[2];
 350                                  p[3] = q[3];
 351                                  p[4] = q[4];
 352                                  p[5] = q[5];
 353                                  p[6] = q[6];
 354                                  p[7] = q[7];
 355                                  p += 8;
 356                                  q += 8;
 357 mike           1.112             n -= 8;
 358                              }
 359                          
 360                              while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0)
 361                              {
 362                                  p[0] = q[0];
 363                                  p[1] = q[1];
 364                                  p[2] = q[2];
 365                                  p[3] = q[3];
 366                                  p += 4;
 367                                  q += 4;
 368                                  n -= 4;
 369                              }
 370                          
 371                              switch (n)
 372                              {
 373                                  case 0:
 374                                      return p - dest;
 375                                  case 1:
 376                                      if (q[0] < 128)
 377                                      {
 378 mike           1.112                     p[0] = q[0];
 379                                          return p + 1 - dest;
 380                                      }
 381                                      break;
 382                                  case 2:
 383                                      if (((q[0]|q[1]) & 0x80) == 0)
 384                                      {
 385                                          p[0] = q[0];
 386                                          p[1] = q[1];
 387                                          return p + 2 - dest;
 388                                      }
 389                                      break;
 390                                  case 3:
 391                                      if (((q[0]|q[1]|q[2]) & 0x80) == 0)
 392                                      {
 393                                          p[0] = q[0];
 394                                          p[1] = q[1];
 395                                          p[2] = q[2];
 396                                          return p + 3 - dest;
 397                                      }
 398                                      break;
 399 mike           1.112         }
 400                          
 401                              // Process remaining characters.
 402                          
 403                              while (n)
 404                              {
 405                                  // Optimize for 7-bit ASCII case.
 406                          
 407                                  if (*q < 128)
 408                                  {
 409                                      *p++ = *q++;
 410                                      n--;
 411                                  }
 412                                  else
 413                                  {
 414                                      Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1;
 415                          
 416                                      if (c > n || !isValid_U8(q, c) ||
 417                                          UTF8toUTF16(&q, q + c, &p, p + n) != 0)
 418                                      {
 419                                          utf8_error_index = q - (const Uint8*)src;
 420 mike           1.112                     return size_t(-1);
 421                                      }
 422                          
 423                                      n -= c;
 424                                  }
 425                              }
 426                          
 427                              return p - dest;
 428                          }
 429                          
 430 david.dillard  1.116     // Note: dest must be at least three times src (plus an extra byte for
 431 mike           1.112     // terminator).
 432                          static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n)
 433                          {
 434                              // The following employs loop unrolling for efficiency. Please do not
 435                              // eliminate.
 436                          
 437                              const Uint16* q = src;
 438                              Uint8* p = (Uint8*)dest;
 439                          
 440                              while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
 441 kumpf          1.82          {
 442 mike           1.112             p[0] = q[0];
 443                                  p[1] = q[1];
 444                                  p[2] = q[2];
 445                                  p[3] = q[3];
 446                                  p += 4;
 447                                  q += 4;
 448                                  n -= 4;
 449 kumpf          1.82          }
 450 mike           1.112     
 451                              switch (n)
 452                              {
 453                                  case 0:
 454                                      return p - (Uint8*)dest;
 455                                  case 1:
 456                                      if (q[0] < 128)
 457                                      {
 458                                          p[0] = q[0];
 459                                          return p + 1 - (Uint8*)dest;
 460                                      }
 461                                      break;
 462                                  case 2:
 463                                      if (q[0] < 128 && q[1] < 128)
 464                                      {
 465                                          p[0] = q[0];
 466                                          p[1] = q[1];
 467                                          return p + 2 - (Uint8*)dest;
 468                                      }
 469                                      break;
 470                                  case 3:
 471 mike           1.112                 if (q[0] < 128 && q[1] < 128 && q[2] < 128)
 472                                      {
 473                                          p[0] = q[0];
 474                                          p[1] = q[1];
 475                                          p[2] = q[2];
 476                                          return p + 3 - (Uint8*)dest;
 477                                      }
 478                                      break;
 479                              }
 480                          
 481                              // If this line was reached, there must be characters greater than 128.
 482                          
 483                              UTF16toUTF8(&q, q + n, &p, p + 3 * n);
 484                          
 485                              return p - (Uint8*)dest;
 486 kumpf          1.54      }
 487                          
 488 mike           1.112     static inline size_t _convert(
 489                              Uint16* p, const char* q, size_t n, size_t& utf8_error_index)
 490 kumpf          1.54      {
 491 mike           1.112     #ifdef PEGASUS_STRING_NO_UTF8
 492                              _copy(p, q, n);
 493                              return n;
 494                          #else
 495                              return _copyFromUTF8(p, q, n, utf8_error_index);
 496                          #endif
 497 kumpf          1.54      }
 498                          
 499 mike           1.112     //==============================================================================
 500                          //
 501                          // class CString
 502                          //
 503                          //==============================================================================
 504                          
 505                          CString::CString(const CString& cstr) : _rep(0)
 506 kumpf          1.54      {
 507 mike           1.112         if (cstr._rep)
 508 kumpf          1.82          {
 509 mike           1.112             size_t n = strlen(cstr._rep) + 1;
 510                                  _rep = (char*)operator new(n);
 511                                  memcpy(_rep, cstr._rep, n);
 512 kumpf          1.82          }
 513 kumpf          1.54      }
 514                          
 515 kumpf          1.56      CString& CString::operator=(const CString& cstr)
 516                          {
 517 kumpf          1.82          if (&cstr != this)
 518 kumpf          1.81          {
 519 kumpf          1.82              if (_rep)
 520                                  {
 521 mike           1.112                 operator delete(_rep);
 522 kumpf          1.82                  _rep = 0;
 523                                  }
 524 mike           1.112     
 525 kumpf          1.82              if (cstr._rep)
 526                                  {
 527 mike           1.112                 size_t n = strlen(cstr._rep) + 1;
 528                                      _rep = (char*)operator new(n);
 529                                      memcpy(_rep, cstr._rep, n);
 530 kumpf          1.82              }
 531 kumpf          1.81          }
 532 mike           1.112     
 533 kumpf          1.56          return *this;
 534                          }
 535                          
 536 mike           1.112     //==============================================================================
 537 kumpf          1.54      //
 538 mike           1.112     // class StringRep
 539 kumpf          1.39      //
 540 mike           1.112     //==============================================================================
 541 kumpf          1.39      
 542 mike           1.112     StringRep StringRep::_emptyRep;
 543 mike           1.27      
 544 mike           1.112     inline StringRep* StringRep::alloc(size_t cap)
 545 mike           1.27      {
 546 mike           1.112     #ifndef PEGASUS_STRING_NO_THROW
 547 dave.sudlik    1.119.2.1     
 548                              // Check for potential overflow in cap
 549                              PEGASUS_CHECK_CAPACITY_OVERFLOW(cap);
 550 mike           1.27      
 551 mike           1.112     #endif
 552 mike           1.27      
 553 mike           1.112         StringRep* rep = (StringRep*)::operator new(
 554                                  sizeof(StringRep) + cap * sizeof(Uint16));
 555                              rep->cap = cap;
 556                              new(&rep->refs) AtomicInt(1);
 557                          
 558                              return rep;
 559 mike           1.27      }
 560                          
 561 mike           1.112     static inline void _reserve(StringRep*& rep, Uint32 cap)
 562 chuck          1.102     {
 563 mike           1.114         if (cap > rep->cap || rep->refs.get() != 1)
 564 chuck          1.102         {
 565 mike           1.112             size_t n = _roundUpToPow2(cap);
 566                                  StringRep* newRep = StringRep::alloc(n);
 567                                  newRep->size = rep->size;
 568                                  _copy(newRep->data, rep->data, rep->size + 1);
 569                                  StringRep::unref(rep);
 570                                  rep = newRep;
 571                              }
 572                          }
 573 david.dillard  1.105     
 574 mike           1.112     StringRep* StringRep::create(const Uint16* data, size_t size)
 575                          {
 576                              StringRep* rep = StringRep::alloc(size);
 577                              rep->size = size;
 578                              _copy(rep->data, data, size);
 579                              rep->data[size] = '\0';
 580                              return rep;
 581                          }
 582 chuck          1.102     
 583 mike           1.112     StringRep* StringRep::copyOnWrite(StringRep* rep)
 584                          {
 585                              // Return a new copy of rep. Release rep.
 586 chuck          1.102     
 587 mike           1.112         StringRep* newRep = StringRep::alloc(rep->size);
 588                              newRep->size = rep->size;
 589                              _copy(newRep->data, rep->data, rep->size);
 590                              newRep->data[newRep->size] = '\0';
 591                              StringRep::unref(rep);
 592                              return newRep;
 593 chuck          1.102     }
 594                          
 595 mike           1.112     StringRep* StringRep::create(const char* data, size_t size)
 596 kumpf          1.43      {
 597 mike           1.112         StringRep* rep = StringRep::alloc(size);
 598                              size_t utf8_error_index;
 599                              rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index);
 600                          
 601                          #ifndef PEGASUS_STRING_NO_THROW
 602                              if (rep->size == size_t(-1))
 603                              {
 604                                  StringRep::free(rep);
 605                                  _StringThrowBadUTF8(utf8_error_index);
 606                              }
 607                          #endif
 608 kumpf          1.43      
 609 mike           1.112         rep->data[rep->size] = '\0';
 610 kumpf          1.43      
 611 mike           1.112         return rep;
 612 mike           1.27      }
 613                          
 614 mike           1.112     Uint32 StringRep::length(const Uint16* str)
 615 mike           1.27      {
 616 mike           1.112         // Note: We could unroll this but it is rarely called.
 617                          
 618                              const Uint16* end = (Uint16*)str;
 619                          
 620                              while (*end++)
 621                                  ;
 622                          
 623                              return end - str - 1;
 624 kumpf          1.39      }
 625 tony           1.66      
 626 mike           1.112     //==============================================================================
 627                          //
 628                          // class String
 629                          //
 630                          //==============================================================================
 631                          
 632                          const String String::EMPTY; // Definition of the static member; default-constructed.
 633 mike           1.27      
 634 kumpf          1.39      String::String(const String& str, Uint32 n)
 635                          {
 636 mike           1.112         _checkBounds(n, str._rep->size);
 637                              _rep = StringRep::create(str._rep->data, n);
 638 kumpf          1.39      }
 639                          
 640                          String::String(const Char16* str)
 641                          {
 642 mike           1.112         _checkNullPointer(str);
 643                              _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
 644 mike           1.27      }
 645                          
 646 kumpf          1.39      String::String(const Char16* str, Uint32 n)
 647                          {
 648 mike           1.112         _checkNullPointer(str);
 649                              _rep = StringRep::create((Uint16*)str, n);
 650 kumpf          1.39      }
 651                          
 652                          String::String(const char* str)
 653 mike           1.27      {
 654 mike           1.112         _checkNullPointer(str);
 655 david.dillard  1.105     
 656 mike           1.112         // Set this just in case create() throws an exception.
 657                              _rep = &StringRep::_emptyRep;
 658                              _rep = StringRep::create(str, strlen(str));
 659 mike           1.27      }
 660                          
 661 kumpf          1.39      String::String(const char* str, Uint32 n)
 662 mike           1.27      {
 663 mike           1.112         _checkNullPointer(str);
 664 david.dillard  1.105     
 665 mike           1.112         // Set this just in case create() throws an exception.
 666                              _rep = &StringRep::_emptyRep;
 667                              _rep = StringRep::create(str, n);
 668 kumpf          1.39      }
 669 mike           1.27      
 670 mike           1.112     String::String(const String& s1, const String& s2)
 671 kumpf          1.39      {
 672 mike           1.112         size_t n1 = s1._rep->size;
 673                              size_t n2 = s2._rep->size;
 674                              size_t n = n1 + n2;
 675                              _rep = StringRep::alloc(n);
 676                              _copy(_rep->data, s1._rep->data, n1);
 677                              _copy(_rep->data + n1, s2._rep->data, n2);
 678                              _rep->size = n;
 679                              _rep->data[n] = '\0';
 680 mike           1.27      }
 681                          
 682 mike           1.112     String::String(const String& s1, const char* s2)
 683 mike           1.27      {
 684 mike           1.112         _checkNullPointer(s2);
 685                              size_t n1 = s1._rep->size;
 686                              size_t n2 = strlen(s2);
 687                              _rep = StringRep::alloc(n1 + n2);
 688                              _copy(_rep->data, s1._rep->data, n1);
 689                              size_t utf8_error_index;
 690                              size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index);
 691                          
 692                          #ifndef PEGASUS_STRING_NO_THROW
 693                              if (tmp == size_t(-1))
 694 kumpf          1.82          {
 695 mike           1.112             StringRep::free(_rep);
 696                                  _rep = &StringRep::_emptyRep;
 697                                  _StringThrowBadUTF8(utf8_error_index);
 698 kumpf          1.82          }
 699 mike           1.112     #endif
 700                          
 701                              _rep->size = n1 + tmp;
 702                              _rep->data[_rep->size] = '\0';
 703 mike           1.27      }
 704                          
 705 mike           1.112     String::String(const char* s1, const String& s2)
 706 mike           1.27      {
 707 mike           1.112         _checkNullPointer(s1);
 708                              size_t n1 = strlen(s1);
 709                              size_t n2 = s2._rep->size;
 710                              _rep = StringRep::alloc(n1 + n2);
 711                              size_t utf8_error_index;
 712                              size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index);
 713                          
 714                          #ifndef PEGASUS_STRING_NO_THROW
 715                              if (tmp ==  size_t(-1))
 716                              {
 717                                  StringRep::free(_rep);
 718                                  _rep = &StringRep::_emptyRep;
 719                                  _StringThrowBadUTF8(utf8_error_index);
 720                              }
 721                          #endif
 722                          
 723                              _rep->size = n2 + tmp;
 724                              _copy(_rep->data + n1, s2._rep->data, n2);
 725                              _rep->data[_rep->size] = '\0';
 726 mike           1.27      }
 727                          
 728 mike           1.112     String& String::assign(const String& str)
 729 mike           1.27      {
 730 mike           1.112         if (_rep != str._rep)
 731 david.dillard  1.105         {
 732 mike           1.112             StringRep::unref(_rep);
 733                                  StringRep::ref(_rep = str._rep);
 734 david.dillard  1.105         }
 735                          
 736 mike           1.27          return *this;
 737                          }
 738                          
 739                          String& String::assign(const Char16* str, Uint32 n)
 740                          {
 741 mike           1.112         _checkNullPointer(str);
 742                          
 743 mike           1.114         if (n > _rep->cap || _rep->refs.get() != 1)
 744 david.dillard  1.105         {
 745 mike           1.112             StringRep::unref(_rep);
 746                                  _rep = StringRep::alloc(n);
 747 david.dillard  1.105         }
 748                          
 749 mike           1.112         _rep->size = n;
 750                              _copy(_rep->data, (Uint16*)str, n);
 751                              _rep->data[n] = '\0';
 752                          
 753 mike           1.27          return *this;
 754                          }
 755                          
 756 mike           1.112     String& String::assign(const char* str, Uint32 n)
 757 chuck          1.102     {
 758 mike           1.112         _checkNullPointer(str);
 759                          
 760 mike           1.114         if (n > _rep->cap || _rep->refs.get() != 1)
 761 david.dillard  1.105         {
 762 mike           1.112             StringRep::unref(_rep);
 763                                  _rep = StringRep::alloc(n);
 764 david.dillard  1.105         }
 765                          
 766 mike           1.112         size_t utf8_error_index;
 767                              _rep->size = _convert(_rep->data, str, n, utf8_error_index);
 768 chuck          1.102     
 769 mike           1.112     #ifndef PEGASUS_STRING_NO_THROW
 770                              if (_rep->size ==  size_t(-1))
 771 david.dillard  1.105         {
 772 mike           1.112             StringRep::free(_rep);
 773                                  _rep = &StringRep::_emptyRep;
 774                                  _StringThrowBadUTF8(utf8_error_index);
 775 david.dillard  1.105         }
 776 mike           1.112     #endif
 777                          
 778                              _rep->data[_rep->size] = 0;
 779 david.dillard  1.105     
 780 mike           1.27          return *this;
 781                          }
 782                          
 783 kumpf          1.39      void String::clear()
 784                          {
 785 mike           1.112         if (_rep->size)
 786                              {
 787 mike           1.114             if (_rep->refs.get() == 1)
 788 mike           1.112             {
 789                                      _rep->size = 0;
 790                                      _rep->data[0] = '\0';
 791                                  }
 792                                  else
 793                                  {
 794                                      StringRep::unref(_rep);
 795                                      _rep = &StringRep::_emptyRep;
 796                                  }
 797                              }
 798 kumpf          1.39      }
 799                          
 800 mike           1.112     void String::reserveCapacity(Uint32 cap)
 801 kumpf          1.39      {
 802 mike           1.112         _reserve(_rep, cap);
 803 kumpf          1.39      }
 804                          
 805 mike           1.112     CString String::getCString() const
 806                          {
 807 david.dillard  1.116         // A UTF8 string can have three times as many characters as its UTF16
 808                              // counterpart, so we allocate extra memory for the worst case. In the
 809 mike           1.112         // best case, we may need only one third of the memory allocated. But
 810 david.dillard  1.116         // downsizing the string afterwards is expensive and unecessary since
 811                              // CString objects are usually short-lived (disappearing after only a few
 812 mike           1.112         // instructions). CString objects are typically created on the stack as
 813                              // means to obtain a char* pointer.
 814                          
 815                          #ifdef PEGASUS_STRING_NO_UTF8
 816                              char* str = (char*)operator new(_rep->size + 1);
 817                              _copy(str, _rep->data, _rep->size);
 818                              str[_rep->size] = '\0';
 819                              return CString(str);
 820 gs.keenan      1.110     #else
 821 mike           1.112         Uint32 n = 3 * _rep->size;
 822                              char* str = (char*)operator new(n + 1);
 823                              size_t size = _copyToUTF8(str, _rep->data, _rep->size);
 824                              str[size] = '\0';
 825                              return CString(str);
 826 gs.keenan      1.110     #endif
 827 kumpf          1.39      }
 828                          
 829 mike           1.112     String& String::append(const Char16* str, Uint32 n)
 830 kumpf          1.39      {
 831 mike           1.112         _checkNullPointer(str);
 832                          
 833                              size_t oldSize = _rep->size;
 834                              size_t newSize = oldSize + n;
 835                              _reserve(_rep, newSize);
 836                              _copy(_rep->data + oldSize, (Uint16*)str, n);
 837                              _rep->size = newSize;
 838                              _rep->data[newSize] = '\0';
 839                          
 840                              return *this;
 841 kumpf          1.39      }
 842                          
 843 mike           1.112     String& String::append(const String& str)
 844 mike           1.27      {
 845 mike           1.112         return append((Char16*)str._rep->data, str._rep->size);
 846 mike           1.27      }
 847                          
 848 mike           1.112     String& String::append(const char* str, Uint32 size)
 849 mike           1.27      {
 850 mike           1.112         _checkNullPointer(str);
 851                          
 852                              size_t oldSize = _rep->size;
 853                              size_t cap = oldSize + size;
 854                          
 855                              _reserve(_rep, cap);
 856                              size_t utf8_error_index;
 857                              size_t tmp = _convert(
 858                                  (Uint16*)_rep->data + oldSize, str, size, utf8_error_index);
 859                          
 860                          #ifndef PEGASUS_STRING_NO_THROW
 861                              if (tmp ==  size_t(-1))
 862                              {
 863                                  StringRep::free(_rep);
 864                                  _rep = &StringRep::_emptyRep;
 865                                  _StringThrowBadUTF8(utf8_error_index);
 866                              }
 867                          #endif
 868 mike           1.27      
 869 mike           1.112         _rep->size += tmp;
 870                              _rep->data[_rep->size] = '\0';
 871 mike           1.27      
 872 kumpf          1.39          return *this;
 873                          }
 874                          
 875 mike           1.112     void String::remove(Uint32 index, Uint32 n)
 876 mike           1.27      {
 877 mike           1.112         if (n == PEG_NOT_FOUND)
 878                                  n = _rep->size - index;
 879                          
 880                              _checkBounds(index + n, _rep->size);
 881                          
 882 mike           1.114         if (_rep->refs.get() != 1)
 883 mike           1.112             _rep = StringRep::copyOnWrite(_rep);
 884 mike           1.27      
 885 jim.wunderlich 1.115         PEGASUS_ASSERT(index + n <= _rep->size);
 886 mike           1.27      
 887 mike           1.112         size_t rem = _rep->size - (index + n);
 888                              Uint16* data = _rep->data;
 889 mike           1.27      
 890 mike           1.112         if (rem)
 891                                  memmove(data + index, data + index + n, rem * sizeof(Uint16));
 892 mike           1.27      
 893 mike           1.112         _rep->size -= n;
 894                              data[_rep->size] = '\0';
 895 mike           1.27      }
 896                          
 897 mike           1.112     String String::subString(Uint32 index, Uint32 n) const
 898 mike           1.27      {
 899 mike           1.112         // Note: this implementation is very permissive but used for
 900                              // backwards compatibility.
 901                          
 902                              if (index < _rep->size)
 903 mike           1.27          {
 904 mike           1.112             if (n == PEG_NOT_FOUND || n > _rep->size - index)
 905                                      n = _rep->size - index;
 906 mike           1.27      
 907 mike           1.112             return String((Char16*)_rep->data + index, n);
 908 mike           1.27          }
 909 david.dillard  1.105     
 910                              return String();
 911 mike           1.27      }
 912                          
 913                          Uint32 String::find(Char16 c) const
 914                          {
 915 mike           1.112         Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
 916 mike           1.27      
 917 mike           1.112         if (p)
 918 david.dillard  1.116             return static_cast<Uint32>(p - _rep->data);
 919 mike           1.27      
 920                              return PEG_NOT_FOUND;
 921                          }
 922                          
 923 kumpf          1.53      Uint32 String::find(Uint32 index, Char16 c) const
 924 mike           1.30      {
 925 mike           1.112         _checkBounds(index, _rep->size);
 926                          
 927                              if (index >= _rep->size)
 928                                  return PEG_NOT_FOUND;
 929                          
 930                              Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
 931 mike           1.30      
 932 mike           1.112         if (p)
 933 david.dillard  1.116             return static_cast<Uint32>(p - _rep->data);
 934 mike           1.30      
 935                              return PEG_NOT_FOUND;
 936                          }
 937                          
 938 mike           1.112     Uint32 StringFindAux(
 939                              const StringRep* _rep, const Char16* s, Uint32 n)
 940 mike           1.27      {
 941 mike           1.112         _checkNullPointer(s);
 942 mike           1.27      
 943 mike           1.112         const Uint16* data = _rep->data;
 944                              size_t rem = _rep->size;
 945                          
 946                              while (n <= rem)
 947 mike           1.30          {
 948 mike           1.112             Uint16* p = (Uint16*)_find(data, rem, s[0]);
 949                          
 950                                  if (!p)
 951                                      break;
 952 mike           1.30      
 953 mike           1.112             if (memcmp(p, s, n * sizeof(Uint16)) == 0)
 954 david.dillard  1.116                 return static_cast<Uint32>(p - _rep->data);
 955 david.dillard  1.105     
 956 mike           1.112             p++;
 957                                  rem -= p - data;
 958                                  data = p;
 959 mike           1.27          }
 960 mike           1.112     
 961 mike           1.27          return PEG_NOT_FOUND;
 962                          }
 963                          
 964 mike           1.112     Uint32 String::find(const char* s) const
 965                          {
 966                              _checkNullPointer(s);
 967                          
 968                              // Note: could optimize away creation of temporary, but this is rarely
 969                              // called.
 970                              return find(String(s));
 971                          }
 972                          
 973 mike           1.27      Uint32 String::reverseFind(Char16 c) const
 974                          {
 975 mike           1.112         Uint16 x = c;
 976                              Uint16* p = _rep->data;
 977                              Uint16* q = _rep->data + _rep->size;
 978 mike           1.27      
 979 mike           1.112         while (q != p)
 980 mike           1.27          {
 981 mike           1.112             if (*--q == x)
 982 david.dillard  1.116                 return static_cast<Uint32>(q - p);
 983 mike           1.27          }
 984                          
 985                              return PEG_NOT_FOUND;
 986                          }
 987                          
 988                          void String::toLower()
 989                          {
 990 david          1.69      #ifdef PEGASUS_HAS_ICU
 991 mike           1.112     
 992 yi.zhou        1.108         if (InitializeICU::initICUSuccessful())
 993 david          1.90          {
 994 mike           1.114             if (_rep->refs.get() != 1)
 995 mike           1.112                 _rep = StringRep::copyOnWrite(_rep);
 996                          
 997 yi.zhou        1.108             // This will do a locale-insensitive, but context-sensitive convert.
 998 david.dillard  1.116             // Since context-sensitive casing looks at adjacent chars, this
 999                                  // prevents optimizations where the us-ascii is converted before
1000 mike           1.112             // calling ICU.
1001 yi.zhou        1.108             // The string may shrink or expand after the convert.
1002                          
1003 mike           1.112             //// First calculate size of resulting string. u_strToLower() returns
1004                                  //// only the size when zero is passed as the destination size argument.
1005                          
1006 yi.zhou        1.108             UErrorCode err = U_ZERO_ERROR;
1007                          
1008 mike           1.112             int32_t newSize = u_strToLower(
1009                                      NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
1010 david.dillard  1.116     
1011 mike           1.112             err = U_ZERO_ERROR;
1012                          
1013                                  //// Reserve enough space for the result.
1014                          
1015                                  if ((Uint32)newSize > _rep->cap)
1016                                      _reserve(_rep, newSize);
1017                          
1018                                  //// Perform the conversion (overlapping buffers are allowed).
1019 chuck          1.99      
1020 mike           1.112             u_strToLower((UChar*)_rep->data, newSize,
1021                                      (UChar*)_rep->data, _rep->size, NULL, &err);
1022 yi.zhou        1.108     
1023 mike           1.112             _rep->size = newSize;
1024                                  return;
1025 david          1.90          }
1026 mike           1.112     
1027                          #endif /* PEGASUS_HAS_ICU */
1028                          
1029 mike           1.114         if (_rep->refs.get() != 1)
1030 mike           1.112             _rep = StringRep::copyOnWrite(_rep);
1031                          
1032                              Uint16* p = _rep->data;
1033                              size_t n = _rep->size;
1034                          
1035                              for (; n--; p++)
1036 david          1.90          {
1037 mike           1.112             if (!(*p & 0xFF00))
1038                                      *p = _toLower(*p);
1039 mike           1.27          }
1040 kumpf          1.39      }
1041                          
1042 chuck          1.99      void String::toUpper()
1043 david          1.90      {
1044                          #ifdef PEGASUS_HAS_ICU
1045 mike           1.112     
1046 yi.zhou        1.108         if (InitializeICU::initICUSuccessful())
1047 chuck          1.99          {
1048 mike           1.114             if (_rep->refs.get() != 1)
1049 mike           1.112                 _rep = StringRep::copyOnWrite(_rep);
1050                          
1051 yi.zhou        1.108             // This will do a locale-insensitive, but context-sensitive convert.
1052 david.dillard  1.116             // Since context-sensitive casing looks at adjacent chars, this
1053                                  // prevents optimizations where the us-ascii is converted before
1054 mike           1.112             // calling ICU.
1055 yi.zhou        1.108             // The string may shrink or expand after the convert.
1056                          
1057 mike           1.112             //// First calculate size of resulting string. u_strToUpper() returns
1058                                  //// only the size when zero is passed as the destination size argument.
1059                          
1060 yi.zhou        1.108             UErrorCode err = U_ZERO_ERROR;
1061                          
1062 mike           1.112             int32_t newSize = u_strToUpper(
1063                                      NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
1064                          
1065                                  err = U_ZERO_ERROR;
1066                          
1067                                  //// Reserve enough space for the result.
1068                          
1069                                  if ((Uint32)newSize > _rep->cap)
1070                                      _reserve(_rep, newSize);
1071                          
1072                                  //// Perform the conversion (overlapping buffers are allowed).
1073                          
1074                                  u_strToUpper((UChar*)_rep->data, newSize,
1075                                      (UChar*)_rep->data, _rep->size, NULL, &err);
1076 chuck          1.99      
1077 mike           1.112             _rep->size = newSize;
1078 yi.zhou        1.108     
1079 mike           1.112             return;
1080 david          1.91          }
1081 mike           1.112     
1082                          #endif /* PEGASUS_HAS_ICU */
1083                          
1084 mike           1.114         if (_rep->refs.get() != 1)
1085 mike           1.112             _rep = StringRep::copyOnWrite(_rep);
1086                          
1087                              Uint16* p = _rep->data;
1088                              size_t n = _rep->size;
1089                          
1090                              for (; n--; p++)
1091                                  *p = _toUpper(*p);
1092 david          1.90      }
1093                          
1094 kumpf          1.43      int String::compare(const String& s1, const String& s2, Uint32 n)
1095 kumpf          1.39      {
1096 kumpf          1.118         const Uint16* p1 = s1._rep->data;
1097                              const Uint16* p2 = s2._rep->data;
1098 mike           1.27      
1099 kumpf          1.118         while (n--)
1100                              {
1101                                  int r = *p1++ - *p2++;
1102                                  if (r)
1103                                  {
1104                                      return r;
1105                                  }
1106                                  else if (!p1[-1])
1107                                  {
1108                                      // We must have encountered a null terminator in both s1 and s2
1109                                      return 0;
1110                                  }
1111                              }
1112                              return 0;
1113 mike           1.27      }
1114                          
1115 kumpf          1.43      int String::compare(const String& s1, const String& s2)
1116 mike           1.30      {
1117 mike           1.112         return _compare(s1._rep->data, s2._rep->data);
1118                          }
1119 kumpf          1.43      
1120 mike           1.112     int String::compare(const String& s1, const char* s2)
1121                          {
1122                              _checkNullPointer(s2);
1123 mike           1.30      
1124 mike           1.112     #ifdef PEGASUS_STRING_NO_UTF8
1125                              return _compareNoUTF8(s1._rep->data, s2);
1126                          #else
1127                              // ATTN: optimize this!
1128                              return String::compare(s1, String(s2));
1129                          #endif
1130 mike           1.30      }
1131                          
1132 mike           1.112     int String::compareNoCase(const String& str1, const String& str2)
1133 kumpf          1.40      {
1134 david          1.69      #ifdef PEGASUS_HAS_ICU
1135 mike           1.112     
1136 yi.zhou        1.108         if (InitializeICU::initICUSuccessful())
1137                              {
1138 mike           1.112             return  u_strcasecmp(
1139                                      str1._rep->data, str2._rep->data, U_FOLD_CASE_DEFAULT);
1140 yi.zhou        1.108         }
1141 kumpf          1.40      
1142 mike           1.112     #endif /* PEGASUS_HAS_ICU */
1143                          
1144                              const Uint16* s1 = str1._rep->data;
1145                              const Uint16* s2 = str2._rep->data;
1146                          
1147                              while (*s1 && *s2)
1148 kumpf          1.40          {
1149 mike           1.112             int r = _toLower(*s1++) - _toLower(*s2++);
1150 kumpf          1.40      
1151 david.dillard  1.105             if (r)
1152                                      return r;
1153 kumpf          1.40          }
1154                          
1155 mike           1.112         if (*s2)
1156 david.dillard  1.105             return -1;
1157 mike           1.112         else if (*s1)
1158 david.dillard  1.105             return 1;
1159 kumpf          1.40      
1160                              return 0;
1161                          }
1162                          
1163 mike           1.112     Boolean StringEqualNoCase(const String& s1, const String& s2)
1164 mike           1.27      {
1165 mike           1.112     #ifdef PEGASUS_HAS_ICU
1166                          
1167                              return String::compareNoCase(s1, s2) == 0;
1168                          
1169                          #else /* PEGASUS_HAS_ICU */
1170 mike           1.27      
1171 mike           1.112         // The following employs loop unrolling for efficiency. Please do not
1172                              // eliminate.
1173 kumpf          1.39      
1174 mike           1.112         Uint16* p = (Uint16*)s1.getChar16Data();
1175                              Uint16* q = (Uint16*)s2.getChar16Data();
1176                              Uint32 n = s2.size();
1177                          
1178                              while (n >= 8)
1179                              {
1180                                  if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1181                                      ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1182                                      ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1183                                      ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) ||
1184                                      ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) ||
1185                                      ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) ||
1186                                      ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) ||
1187                                      ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7]))))
1188                                  {
1189                                      return false;
1190                                  }
1191 kumpf          1.39      
1192 mike           1.112             n -= 8;
1193                                  p += 8;
1194                                  q += 8;
1195                              }
1196 mike           1.27      
1197 mike           1.112         while (n >= 4)
1198 kumpf          1.39          {
1199 mike           1.112             if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1200                                      ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1201                                      ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1202                                      ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
1203 david.dillard  1.105             {
1204 mike           1.112                 return false;
1205 david.dillard  1.105             }
1206 mike           1.112     
1207                                  n -= 4;
1208                                  p += 4;
1209                                  q += 4;
1210                              }
1211                          
1212                              while (n--)
1213                              {
1214                                  if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
1215 david.dillard  1.105                 return false;
1216 mike           1.112     
1217                                  p++;
1218                                  q++;
1219 kumpf          1.39          }
1220 mike           1.28      
1221 kumpf          1.39          return true;
1222 mike           1.112     
1223                          #endif /* PEGASUS_HAS_ICU */
1224 david          1.69      }
1225                          
1226 mike           1.112     Boolean String::equalNoCase(const String& s1, const char* s2)
1227 david          1.69      {
1228 mike           1.112         _checkNullPointer(s2);
1229 david          1.69      
1230 mike           1.112     #if defined(PEGASUS_HAS_ICU)
1231 david          1.69      
1232 mike           1.112         return String::equalNoCase(s1, String(s2));
1233 david          1.69      
1234 mike           1.112     #elif defined(PEGASUS_STRING_NO_UTF8)
1235 david          1.69      
1236 mike           1.112         const Uint16* p1 = (Uint16*)s1._rep->data;
1237                              const char* p2 = s2;
1238                              size_t n = s1._rep->size;
1239 david.dillard  1.105     
1240 mike           1.112         while (n--)
1241                              {
1242                                  if (!*p2)
1243                                      return false;
1244 david          1.71      
1245 mike           1.112             if (_toUpper(*p1++) != _toUpperTable[int(*p2++)])
1246                                      return false;
1247                              }
1248 kumpf          1.42      
1249 mike           1.112         if (*p2)
1250                                  return false;
1251 david.dillard  1.116     
1252 mike           1.112         return true;
1253 karl           1.36      
1254 mike           1.112     #else /* PEGASUS_HAS_ICU */
1255 david.dillard  1.105     
1256 mike           1.112         // ATTN: optimize this!
1257                              return String::equalNoCase(s1, String(s2));
1258 david.dillard  1.105     
1259 mike           1.112     #endif /* PEGASUS_HAS_ICU */
1260                          }
1261 chuck          1.78      
1262 mike           1.112     Boolean String::equal(const String& s1, const String& s2)
1263 karl           1.36      {
1264 david.dillard  1.116         return s1._rep->size == s2._rep->size && memcmp(s1._rep->data,
1265 mike           1.112             s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;
1266 karl           1.36      }
1267                          
1268 mike           1.112     Boolean String::equal(const String& s1, const char* s2)
1269                          {
1270                          #ifdef PEGASUS_STRING_NO_UTF8
1271 kumpf          1.35      
1272 mike           1.112         _checkNullPointer(s2);
1273 kumpf          1.39      
1274 mike           1.112         const Uint16* p = (Uint16*)s1._rep->data;
1275                              const char* q = s2;
1276 kumpf          1.39      
1277 mike           1.112         while (*p && *q)
1278                              {
1279                                  if (*p++ != Uint16(*q++))
1280                                      return false;
1281                              }
1282 kumpf          1.39      
1283 mike           1.112         return !(*p || *q);
1284 kumpf          1.39      
1285 mike           1.112     #else /* PEGASUS_STRING_NO_UTF8 */
1286 kumpf          1.39      
1287 mike           1.112         return String::equal(s1, String(s2));
1288 kumpf          1.39      
1289 mike           1.112     #endif /* PEGASUS_STRING_NO_UTF8 */
1290 kumpf          1.39      }
1291                          
1292 kumpf          1.47      PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
1293 kumpf          1.39      {
1294 mike           1.112     #if defined(PEGASUS_OS_OS400)
1295 david          1.72      
1296 david          1.93          CString cstr = str.getCString();
1297 david          1.69          const char* utf8str = cstr;
1298 mike           1.112         os << utf8str;
1299                              return os;
1300 david.dillard  1.116     #else
1301 david          1.69      
1302 mike           1.112     #if defined(PEGASUS_HAS_ICU)
1303 david          1.69      
1304 yi.zhou        1.108         if (InitializeICU::initICUSuccessful())
1305                              {
1306 david.dillard  1.105             char *buf = NULL;
1307                                  const int size = str.size() * 6;
1308 mike           1.112             UnicodeString UniStr(
1309                                      (const UChar *)str.getChar16Data(), (int32_t)str.size());
1310 david.dillard  1.105             Uint32 bufsize = UniStr.extract(0,size,buf);
1311                                  buf = new char[bufsize+1];
1312                                  UniStr.extract(0,bufsize,buf);
1313                                  os << buf;
1314                                  os.flush();
1315                                  delete [] buf;
1316 david.dillard  1.116             return os;
1317 yi.zhou        1.108         }
1318 mike           1.112     
1319 david.dillard  1.116     #endif  // PEGASUS_HAS_ICU
1320 mike           1.112     
1321                              for (Uint32 i = 0, n = str.size(); i < n; i++)
1322 yi.zhou        1.108         {
1323 mike           1.112             Uint16 code = str[i];
1324 david.dillard  1.105     
1325 mike           1.112             if (code > 0 && !(code & 0xFF00))
1326                                          os << char(code);
1327                                  else
1328                                      {
1329                                      // Print in hex format:
1330                                      char buffer[8];
1331                                      sprintf(buffer, "\\x%04X", code);
1332                                      os << buffer;
1333 david.dillard  1.105             }
1334 yi.zhou        1.108         }
1335 kumpf          1.39      
1336                              return os;
1337 mike           1.112     #endif // PEGASUS_OS_OS400
1338 kumpf          1.39      }
1339                          
1340 mike           1.112     void StringAppendCharAux(StringRep*& _rep)
1341 kumpf          1.39      {
1342 mike           1.112         StringRep* tmp;
1343                          
1344                              if (_rep->cap)
1345                              {
1346                                  tmp = StringRep::alloc(2 * _rep->cap);
1347                                  tmp->size = _rep->size;
1348                                  _copy(tmp->data, _rep->data, _rep->size);
1349                              }
1350                              else
1351                              {
1352                                  tmp = StringRep::alloc(8);
1353                                  tmp->size = 0;
1354                              }
1355                          
1356                              StringRep::unref(_rep);
1357                              _rep = tmp;
1358 kumpf          1.39      }
1359                          
1360 mike           1.112     PEGASUS_NAMESPACE_END
1361                          
1362                          /*
1363                          ================================================================================
1364                          
1365                          String optimizations:
1366                          
1367                              1.  Added mechanism allowing certain functions to be inlined only when
1368                                  used by internal Pegasus modules. External modules (i.e., providers)
1369                                  link to a non-inline version, which allows for binary compatibility.
1370                          
1371                              2.  Implemented copy-on-write with atomic increment/decrement. This
1372                                  yielded a 10% improvement for the 'gc' benchmark and an 11% improvement
1373                                  for the 'ni1000' benchmark.
1374                          
1375                              3.  Employed loop unrolling in several places. For example, see:
1376                          
1377                                      static Uint16* _find(const Uint16* s, size_t n, Uint16 c);
1378                          
1379                              4.  Used the "empty-rep" optimization (described in whitepaper from the
1380                                  GCC Developers Summit). This reduced default construction to a simple
1381 mike           1.112             pointer assignment.
1382                          
1383                                      inline String::String() : _rep(&_emptyRep) { }
1384                          
1385                              5.  Implemented Uint16 versions of toupper() and tolower() using tables.
1386                                  For example:
1387                          
1388                                      static const char _upper[] =
1389                                      {
1390                                          0,1,2,...255
1391                                      };
1392                          
1393                                      inline Uint16 _toUpper(Uint16 x)
1394                                      {
1395                                          return (x & 0xFF00) ? x : _upper[x];
1396                                      }
1397                          
1398 david.dillard  1.116             This outperforms the system implementation by avoiding an anding
1399 mike           1.112             operation.
1400                          
1401 david.dillard  1.116         6.  Implemented char* version of the following member functions to
1402                                  eliminate unnecessary creation of anonymous string objects
1403 mike           1.112             (temporaries).
1404                          
1405                                      String(const String& s1, const char* s2);
1406                                      String(const char* s1, const String& s2);
1407                                      String& String::operator=(const char* str);
1408                                      Uint32 String::find(const char* s) const;
1409                                      bool String::equal(const String& s1, const char* s2);
1410                                      static int String::compare(const String& s1, const char* s2);
1411                                      String& String::append(const char* str);
1412                                      String& String::append(const char* str, Uint32 size);
1413                                      static bool String::equalNoCase(const String& s1, const char* s2);
1414                                      String& operator=(const char* str)
1415                                      String& String::assign(const char* str)
1416                                      String& String::append(const char* str)
1417                                      Boolean operator==(const String& s1, const char* s2)
1418                                      Boolean operator==(const char* s1, const String& s2)
1419                                      Boolean operator!=(const String& s1, const char* s2)
1420                                      Boolean operator!=(const char* s1, const String& s2)
1421                                      Boolean operator<(const String& s1, const char* s2)
1422                                      Boolean operator<(const char* s1, const String& s2)
1423                                      Boolean operator>(const String& s1, const char* s2)
1424 mike           1.112                 Boolean operator>(const char* s1, const String& s2)
1425                                      Boolean operator<=(const String& s1, const char* s2)
1426                                      Boolean operator<=(const char* s1, const String& s2)
1427                                      Boolean operator>=(const String& s1, const char* s2)
1428                                      Boolean operator>=(const char* s1, const String& s2)
1429                                      String operator+(const String& s1, const char* s2)
1430                                      String operator+(const char* s1, const String& s2)
1431                          
1432 david.dillard  1.116         7.  Optimized _roundUpToPow2(), used in rounding the capacity to the next
1433 mike           1.112             power of two (algorithm from the book "Hacker's Delight").
1434                          
1435                                      static Uint32 _roundUpToPow2(Uint32 x)
1436                                      {
1437                                          if (x < 8)
1438                                              return 8;
1439                          
1440                                          x--;
1441                                          x |= (x >> 1);
1442                                          x |= (x >> 2);
1443                                          x |= (x >> 4);
1444                                          x |= (x >> 8);
1445                                          x |= (x >> 16);
1446                                          x++;
1447                          
1448                                          return x;
1449                                      }
1450                          
1451                              8.  Implemented "concatenating constructors" to eliminate temporaries
1452 david.dillard  1.116             created by operator+(). This scheme employs the "return-value
1453 mike           1.112             optimization" described by Stan Lippman.
1454                          
1455                                      inline String operator+(const String& s1, const String& s2)
1456                                      {
1457                                          return String(s1, s2, 0);
1458                                      }
1459                          
1460                              9.  Experimented to find the optimal initial size for a short string.
1461                                  Eight seems to offer the best tradeoff between space and time.
1462                          
1463                              10. Inlined all members of the Char16 class.
1464                          
1465                              11. Used Uint16 internally in the String class. This showed no improvement
1466                                  since Char16 was already fully inlined and was essentially reduced to
1467                                  Uint16 in any case.
1468                          
1469                              12. Implemented conditional logic (#if) allowing error checking logic to
1470 david.dillard  1.116             be excluded to improve performance. Examples include bounds checking
1471 mike           1.112             and null-pointer checking.
1472                          
1473                              13. Used memcpy() and memcmp() where possible. These are implemented using
1474                                  the rep family of instructions under Intel and are much faster.
1475                          
1476 david.dillard  1.116         14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
1477 mike           1.112             copy routine overhead.
1478                          
1479                              15. Added ASCII7 form of the constructor and assign().
1480                          
1481                                      String s("hello world", String::ASCII7);
1482                          
1483                                      s.assignASCII7("hello world");
1484                          
1485                                  This avoids slower UTF8 processing when not needed.
1486                          
1487                          ================================================================================
1488                          
1489                          TO-DO:
1490                          
1491                              (+) [DONE] Use PEGASUS_USE_EXPERIMENTAL_INTERFACES
1492                          
1493                              (+) [DONE] Submit BUG-2754 (Windows buffer limit).
1494                          
1495                              (+) [DONE] Eliminate char versions of find() and append().
1496                          
1497                              (+) [DONE] Remove PEGASUS_MAX_PRINTABLE_CHARACTER from Config.h
1498 mike           1.112     
1499                              (+) [DONE] Change _next_pow_2() to _roundUpToPow2().
1500                          
1501                              (+) [DONE] Change '99' to '2' in StringRep constructor (comment as well).
1502                          
1503                              (+) [DONE] Comment StringRep allocation layout.
1504                          
1505                              (+) [DONE] Conceal private inline functions.
1506                          
1507                              (+) [DONE] Shorten inclusion of StringInline.h in String.h.
1508                          
1509                              (+) [DONE] Change USE_INTERNAL_INLINE TO DISABLE_INTERNAL_INLINE or get
1510                                  rid of altogether.
1511                          
1512                              (+) [DONE] useCamelNotationOnAllFunctionNames.
1513                          
1514                              (+) [DONE] Check for overflow condition in StringRep::alloc().
1515                          
1516                              (+) [DONE] Remove tabs (used vim ":set expandtab" and ":retab").
1517                          
1518                              (+) [DONE] Fix throw-related memory leak.
1519 mike           1.112     
1520                              (+) [DONE] Look at PEP223 for coding security guidelines.
1521                          
1522                              (+) [DONE] Use old AtomicInt for now (new AtomicInt part of bug #4250).
1523 kumpf          1.39      
1524 mike           1.112         (+) [DONE] Removed appendASCII() and the ASCII form of the constructor.
1525 kumpf          1.39      
1526 mike           1.112         (+) DOC++ String.h - will open new bug?
1527 kumpf          1.39      
1528 mike           1.112         (+) Added PEGASUS_DISABLE_INTERNAL_INLINES macro (to permit suppression
1529                          	on certain platforms).
1530 kumpf          1.39      
1531 mike           1.112     ================================================================================
1532                          */

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2