(file) Return to String.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 karl  1.119 //%2006////////////////////////////////////////////////////////////////////////
   2 mike  1.27  //
   3 karl  1.97  // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
   4             // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
   5             // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
   6 karl  1.85  // IBM Corp.; EMC Corporation, The Open Group.
   7 karl  1.97  // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
   8             // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
   9 karl  1.98  // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
  10             // EMC Corporation; VERITAS Software Corporation; The Open Group.
  11 karl  1.119 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
  12             // EMC Corporation; Symantec Corporation; The Open Group.
  13 mike  1.27  //
  14             // Permission is hereby granted, free of charge, to any person obtaining a copy
  15 kumpf 1.41  // of this software and associated documentation files (the "Software"), to
  16             // deal in the Software without restriction, including without limitation the
  17             // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  18 mike  1.27  // sell copies of the Software, and to permit persons to whom the Software is
  19             // furnished to do so, subject to the following conditions:
  20 karl  1.119 // 
  21 kumpf 1.41  // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
  22 mike  1.27  // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
  23             // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
  24 kumpf 1.41  // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
  25             // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  26             // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  27 mike  1.27  // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  28             // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  29             //
  30             //==============================================================================
  31             //
  32 mike  1.112 // Author: Mike Brasher (mbrasher@austin.rr.com)
  33 mike  1.27  //
  34 david.dillard 1.116 // Modified By:
  35 mike          1.112 //     Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
  36                     //     Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297
  37 david.dillard 1.116 //     David Dillard, Symantec Corp. (david_dillard@symantec.com)
  38 mike          1.112 //     Mike Brasher (mike-brasher@austin.rr.com)
  39 mike          1.27  //
  40                     //%/////////////////////////////////////////////////////////////////////////////
  41                     
  42 jim.wunderlich 1.115 #include <Pegasus/Common/PegasusAssert.h>
  43 mike           1.113 #include <cstring>
  44 kumpf          1.48  #include "InternalException.h"
  45 david          1.69  #include "CommonUTF.h"
  46 mike           1.112 #include "MessageLoader.h"
  47                      #include "StringRep.h"
  48 david          1.69  
  49                      #ifdef PEGASUS_HAS_ICU
  50 chuck          1.99  #include <unicode/ustring.h>
  51                      #include <unicode/uchar.h>
  52 david          1.69  #endif
  53                      
  54 mike           1.112 PEGASUS_NAMESPACE_BEGIN
  55 mike           1.28  
  56 mike           1.112 //==============================================================================
  57                      //
  58                      // Compile-time macros (undefined by default).
  59                      //
  60                      //     PEGASUS_STRING_NO_THROW -- suppresses throwing of exceptions
  61 david.dillard  1.116 //
  62 mike           1.112 //     PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
  63                      //
  64                      //==============================================================================
  65 mike           1.27  
  66 mike           1.112 //==============================================================================
  67 kumpf          1.39  //
  68 mike           1.112 // File-scope definitions:
  69 kumpf          1.54  //
  70 mike           1.112 //==============================================================================
  71                      
  72                      // Note: this table is much faster than the system toupper(). Please do not
  73                      // change.
  74 kumpf          1.54  
  75 david.dillard  1.116 const Uint8 _toUpperTable[256] =
  76 kumpf          1.54  {
  77 mike           1.112     0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
  78                          0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
  79                          0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
  80                          0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
  81                          0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
  82                          0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
  83                          0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
  84                          0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
  85                          0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
  86                          0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
  87                          0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
  88                          0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
  89                          0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
  90                          0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
  91                          0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
  92                          0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
  93                          0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
  94                          0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
  95                          0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
  96                          0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
  97                          0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
  98 mike           1.112     0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
  99                          0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
 100                          0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
 101                          0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
 102                          0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
 103                          0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
 104                          0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
 105                          0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
 106                          0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
 107                          0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
 108                          0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
 109                      };
 110                      
 111                      // Note: this table is much faster than the system tulower(). Please do not
 112                      // change.
 113                      
 114 david.dillard  1.116 const Uint8 _toLowerTable[256] =
 115 mike           1.112 {
 116                          0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
 117                          0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
 118                          0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
 119                          0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
 120                          0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
 121                          0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
 122                          0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
 123                          0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
 124                          0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
 125                          0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
 126                          0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
 127                          0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
 128                          0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
 129                          0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
 130                          0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
 131                          0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
 132                          0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
 133                          0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
 134                          0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
 135                          0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
 136 mike           1.112     0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
 137                          0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
 138                          0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
 139                          0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
 140                          0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
 141                          0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
 142                          0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
 143                          0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
 144                          0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
 145                          0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
 146                          0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
 147                          0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
 148                      };
 149                      
 150                      // Converts 16-bit characters to upper case. This routine is faster than the
 151                      // system toupper(). Please do not change.
 152                      inline Uint16 _toUpper(Uint16 x)
 153                      {
 154                          return (x & 0xFF00) ? x : _toUpperTable[x];
 155 kumpf          1.54  }
 156                      
 157 mike           1.112 // Converts 16-bit characters to lower case. This routine is faster than the
 158                      // system toupper(). Please do not change.
 159                      inline Uint16 _toLower(Uint16 x)
 160 kumpf          1.54  {
 161 mike           1.112     return (x & 0xFF00) ? x : _toLowerTable[x];
 162                      }
 163                      
 164                      // Rounds x up to the nearest power of two (or just returns 8 if x < 8).
 165                      static Uint32 _roundUpToPow2(Uint32 x)
 166                      {
 167                      #ifndef PEGASUS_STRING_NO_THROW
 168                      
 169 dave.sudlik    1.119.2.1     // Check for potential overflow in x
 170                              PEGASUS_CHECK_CAPACITY_OVERFLOW(x);
 171 mike           1.112     
 172                          #endif
 173                          
 174                              if (x < 8)
 175                                  return 8;
 176                          
 177                              x--;
 178                              x |= (x >> 1);
 179                              x |= (x >> 2);
 180                              x |= (x >> 4);
 181                              x |= (x >> 8);
 182                              x |= (x >> 16);
 183                              x++;
 184                          
 185                              return x;
 186                          }
 187                          
 188                          template<class P, class Q>
 189                          static void _copy(P* p, const Q* q, size_t n)
 190                          {
 191                              // The following employs loop unrolling for efficiency. Please do not
 192 mike           1.112         // eliminate.
 193                          
 194                              while (n >= 8)
 195                              {
 196                                  p[0] = q[0];
 197                                  p[1] = q[1];
 198                                  p[2] = q[2];
 199                                  p[3] = q[3];
 200                                  p[4] = q[4];
 201                                  p[5] = q[5];
 202                                  p[6] = q[6];
 203                                  p[7] = q[7];
 204                                  p += 8;
 205                                  q += 8;
 206                                  n -= 8;
 207                              }
 208                          
 209                              while (n >= 4)
 210                              {
 211                                  p[0] = q[0];
 212                                  p[1] = q[1];
 213 mike           1.112             p[2] = q[2];
 214                                  p[3] = q[3];
 215                                  p += 4;
 216                                  q += 4;
 217                                  n -= 4;
 218                              }
 219                          
 220                              while (n--)
 221                                  *p++ = *q++;
 222                          }
 223                          
 224                          static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
 225                          {
 226                              // The following employs loop unrolling for efficiency. Please do not
 227                              // eliminate.
 228                          
 229                              while (n >= 4)
 230                              {
 231                                  if (s[0] == c)
 232                                      return (Uint16*)s;
 233                                  if (s[1] == c)
 234 mike           1.112                 return (Uint16*)&s[1];
 235                                  if (s[2] == c)
 236                                      return (Uint16*)&s[2];
 237                                  if (s[3] == c)
 238                                      return (Uint16*)&s[3];
 239 kumpf          1.82      
 240 mike           1.112             n -= 4;
 241                                  s += 4;
 242                              }
 243                          
 244                              if (n)
 245                              {
 246                                  if (*s == c)
 247                                      return (Uint16*)s;
 248                                  s++;
 249                                  n--;
 250                              }
 251                          
 252                              if (n)
 253                              {
 254                                  if (*s == c)
 255                                      return (Uint16*)s;
 256                                  s++;
 257                                  n--;
 258                              }
 259                          
 260                              if (n && *s == c)
 261 mike           1.112             return (Uint16*)s;
 262                          
 263                              // Not found!
 264                              return 0;
 265                          }
 266                          
 267                          static int _compare(const Uint16* s1, const Uint16* s2)
 268                          {
 269                              while (*s1 && *s2)
 270                              {
 271                                  int r = *s1++ - *s2++;
 272                          
 273                                  if (r)
 274                                      return r;
 275                              }
 276                          
 277                              if (*s2)
 278                                  return -1;
 279                              else if (*s1)
 280                                  return 1;
 281                          
 282 mike           1.112         return 0;
 283                          }
 284                          
 285                          static int _compareNoUTF8(const Uint16* s1, const char* s2)
 286                          {
 287                              Uint16 c1;
 288                              Uint16 c2;
 289                          
 290                              do
 291                              {
 292                                  c1 = *s1++;
 293                                  c2 = *s2++;
 294                          
 295                                  if (c1 == 0)
 296                                      return c1 - c2;
 297                              }
 298                              while (c1 == c2);
 299                          
 300                              return c1 - c2;
 301                          }
 302                          
 303 mike           1.112     static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)
 304                          {
 305                              memcpy(s1, s2, n * sizeof(Uint16));
 306                          }
 307                          
 308                          void StringThrowOutOfBounds()
 309                          {
 310                              throw IndexOutOfBoundsException();
 311                          }
 312                          
 313                          inline void _checkNullPointer(const void* ptr)
 314                          {
 315 kumpf          1.117     #ifndef PEGASUS_STRING_NO_THROW
 316 mike           1.112     
 317                              if (!ptr)
 318                                  throw NullPointer();
 319                          
 320                          #endif
 321                          }
 322                          
 323                          static void _StringThrowBadUTF8(Uint32 index)
 324                          {
 325                              MessageLoaderParms parms(
 326                                  "Common.String.BAD_UTF8",
 327                                  "The byte sequence starting at index $0 "
 328                                  "is not valid UTF-8 encoding.",
 329                                  index);
 330                              throw Exception(parms);
 331                          }
 332                          
 333                          static size_t _copyFromUTF8(
 334 david.dillard  1.116         Uint16* dest,
 335                              const char* src,
 336 mike           1.112         size_t n,
 337                              size_t& utf8_error_index)
 338                          {
 339                              Uint16* p = dest;
 340                              const Uint8* q = (const Uint8*)src;
 341                          
 342                              // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later).
 343                              // Use loop-unrolling.
 344                          
 345                              while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0)
 346                              {
 347                                  p[0] = q[0];
 348                                  p[1] = q[1];
 349                                  p[2] = q[2];
 350                                  p[3] = q[3];
 351                                  p[4] = q[4];
 352                                  p[5] = q[5];
 353                                  p[6] = q[6];
 354                                  p[7] = q[7];
 355                                  p += 8;
 356                                  q += 8;
 357 mike           1.112             n -= 8;
 358                              }
 359                          
 360                              while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0)
 361                              {
 362                                  p[0] = q[0];
 363                                  p[1] = q[1];
 364                                  p[2] = q[2];
 365                                  p[3] = q[3];
 366                                  p += 4;
 367                                  q += 4;
 368                                  n -= 4;
 369                              }
 370                          
 371                              switch (n)
 372                              {
 373                                  case 0:
 374                                      return p - dest;
 375                                  case 1:
 376                                      if (q[0] < 128)
 377                                      {
 378 mike           1.112                     p[0] = q[0];
 379                                          return p + 1 - dest;
 380                                      }
 381                                      break;
 382                                  case 2:
 383                                      if (((q[0]|q[1]) & 0x80) == 0)
 384                                      {
 385                                          p[0] = q[0];
 386                                          p[1] = q[1];
 387                                          return p + 2 - dest;
 388                                      }
 389                                      break;
 390                                  case 3:
 391                                      if (((q[0]|q[1]|q[2]) & 0x80) == 0)
 392                                      {
 393                                          p[0] = q[0];
 394                                          p[1] = q[1];
 395                                          p[2] = q[2];
 396                                          return p + 3 - dest;
 397                                      }
 398                                      break;
 399 mike           1.112         }
 400                          
 401                              // Process remaining characters.
 402                          
 403                              while (n)
 404                              {
 405                                  // Optimize for 7-bit ASCII case.
 406                          
 407                                  if (*q < 128)
 408                                  {
 409                                      *p++ = *q++;
 410                                      n--;
 411                                  }
 412                                  else
 413                                  {
 414                                      Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1;
 415                          
 416                                      if (c > n || !isValid_U8(q, c) ||
 417                                          UTF8toUTF16(&q, q + c, &p, p + n) != 0)
 418                                      {
 419                                          utf8_error_index = q - (const Uint8*)src;
 420 mike           1.112                     return size_t(-1);
 421                                      }
 422                          
 423                                      n -= c;
 424                                  }
 425                              }
 426                          
 427                              return p - dest;
 428                          }
 429                          
 430 david.dillard  1.116     // Note: dest must be at least three times src (plus an extra byte for
 431 mike           1.112     // terminator).
 432                          static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n)
 433                          {
 434                              // The following employs loop unrolling for efficiency. Please do not
 435                              // eliminate.
 436                          
 437                              const Uint16* q = src;
 438                              Uint8* p = (Uint8*)dest;
 439                          
 440                              while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
 441 kumpf          1.82          {
 442 mike           1.112             p[0] = q[0];
 443                                  p[1] = q[1];
 444                                  p[2] = q[2];
 445                                  p[3] = q[3];
 446                                  p += 4;
 447                                  q += 4;
 448                                  n -= 4;
 449 kumpf          1.82          }
 450 mike           1.112     
 451                              switch (n)
 452                              {
 453                                  case 0:
 454                                      return p - (Uint8*)dest;
 455                                  case 1:
 456                                      if (q[0] < 128)
 457                                      {
 458                                          p[0] = q[0];
 459                                          return p + 1 - (Uint8*)dest;
 460                                      }
 461                                      break;
 462                                  case 2:
 463                                      if (q[0] < 128 && q[1] < 128)
 464                                      {
 465                                          p[0] = q[0];
 466                                          p[1] = q[1];
 467                                          return p + 2 - (Uint8*)dest;
 468                                      }
 469                                      break;
 470                                  case 3:
 471 mike           1.112                 if (q[0] < 128 && q[1] < 128 && q[2] < 128)
 472                                      {
 473                                          p[0] = q[0];
 474                                          p[1] = q[1];
 475                                          p[2] = q[2];
 476                                          return p + 3 - (Uint8*)dest;
 477                                      }
 478                                      break;
 479                              }
 480                          
 481                              // If this line was reached, there must be characters greater than 128.
 482                          
 483                              UTF16toUTF8(&q, q + n, &p, p + 3 * n);
 484                          
 485                              return p - (Uint8*)dest;
 486 kumpf          1.54      }
 487                          
 488 mike           1.112     static inline size_t _convert(
 489                              Uint16* p, const char* q, size_t n, size_t& utf8_error_index)
 490 kumpf          1.54      {
 491 mike           1.112     #ifdef PEGASUS_STRING_NO_UTF8
 492                              _copy(p, q, n);
 493                              return n;
 494                          #else
 495                              return _copyFromUTF8(p, q, n, utf8_error_index);
 496                          #endif
 497 kumpf          1.54      }
 498                          
 499 mike           1.112     //==============================================================================
 500                          //
 501                          // class CString
 502                          //
 503                          //==============================================================================
 504                          
 505                          CString::CString(const CString& cstr) : _rep(0)
 506 kumpf          1.54      {
 507 mike           1.112         if (cstr._rep)
 508 kumpf          1.82          {
 509 mike           1.112             size_t n = strlen(cstr._rep) + 1;
 510                                  _rep = (char*)operator new(n);
 511                                  memcpy(_rep, cstr._rep, n);
 512 kumpf          1.82          }
 513 kumpf          1.54      }
 514                          
 515 kumpf          1.56      CString& CString::operator=(const CString& cstr)
 516                          {
 517 kumpf          1.82          if (&cstr != this)
 518 kumpf          1.81          {
 519 kumpf          1.82              if (_rep)
 520                                  {
 521 mike           1.112                 operator delete(_rep);
 522 kumpf          1.82                  _rep = 0;
 523                                  }
 524 mike           1.112     
 525 kumpf          1.82              if (cstr._rep)
 526                                  {
 527 mike           1.112                 size_t n = strlen(cstr._rep) + 1;
 528                                      _rep = (char*)operator new(n);
 529                                      memcpy(_rep, cstr._rep, n);
 530 kumpf          1.82              }
 531 kumpf          1.81          }
 532 mike           1.112     
 533 kumpf          1.56          return *this;
 534                          }
 535                          
 536 mike           1.112     //==============================================================================
 537 kumpf          1.54      //
 538 mike           1.112     // class StringRep
 539 kumpf          1.39      //
 540 mike           1.112     //==============================================================================
 541 kumpf          1.39      
  542 mike           1.112     StringRep StringRep::_emptyRep; // Definition of the static member, default-constructed; presumably the shared rep for empty strings (confirm in StringRep.h).
 543 mike           1.27      
 544 mike           1.112     inline StringRep* StringRep::alloc(size_t cap)
 545 mike           1.27      {
 546 mike           1.112     #ifndef PEGASUS_STRING_NO_THROW
 547 dave.sudlik    1.119.2.1     
 548                              // Check for potential overflow in cap
 549                              PEGASUS_CHECK_CAPACITY_OVERFLOW(cap);
 550 mike           1.27      
 551 mike           1.112     #endif
 552 mike           1.27      
 553 mike           1.112         StringRep* rep = (StringRep*)::operator new(
 554                                  sizeof(StringRep) + cap * sizeof(Uint16));
 555                              rep->cap = cap;
 556                              new(&rep->refs) AtomicInt(1);
 557                          
 558                              return rep;
 559 mike           1.27      }
 560                          
 561 mike           1.112     static inline void _reserve(StringRep*& rep, Uint32 cap)
 562 chuck          1.102     {
 563 mike           1.114         if (cap > rep->cap || rep->refs.get() != 1)
 564 chuck          1.102         {
 565 mike           1.112             size_t n = _roundUpToPow2(cap);
 566                                  StringRep* newRep = StringRep::alloc(n);
 567                                  newRep->size = rep->size;
 568                                  _copy(newRep->data, rep->data, rep->size + 1);
 569                                  StringRep::unref(rep);
 570                                  rep = newRep;
 571                              }
 572                          }
 573 david.dillard  1.105     
 574 mike           1.112     StringRep* StringRep::create(const Uint16* data, size_t size)
 575                          {
 576                              StringRep* rep = StringRep::alloc(size);
 577                              rep->size = size;
 578                              _copy(rep->data, data, size);
 579                              rep->data[size] = '\0';
 580                              return rep;
 581                          }
 582 chuck          1.102     
 583 mike           1.112     StringRep* StringRep::copyOnWrite(StringRep* rep)
 584                          {
 585                              // Return a new copy of rep. Release rep.
 586 chuck          1.102     
 587 mike           1.112         StringRep* newRep = StringRep::alloc(rep->size);
 588                              newRep->size = rep->size;
 589                              _copy(newRep->data, rep->data, rep->size);
 590                              newRep->data[newRep->size] = '\0';
 591                              StringRep::unref(rep);
 592                              return newRep;
 593 chuck          1.102     }
 594                          
 595 mike           1.112     StringRep* StringRep::create(const char* data, size_t size)
 596 kumpf          1.43      {
 597 mike           1.112         StringRep* rep = StringRep::alloc(size);
 598                              size_t utf8_error_index;
 599                              rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index);
 600                          
 601                          #ifndef PEGASUS_STRING_NO_THROW
 602                              if (rep->size == size_t(-1))
 603                              {
 604                                  StringRep::free(rep);
 605                                  _StringThrowBadUTF8(utf8_error_index);
 606                              }
 607                          #endif
 608 kumpf          1.43      
 609 mike           1.112         rep->data[rep->size] = '\0';
 610 kumpf          1.43      
 611 mike           1.112         return rep;
 612 mike           1.27      }
 613                          
 614 mike           1.112     Uint32 StringRep::length(const Uint16* str)
 615 mike           1.27      {
 616 mike           1.112         // Note: We could unroll this but it is rarely called.
 617                          
 618                              const Uint16* end = (Uint16*)str;
 619                          
 620                              while (*end++)
 621                                  ;
 622                          
 623                              return end - str - 1;
 624 kumpf          1.39      }
 625 tony           1.66      
 626 mike           1.112     //==============================================================================
 627                          //
 628                          // class String
 629                          //
 630                          //==============================================================================
 631                          
                              // Definition of the String::EMPTY static member: a default-constructed
                              // String.
 632                          const String String::EMPTY;
 633 mike           1.27      
 634 kumpf          1.39      String::String(const String& str, Uint32 n)
 635                          {
 636 mike           1.112         _checkBounds(n, str._rep->size);
 637                              _rep = StringRep::create(str._rep->data, n);
 638 kumpf          1.39      }
 639                          
 640                          String::String(const Char16* str)
 641                          {
 642 mike           1.112         _checkNullPointer(str);
 643                              _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
 644 mike           1.27      }
 645                          
 646 kumpf          1.39      String::String(const Char16* str, Uint32 n)
 647                          {
 648 mike           1.112         _checkNullPointer(str);
 649                              _rep = StringRep::create((Uint16*)str, n);
 650 kumpf          1.39      }
 651                          
 652                          String::String(const char* str)
 653 mike           1.27      {
 654 mike           1.112         _checkNullPointer(str);
 655 david.dillard  1.105     
 656 mike           1.112         // Set this just in case create() throws an exception.
 657                              _rep = &StringRep::_emptyRep;
 658                              _rep = StringRep::create(str, strlen(str));
 659 mike           1.27      }
 660                          
 661 kumpf          1.39      String::String(const char* str, Uint32 n)
 662 mike           1.27      {
 663 mike           1.112         _checkNullPointer(str);
 664 david.dillard  1.105     
 665 mike           1.112         // Set this just in case create() throws an exception.
 666                              _rep = &StringRep::_emptyRep;
 667                              _rep = StringRep::create(str, n);
 668 kumpf          1.39      }
 669 mike           1.27      
 670 mike           1.112     String::String(const String& s1, const String& s2)
 671 kumpf          1.39      {
 672 mike           1.112         size_t n1 = s1._rep->size;
 673                              size_t n2 = s2._rep->size;
 674                              size_t n = n1 + n2;
 675                              _rep = StringRep::alloc(n);
 676                              _copy(_rep->data, s1._rep->data, n1);
 677                              _copy(_rep->data + n1, s2._rep->data, n2);
 678                              _rep->size = n;
 679                              _rep->data[n] = '\0';
 680 mike           1.27      }
 681                          
 682 mike           1.112     String::String(const String& s1, const char* s2)
 683 mike           1.27      {
 684 mike           1.112         _checkNullPointer(s2);
 685                              size_t n1 = s1._rep->size;
 686                              size_t n2 = strlen(s2);
 687                              _rep = StringRep::alloc(n1 + n2);
 688                              _copy(_rep->data, s1._rep->data, n1);
 689                              size_t utf8_error_index;
 690                              size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index);
 691                          
 692                          #ifndef PEGASUS_STRING_NO_THROW
 693                              if (tmp == size_t(-1))
 694 kumpf          1.82          {
 695 mike           1.112             StringRep::free(_rep);
 696                                  _rep = &StringRep::_emptyRep;
 697                                  _StringThrowBadUTF8(utf8_error_index);
 698 kumpf          1.82          }
 699 mike           1.112     #endif
 700                          
 701                              _rep->size = n1 + tmp;
 702                              _rep->data[_rep->size] = '\0';
 703 mike           1.27      }
 704                          
 705 mike           1.112     String::String(const char* s1, const String& s2)
 706 mike           1.27      {
 707 mike           1.112         _checkNullPointer(s1);
 708                              size_t n1 = strlen(s1);
 709                              size_t n2 = s2._rep->size;
 710                              _rep = StringRep::alloc(n1 + n2);
 711                              size_t utf8_error_index;
 712                              size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index);
 713                          
 714                          #ifndef PEGASUS_STRING_NO_THROW
 715                              if (tmp ==  size_t(-1))
 716                              {
 717                                  StringRep::free(_rep);
 718                                  _rep = &StringRep::_emptyRep;
 719                                  _StringThrowBadUTF8(utf8_error_index);
 720                              }
 721                          #endif
 722                          
 723                              _rep->size = n2 + tmp;
 724                              _copy(_rep->data + n1, s2._rep->data, n2);
 725                              _rep->data[_rep->size] = '\0';
 726 mike           1.27      }
 727                          
 728 mike           1.112     String& String::assign(const String& str)
 729 mike           1.27      {
 730 mike           1.112         if (_rep != str._rep)
 731 david.dillard  1.105         {
 732 mike           1.112             StringRep::unref(_rep);
 733                                  StringRep::ref(_rep = str._rep);
 734 david.dillard  1.105         }
 735                          
 736 mike           1.27          return *this;
 737                          }
 738                          
 739                          String& String::assign(const Char16* str, Uint32 n)
 740                          {
 741 mike           1.112         _checkNullPointer(str);
 742                          
 743 mike           1.114         if (n > _rep->cap || _rep->refs.get() != 1)
 744 david.dillard  1.105         {
 745 mike           1.112             StringRep::unref(_rep);
 746                                  _rep = StringRep::alloc(n);
 747 david.dillard  1.105         }
 748                          
 749 mike           1.112         _rep->size = n;
 750                              _copy(_rep->data, (Uint16*)str, n);
 751                              _rep->data[n] = '\0';
 752                          
 753 mike           1.27          return *this;
 754                          }
 755                          
 756 mike           1.112     String& String::assign(const char* str, Uint32 n)
 757 chuck          1.102     {
 758 mike           1.112         _checkNullPointer(str);
 759                          
 760 mike           1.114         if (n > _rep->cap || _rep->refs.get() != 1)
 761 david.dillard  1.105         {
 762 mike           1.112             StringRep::unref(_rep);
 763                                  _rep = StringRep::alloc(n);
 764 david.dillard  1.105         }
 765                          
 766 mike           1.112         size_t utf8_error_index;
 767                              _rep->size = _convert(_rep->data, str, n, utf8_error_index);
 768 chuck          1.102     
 769 mike           1.112     #ifndef PEGASUS_STRING_NO_THROW
 770                              if (_rep->size ==  size_t(-1))
 771 david.dillard  1.105         {
 772 mike           1.112             StringRep::free(_rep);
 773                                  _rep = &StringRep::_emptyRep;
 774                                  _StringThrowBadUTF8(utf8_error_index);
 775 david.dillard  1.105         }
 776 mike           1.112     #endif
 777                          
 778                              _rep->data[_rep->size] = 0;
 779 david.dillard  1.105     
 780 mike           1.27          return *this;
 781                          }
 782                          
 783 kumpf          1.39      void String::clear()
 784                          {
 785 mike           1.112         if (_rep->size)
 786                              {
 787 mike           1.114             if (_rep->refs.get() == 1)
 788 mike           1.112             {
 789                                      _rep->size = 0;
 790                                      _rep->data[0] = '\0';
 791                                  }
 792                                  else
 793                                  {
 794                                      StringRep::unref(_rep);
 795                                      _rep = &StringRep::_emptyRep;
 796                                  }
 797                              }
 798 kumpf          1.39      }
 799                          
 800 mike           1.112     void String::reserveCapacity(Uint32 cap)
 801 kumpf          1.39      {
 802 mike           1.112         _reserve(_rep, cap);
 803 kumpf          1.39      }
 804                          
 805 mike           1.112     CString String::getCString() const
 806                          {
 807 david.dillard  1.116         // A UTF8 string can have three times as many characters as its UTF16
 808                              // counterpart, so we allocate extra memory for the worst case. In the
 809 mike           1.112         // best case, we may need only one third of the memory allocated. But
 810 david.dillard  1.116         // downsizing the string afterwards is expensive and unecessary since
 811                              // CString objects are usually short-lived (disappearing after only a few
 812 mike           1.112         // instructions). CString objects are typically created on the stack as
 813                              // means to obtain a char* pointer.
 814                          
 815                          #ifdef PEGASUS_STRING_NO_UTF8
 816                              char* str = (char*)operator new(_rep->size + 1);
 817                              _copy(str, _rep->data, _rep->size);
 818                              str[_rep->size] = '\0';
 819                              return CString(str);
 820 gs.keenan      1.110     #else
 821 mike           1.112         Uint32 n = 3 * _rep->size;
 822                              char* str = (char*)operator new(n + 1);
 823                              size_t size = _copyToUTF8(str, _rep->data, _rep->size);
 824                              str[size] = '\0';
 825                              return CString(str);
 826 gs.keenan      1.110     #endif
 827 kumpf          1.39      }
 828                          
 829 mike           1.112     String& String::append(const Char16* str, Uint32 n)
 830 kumpf          1.39      {
 831 mike           1.112         _checkNullPointer(str);
 832                          
 833                              size_t oldSize = _rep->size;
 834                              size_t newSize = oldSize + n;
 835                              _reserve(_rep, newSize);
 836                              _copy(_rep->data + oldSize, (Uint16*)str, n);
 837                              _rep->size = newSize;
 838                              _rep->data[newSize] = '\0';
 839                          
 840                              return *this;
 841 kumpf          1.39      }
 842                          
 843 mike           1.112     String& String::append(const String& str)
 844 mike           1.27      {
 845 w.otsuka       1.119.2.2     return append((Char16*)(&(str._rep->data[0])), str._rep->size);
 846 mike           1.27      }
 847                          
 848 mike           1.112     String& String::append(const char* str, Uint32 size)
 849 mike           1.27      {
 850 mike           1.112         _checkNullPointer(str);
 851                          
 852                              size_t oldSize = _rep->size;
 853                              size_t cap = oldSize + size;
 854                          
 855                              _reserve(_rep, cap);
 856                              size_t utf8_error_index;
 857                              size_t tmp = _convert(
 858                                  (Uint16*)_rep->data + oldSize, str, size, utf8_error_index);
 859                          
 860                          #ifndef PEGASUS_STRING_NO_THROW
 861                              if (tmp ==  size_t(-1))
 862                              {
 863                                  StringRep::free(_rep);
 864                                  _rep = &StringRep::_emptyRep;
 865                                  _StringThrowBadUTF8(utf8_error_index);
 866                              }
 867                          #endif
 868 mike           1.27      
 869 mike           1.112         _rep->size += tmp;
 870                              _rep->data[_rep->size] = '\0';
 871 mike           1.27      
 872 kumpf          1.39          return *this;
 873                          }
 874                          
 875 mike           1.112     void String::remove(Uint32 index, Uint32 n)
 876 mike           1.27      {
 877 mike           1.112         if (n == PEG_NOT_FOUND)
 878                                  n = _rep->size - index;
 879                          
 880                              _checkBounds(index + n, _rep->size);
 881                          
 882 mike           1.114         if (_rep->refs.get() != 1)
 883 mike           1.112             _rep = StringRep::copyOnWrite(_rep);
 884 mike           1.27      
 885 jim.wunderlich 1.115         PEGASUS_ASSERT(index + n <= _rep->size);
 886 mike           1.27      
 887 mike           1.112         size_t rem = _rep->size - (index + n);
 888                              Uint16* data = _rep->data;
 889 mike           1.27      
 890 mike           1.112         if (rem)
 891                                  memmove(data + index, data + index + n, rem * sizeof(Uint16));
 892 mike           1.27      
 893 mike           1.112         _rep->size -= n;
 894                              data[_rep->size] = '\0';
 895 mike           1.27      }
 896                          
 897 mike           1.112     String String::subString(Uint32 index, Uint32 n) const
 898 mike           1.27      {
 899 mike           1.112         // Note: this implementation is very permissive but used for
 900                              // backwards compatibility.
 901                          
 902                              if (index < _rep->size)
 903 mike           1.27          {
 904 mike           1.112             if (n == PEG_NOT_FOUND || n > _rep->size - index)
 905                                      n = _rep->size - index;
 906 mike           1.27      
 907 w.otsuka       1.119.2.2         return String((Char16*)(_rep->data + index), n);
 908 mike           1.27          }
 909 david.dillard  1.105     
 910                              return String();
 911 mike           1.27      }
 912                          
 913                          Uint32 String::find(Char16 c) const
 914                          {
 915 mike           1.112         Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
 916 mike           1.27      
 917 mike           1.112         if (p)
 918 david.dillard  1.116             return static_cast<Uint32>(p - _rep->data);
 919 mike           1.27      
 920                              return PEG_NOT_FOUND;
 921                          }
 922                          
 923 kumpf          1.53      Uint32 String::find(Uint32 index, Char16 c) const
 924 mike           1.30      {
 925 mike           1.112         _checkBounds(index, _rep->size);
 926                          
 927                              if (index >= _rep->size)
 928                                  return PEG_NOT_FOUND;
 929                          
 930                              Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
 931 mike           1.30      
 932 mike           1.112         if (p)
 933 david.dillard  1.116             return static_cast<Uint32>(p - _rep->data);
 934 mike           1.30      
 935                              return PEG_NOT_FOUND;
 936                          }
 937                          
 938 mike           1.112     Uint32 StringFindAux(
 939                              const StringRep* _rep, const Char16* s, Uint32 n)
 940 mike           1.27      {
 941 mike           1.112         _checkNullPointer(s);
 942 mike           1.27      
 943 mike           1.112         const Uint16* data = _rep->data;
 944                              size_t rem = _rep->size;
 945                          
 946                              while (n <= rem)
 947 mike           1.30          {
 948 mike           1.112             Uint16* p = (Uint16*)_find(data, rem, s[0]);
 949                          
 950                                  if (!p)
 951                                      break;
 952 mike           1.30      
 953 mike           1.112             if (memcmp(p, s, n * sizeof(Uint16)) == 0)
 954 david.dillard  1.116                 return static_cast<Uint32>(p - _rep->data);
 955 david.dillard  1.105     
 956 mike           1.112             p++;
 957                                  rem -= p - data;
 958                                  data = p;
 959 mike           1.27          }
 960 mike           1.112     
 961 mike           1.27          return PEG_NOT_FOUND;
 962                          }
 963                          
 964 mike           1.112     Uint32 String::find(const char* s) const
 965                          {
 966                              _checkNullPointer(s);
 967                          
 968                              // Note: could optimize away creation of temporary, but this is rarely
 969                              // called.
 970                              return find(String(s));
 971                          }
 972                          
 973 mike           1.27      Uint32 String::reverseFind(Char16 c) const
 974                          {
 975 mike           1.112         Uint16 x = c;
 976                              Uint16* p = _rep->data;
 977                              Uint16* q = _rep->data + _rep->size;
 978 mike           1.27      
 979 mike           1.112         while (q != p)
 980 mike           1.27          {
 981 mike           1.112             if (*--q == x)
 982 david.dillard  1.116                 return static_cast<Uint32>(q - p);
 983 mike           1.27          }
 984                          
 985                              return PEG_NOT_FOUND;
 986                          }
 987                          
 988                          void String::toLower()
 989                          {
 990 david          1.69      #ifdef PEGASUS_HAS_ICU
 991 mike           1.112     
 992 yi.zhou        1.108         if (InitializeICU::initICUSuccessful())
 993 david          1.90          {
 994 mike           1.114             if (_rep->refs.get() != 1)
 995 mike           1.112                 _rep = StringRep::copyOnWrite(_rep);
 996                          
 997 yi.zhou        1.108             // This will do a locale-insensitive, but context-sensitive convert.
 998 david.dillard  1.116             // Since context-sensitive casing looks at adjacent chars, this
 999                                  // prevents optimizations where the us-ascii is converted before
1000 mike           1.112             // calling ICU.
1001 yi.zhou        1.108             // The string may shrink or expand after the convert.
1002                          
1003 mike           1.112             //// First calculate size of resulting string. u_strToLower() returns
1004                                  //// only the size when zero is passed as the destination size argument.
1005                          
1006 yi.zhou        1.108             UErrorCode err = U_ZERO_ERROR;
1007                          
1008 mike           1.112             int32_t newSize = u_strToLower(
1009                                      NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
1010 david.dillard  1.116     
1011 mike           1.112             err = U_ZERO_ERROR;
1012                          
1013                                  //// Reserve enough space for the result.
1014                          
1015                                  if ((Uint32)newSize > _rep->cap)
1016                                      _reserve(_rep, newSize);
1017                          
1018                                  //// Perform the conversion (overlapping buffers are allowed).
1019 chuck          1.99      
1020 mike           1.112             u_strToLower((UChar*)_rep->data, newSize,
1021                                      (UChar*)_rep->data, _rep->size, NULL, &err);
1022 yi.zhou        1.108     
1023 mike           1.112             _rep->size = newSize;
1024                                  return;
1025 david          1.90          }
1026 mike           1.112     
1027                          #endif /* PEGASUS_HAS_ICU */
1028                          
1029 mike           1.114         if (_rep->refs.get() != 1)
1030 mike           1.112             _rep = StringRep::copyOnWrite(_rep);
1031                          
1032                              Uint16* p = _rep->data;
1033                              size_t n = _rep->size;
1034                          
1035                              for (; n--; p++)
1036 david          1.90          {
1037 mike           1.112             if (!(*p & 0xFF00))
1038                                      *p = _toLower(*p);
1039 mike           1.27          }
1040 kumpf          1.39      }
1041                          
1042 chuck          1.99      void String::toUpper()
1043 david          1.90      {
1044                          #ifdef PEGASUS_HAS_ICU
1045 mike           1.112     
1046 yi.zhou        1.108         if (InitializeICU::initICUSuccessful())
1047 chuck          1.99          {
1048 mike           1.114             if (_rep->refs.get() != 1)
1049 mike           1.112                 _rep = StringRep::copyOnWrite(_rep);
1050                          
1051 yi.zhou        1.108             // This will do a locale-insensitive, but context-sensitive convert.
1052 david.dillard  1.116             // Since context-sensitive casing looks at adjacent chars, this
1053                                  // prevents optimizations where the us-ascii is converted before
1054 mike           1.112             // calling ICU.
1055 yi.zhou        1.108             // The string may shrink or expand after the convert.
1056                          
1057 mike           1.112             //// First calculate size of resulting string. u_strToUpper() returns
1058                                  //// only the size when zero is passed as the destination size argument.
1059                          
1060 yi.zhou        1.108             UErrorCode err = U_ZERO_ERROR;
1061                          
1062 mike           1.112             int32_t newSize = u_strToUpper(
1063                                      NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
1064                          
1065                                  err = U_ZERO_ERROR;
1066                          
1067                                  //// Reserve enough space for the result.
1068                          
1069                                  if ((Uint32)newSize > _rep->cap)
1070                                      _reserve(_rep, newSize);
1071                          
1072                                  //// Perform the conversion (overlapping buffers are allowed).
1073                          
1074                                  u_strToUpper((UChar*)_rep->data, newSize,
1075                                      (UChar*)_rep->data, _rep->size, NULL, &err);
1076 chuck          1.99      
1077 mike           1.112             _rep->size = newSize;
1078 yi.zhou        1.108     
1079 mike           1.112             return;
1080 david          1.91          }
1081 mike           1.112     
1082                          #endif /* PEGASUS_HAS_ICU */
1083                          
1084 mike           1.114         if (_rep->refs.get() != 1)
1085 mike           1.112             _rep = StringRep::copyOnWrite(_rep);
1086                          
1087                              Uint16* p = _rep->data;
1088                              size_t n = _rep->size;
1089                          
1090                              for (; n--; p++)
1091                                  *p = _toUpper(*p);
1092 david          1.90      }
1093                          
1094 kumpf          1.43      int String::compare(const String& s1, const String& s2, Uint32 n)
1095 kumpf          1.39      {
1096 kumpf          1.118         const Uint16* p1 = s1._rep->data;
1097                              const Uint16* p2 = s2._rep->data;
1098 mike           1.27      
1099 kumpf          1.118         while (n--)
1100                              {
1101                                  int r = *p1++ - *p2++;
1102                                  if (r)
1103                                  {
1104                                      return r;
1105                                  }
1106                                  else if (!p1[-1])
1107                                  {
1108                                      // We must have encountered a null terminator in both s1 and s2
1109                                      return 0;
1110                                  }
1111                              }
1112                              return 0;
1113 mike           1.27      }
1114                          
1115 kumpf          1.43      int String::compare(const String& s1, const String& s2)
1116 mike           1.30      {
1117 mike           1.112         return _compare(s1._rep->data, s2._rep->data);
1118                          }
1119 kumpf          1.43      
1120 mike           1.112     int String::compare(const String& s1, const char* s2)
1121                          {
1122                              _checkNullPointer(s2);
1123 mike           1.30      
1124 mike           1.112     #ifdef PEGASUS_STRING_NO_UTF8
1125                              return _compareNoUTF8(s1._rep->data, s2);
1126                          #else
1127                              // ATTN: optimize this!
1128                              return String::compare(s1, String(s2));
1129                          #endif
1130 mike           1.30      }
1131                          
1132 mike           1.112     int String::compareNoCase(const String& str1, const String& str2)
1133 kumpf          1.40      {
1134 david          1.69      #ifdef PEGASUS_HAS_ICU
1135 mike           1.112     
1136 yi.zhou        1.108         if (InitializeICU::initICUSuccessful())
1137                              {
1138 mike           1.112             return  u_strcasecmp(
1139 dave.sudlik    1.119.2.3             (const UChar*)str1._rep->data,
1140                                      (const UChar*)str2._rep->data,
1141                                      U_FOLD_CASE_DEFAULT
1142                                      );
1143 yi.zhou        1.108         }
1144 kumpf          1.40      
1145 mike           1.112     #endif /* PEGASUS_HAS_ICU */
1146                          
1147                              const Uint16* s1 = str1._rep->data;
1148                              const Uint16* s2 = str2._rep->data;
1149                          
1150                              while (*s1 && *s2)
1151 kumpf          1.40          {
1152 mike           1.112             int r = _toLower(*s1++) - _toLower(*s2++);
1153 kumpf          1.40      
1154 david.dillard  1.105             if (r)
1155                                      return r;
1156 kumpf          1.40          }
1157                          
1158 mike           1.112         if (*s2)
1159 david.dillard  1.105             return -1;
1160 mike           1.112         else if (*s1)
1161 david.dillard  1.105             return 1;
1162 kumpf          1.40      
1163                              return 0;
1164                          }
1165                          
1166 mike           1.112     Boolean StringEqualNoCase(const String& s1, const String& s2)
1167 mike           1.27      {
1168 mike           1.112     #ifdef PEGASUS_HAS_ICU
1169                          
1170                              return String::compareNoCase(s1, s2) == 0;
1171                          
1172                          #else /* PEGASUS_HAS_ICU */
1173 mike           1.27      
1174 mike           1.112         // The following employs loop unrolling for efficiency. Please do not
1175                              // eliminate.
1176 kumpf          1.39      
1177 mike           1.112         Uint16* p = (Uint16*)s1.getChar16Data();
1178                              Uint16* q = (Uint16*)s2.getChar16Data();
1179                              Uint32 n = s2.size();
1180                          
1181                              while (n >= 8)
1182                              {
1183                                  if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1184                                      ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1185                                      ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1186                                      ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) ||
1187                                      ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) ||
1188                                      ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) ||
1189                                      ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) ||
1190                                      ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7]))))
1191                                  {
1192                                      return false;
1193                                  }
1194 kumpf          1.39      
1195 mike           1.112             n -= 8;
1196                                  p += 8;
1197                                  q += 8;
1198                              }
1199 mike           1.27      
1200 mike           1.112         while (n >= 4)
1201 kumpf          1.39          {
1202 mike           1.112             if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1203                                      ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1204                                      ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1205                                      ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
1206 david.dillard  1.105             {
1207 mike           1.112                 return false;
1208 david.dillard  1.105             }
1209 mike           1.112     
1210                                  n -= 4;
1211                                  p += 4;
1212                                  q += 4;
1213                              }
1214                          
1215                              while (n--)
1216                              {
1217                                  if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
1218 david.dillard  1.105                 return false;
1219 mike           1.112     
1220                                  p++;
1221                                  q++;
1222 kumpf          1.39          }
1223 mike           1.28      
1224 kumpf          1.39          return true;
1225 mike           1.112     
1226                          #endif /* PEGASUS_HAS_ICU */
1227 david          1.69      }
1228                          
1229 mike           1.112     Boolean String::equalNoCase(const String& s1, const char* s2)
1230 david          1.69      {
1231 mike           1.112         _checkNullPointer(s2);
1232 david          1.69      
1233 mike           1.112     #if defined(PEGASUS_HAS_ICU)
1234 david          1.69      
1235 mike           1.112         return String::equalNoCase(s1, String(s2));
1236 david          1.69      
1237 mike           1.112     #elif defined(PEGASUS_STRING_NO_UTF8)
1238 david          1.69      
1239 mike           1.112         const Uint16* p1 = (Uint16*)s1._rep->data;
1240                              const char* p2 = s2;
1241                              size_t n = s1._rep->size;
1242 david.dillard  1.105     
1243 mike           1.112         while (n--)
1244                              {
1245                                  if (!*p2)
1246                                      return false;
1247 david          1.71      
1248 mike           1.112             if (_toUpper(*p1++) != _toUpperTable[int(*p2++)])
1249                                      return false;
1250                              }
1251 kumpf          1.42      
1252 mike           1.112         if (*p2)
1253                                  return false;
1254 david.dillard  1.116     
1255 mike           1.112         return true;
1256 karl           1.36      
1257 mike           1.112     #else /* PEGASUS_HAS_ICU */
1258 david.dillard  1.105     
1259 mike           1.112         // ATTN: optimize this!
1260                              return String::equalNoCase(s1, String(s2));
1261 david.dillard  1.105     
1262 mike           1.112     #endif /* PEGASUS_HAS_ICU */
1263                          }
1264 chuck          1.78      
1265 mike           1.112     Boolean String::equal(const String& s1, const String& s2)
1266 karl           1.36      {
1267 david.dillard  1.116         return s1._rep->size == s2._rep->size && memcmp(s1._rep->data,
1268 mike           1.112             s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;
1269 karl           1.36      }
1270                          
1271 mike           1.112     Boolean String::equal(const String& s1, const char* s2)
1272                          {
1273                          #ifdef PEGASUS_STRING_NO_UTF8
1274 kumpf          1.35      
1275 mike           1.112         _checkNullPointer(s2);
1276 kumpf          1.39      
1277 mike           1.112         const Uint16* p = (Uint16*)s1._rep->data;
1278                              const char* q = s2;
1279 kumpf          1.39      
1280 mike           1.112         while (*p && *q)
1281                              {
1282                                  if (*p++ != Uint16(*q++))
1283                                      return false;
1284                              }
1285 kumpf          1.39      
1286 mike           1.112         return !(*p || *q);
1287 kumpf          1.39      
1288 mike           1.112     #else /* PEGASUS_STRING_NO_UTF8 */
1289 kumpf          1.39      
1290 mike           1.112         return String::equal(s1, String(s2));
1291 kumpf          1.39      
1292 mike           1.112     #endif /* PEGASUS_STRING_NO_UTF8 */
1293 kumpf          1.39      }
1294                          
1295 kumpf          1.47      PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
1296 kumpf          1.39      {
1297 mike           1.112     #if defined(PEGASUS_OS_OS400)
1298 david          1.72      
1299 david          1.93          CString cstr = str.getCString();
1300 david          1.69          const char* utf8str = cstr;
1301 mike           1.112         os << utf8str;
1302                              return os;
1303 david.dillard  1.116     #else
1304 david          1.69      
1305 mike           1.112     #if defined(PEGASUS_HAS_ICU)
1306 david          1.69      
1307 yi.zhou        1.108         if (InitializeICU::initICUSuccessful())
1308                              {
1309 david.dillard  1.105             char *buf = NULL;
1310                                  const int size = str.size() * 6;
1311 mike           1.112             UnicodeString UniStr(
1312                                      (const UChar *)str.getChar16Data(), (int32_t)str.size());
1313 david.dillard  1.105             Uint32 bufsize = UniStr.extract(0,size,buf);
1314                                  buf = new char[bufsize+1];
1315                                  UniStr.extract(0,bufsize,buf);
1316                                  os << buf;
1317                                  os.flush();
1318                                  delete [] buf;
1319 david.dillard  1.116             return os;
1320 yi.zhou        1.108         }
1321 mike           1.112     
1322 david.dillard  1.116     #endif  // PEGASUS_HAS_ICU
1323 mike           1.112     
1324                              for (Uint32 i = 0, n = str.size(); i < n; i++)
1325 yi.zhou        1.108         {
1326 mike           1.112             Uint16 code = str[i];
1327 david.dillard  1.105     
1328 mike           1.112             if (code > 0 && !(code & 0xFF00))
1329                                          os << char(code);
1330                                  else
1331                                      {
1332                                      // Print in hex format:
1333                                      char buffer[8];
1334                                      sprintf(buffer, "\\x%04X", code);
1335                                      os << buffer;
1336 david.dillard  1.105             }
1337 yi.zhou        1.108         }
1338 kumpf          1.39      
1339                              return os;
1340 mike           1.112     #endif // PEGASUS_OS_OS400
1341 kumpf          1.39      }
1342                          
1343 mike           1.112     void StringAppendCharAux(StringRep*& _rep)
1344 kumpf          1.39      {
1345 mike           1.112         StringRep* tmp;
1346                          
1347                              if (_rep->cap)
1348                              {
1349                                  tmp = StringRep::alloc(2 * _rep->cap);
1350                                  tmp->size = _rep->size;
1351                                  _copy(tmp->data, _rep->data, _rep->size);
1352                              }
1353                              else
1354                              {
1355                                  tmp = StringRep::alloc(8);
1356                                  tmp->size = 0;
1357                              }
1358                          
1359                              StringRep::unref(_rep);
1360                              _rep = tmp;
1361 kumpf          1.39      }
1362                          
1363 mike           1.112     PEGASUS_NAMESPACE_END
1364                          
1365                          /*
1366                          ================================================================================
1367                          
1368                          String optimizations:
1369                          
1370                              1.  Added mechanism allowing certain functions to be inlined only when
1371                                  used by internal Pegasus modules. External modules (i.e., providers)
1372                                  link to a non-inline version, which allows for binary compatibility.
1373                          
1374                              2.  Implemented copy-on-write with atomic increment/decrement. This
1375                                  yielded a 10% improvement for the 'gc' benchmark and an 11% improvement
1376                                  for the 'ni1000' benchmark.
1377                          
1378                              3.  Employed loop unrolling in several places. For example, see:
1379                          
1380                                      static Uint16* _find(const Uint16* s, size_t n, Uint16 c);
1381                          
1382                              4.  Used the "empty-rep" optimization (described in whitepaper from the
1383                                  GCC Developers Summit). This reduced default construction to a simple
1384 mike           1.112             pointer assignment.
1385                          
1386                                      inline String::String() : _rep(&_emptyRep) { }
1387                          
1388                              5.  Implemented Uint16 versions of toupper() and tolower() using tables.
1389                                  For example:
1390                          
1391                                      static const char _upper[] =
1392                                      {
1393                                          0,1,2,...255
1394                                      };
1395                          
1396                                      inline Uint16 _toUpper(Uint16 x)
1397                                      {
1398                                          return (x & 0xFF00) ? x : _upper[x];
1399                                      }
1400                          
1401 david.dillard  1.116             This outperforms the system implementation by avoiding an anding
1402 mike           1.112             operation.
1403                          
1404 david.dillard  1.116         6.  Implemented char* version of the following member functions to
1405                                  eliminate unnecessary creation of anonymous string objects
1406 mike           1.112             (temporaries).
1407                          
1408                                      String(const String& s1, const char* s2);
1409                                      String(const char* s1, const String& s2);
1410                                      String& String::operator=(const char* str);
1411                                      Uint32 String::find(const char* s) const;
1412                                      bool String::equal(const String& s1, const char* s2);
1413                                      static int String::compare(const String& s1, const char* s2);
1414                                      String& String::append(const char* str);
1415                                      String& String::append(const char* str, Uint32 size);
1416                                      static bool String::equalNoCase(const String& s1, const char* s2);
1417                                      String& operator=(const char* str)
1418                                      String& String::assign(const char* str)
1419                                      String& String::append(const char* str)
1420                                      Boolean operator==(const String& s1, const char* s2)
1421                                      Boolean operator==(const char* s1, const String& s2)
1422                                      Boolean operator!=(const String& s1, const char* s2)
1423                                      Boolean operator!=(const char* s1, const String& s2)
1424                                      Boolean operator<(const String& s1, const char* s2)
1425                                      Boolean operator<(const char* s1, const String& s2)
1426                                      Boolean operator>(const String& s1, const char* s2)
1427 mike           1.112                 Boolean operator>(const char* s1, const String& s2)
1428                                      Boolean operator<=(const String& s1, const char* s2)
1429                                      Boolean operator<=(const char* s1, const String& s2)
1430                                      Boolean operator>=(const String& s1, const char* s2)
1431                                      Boolean operator>=(const char* s1, const String& s2)
1432                                      String operator+(const String& s1, const char* s2)
1433                                      String operator+(const char* s1, const String& s2)
1434                          
1435 david.dillard  1.116         7.  Optimized _roundUpToPow2(), used in rounding the capacity to the next
1436 mike           1.112             power of two (algorithm from the book "Hacker's Delight").
1437                          
1438                                      static Uint32 _roundUpToPow2(Uint32 x)
1439                                      {
1440                                          if (x < 8)
1441                                              return 8;
1442                          
1443                                          x--;
1444                                          x |= (x >> 1);
1445                                          x |= (x >> 2);
1446                                          x |= (x >> 4);
1447                                          x |= (x >> 8);
1448                                          x |= (x >> 16);
1449                                          x++;
1450                          
1451                                          return x;
1452                                      }
1453                          
1454                              8.  Implemented "concatenating constructors" to eliminate temporaries
1455 david.dillard  1.116             created by operator+(). This scheme employs the "return-value
1456 mike           1.112             optimization" described by Stan Lippman.
1457                          
1458                                      inline String operator+(const String& s1, const String& s2)
1459                                      {
1460                                          return String(s1, s2, 0);
1461                                      }
1462                          
1463                              9.  Experimented to find the optimal initial size for a short string.
1464                                  Eight seems to offer the best tradeoff between space and time.
1465                          
1466                              10. Inlined all members of the Char16 class.
1467                          
1468                              11. Used Uint16 internally in the String class. This showed no improvement
1469                                  since Char16 was already fully inlined and was essentially reduced to
1470                                  Uint16 in any case.
1471                          
1472                              12. Implemented conditional logic (#if) allowing error checking logic to
1473 david.dillard  1.116             be excluded for better performance. Examples include bounds checking
1474 mike           1.112             and null-pointer checking.
1475                          
1476                              13. Used memcpy() and memcmp() where possible. These are implemented using
1477                                  the rep family of instructions under Intel and are much faster.
1478                          
1479 david.dillard  1.116         14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
1480 mike           1.112             copy routine overhead.
1481                          
1482                              15. Added ASCII7 form of the constructor and assign().
1483                          
1484                                      String s("hello world", String::ASCII7);
1485                          
1486                                      s.assignASCII7("hello world");
1487                          
1488                                  This avoids slower UTF8 processing when not needed.
1489                          
1490                          ================================================================================
1491                          
1492                          TO-DO:
1493                          
1494                              (+) [DONE] Use PEGASUS_USE_EXPERIMENTAL_INTERFACES
1495                          
1496                              (+) [DONE] Submit BUG-2754 (Windows buffer limit).
1497                          
1498                              (+) [DONE] Eliminate char versions of find() and append().
1499                          
1500                              (+) [DONE] Remove PEGASUS_MAX_PRINTABLE_CHARACTER from Config.h
1501 mike           1.112     
1502                              (+) [DONE] Change _next_pow_2() to _roundUpToPow2().
1503                          
1504                              (+) [DONE] Change '99' to '2' in StringRep constructor (comment as well).
1505                          
1506                              (+) [DONE] Comment StringRep allocation layout.
1507                          
1508                              (+) [DONE] Conceal private inline functions.
1509                          
1510                              (+) [DONE] Shorten inclusion of StringInline.h in String.h.
1511                          
1512                              (+) [DONE] Change USE_INTERNAL_INLINE TO DISABLE_INTERNAL_INLINE or get
1513                                  rid of altogether.
1514                          
1515                              (+) [DONE] useCamelNotationOnAllFunctionNames.
1516                          
1517                              (+) [DONE] Check for overflow condition in StringRep::alloc().
1518                          
1519                              (+) [DONE] Remove tabs (used vim ":set expandtab" and ":retab").
1520                          
1521                              (+) [DONE] Fix throw-related memory leak.
1522 mike           1.112     
1523                              (+) [DONE] Look at PEP223 for coding security guidelines.
1524                          
1525                              (+) [DONE] Use old AtomicInt for now (new AtomicInt part of bug #4250).
1526 kumpf          1.39      
1527 mike           1.112         (+) [DONE] Removed appendASCII() and the ASCII form of the constructor.
1528 kumpf          1.39      
1529 mike           1.112         (+) DOC++ String.h - will open new bug?
1530 kumpf          1.39      
1531 mike           1.112         (+) Added PEGASUS_DISABLE_INTERNAL_INLINES macro (to permit suppression
1532                          	on certain platforms).
1533 kumpf          1.39      
1534 mike           1.112     ================================================================================
1535                          */

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2