(file) Return to String.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 karl  1.98 //%2005////////////////////////////////////////////////////////////////////////
   2 mike  1.27 //
   3 karl  1.97 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
   4            // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
   5            // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
   6 karl  1.85 // IBM Corp.; EMC Corporation, The Open Group.
   7 karl  1.97 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
   8            // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
   9 karl  1.98 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
  10            // EMC Corporation; VERITAS Software Corporation; The Open Group.
  11 mike  1.27 //
  12            // Permission is hereby granted, free of charge, to any person obtaining a copy
  13 kumpf 1.41 // of this software and associated documentation files (the "Software"), to
  14            // deal in the Software without restriction, including without limitation the
  15            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  16 mike  1.27 // sell copies of the Software, and to permit persons to whom the Software is
  17            // furnished to do so, subject to the following conditions:
  18 david.dillard 1.105 //
  19 kumpf         1.41  // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
  20 mike          1.27  // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
  21                     // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
  22 kumpf         1.41  // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
  23                     // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  24                     // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  25 mike          1.27  // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  26                     // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27                     //
  28                     //==============================================================================
  29                     //
  30                     // Author: Mike Brasher (mbrasher@bmc.com)
  31                     //
  32 mike          1.111.6.1 // Modified By: 
  33                         //     Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
  34                         //     Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297
  35                         //     David Dillard, VERITAS Software Corp. (david.dillard@veritas.com)
  36                         //     Mike Brasher (mike-brasher@austin.rr.com)
  37 mike          1.27      //
  38                         //%/////////////////////////////////////////////////////////////////////////////
  39                         
  40 mike          1.111.6.1 #include <cassert>
  41 kumpf         1.48      #include "InternalException.h"
  42 david         1.69      #include "CommonUTF.h"
  43 mike          1.111.6.1 #include "MessageLoader.h"
  44 mike          1.111.6.2 #include "StringRep.h"
  45 david         1.69      
  46                         #ifdef PEGASUS_HAS_ICU
  47 chuck         1.99      #include <unicode/ustring.h>
  48                         #include <unicode/uchar.h>
  49 david         1.69      #endif
  50                         
  51 mike          1.27      PEGASUS_NAMESPACE_BEGIN
  52                         
  53 mike          1.111.6.1 //==============================================================================
  54                         //
  55                         // Compile-time macros (undefined by default).
  56                         //
  57                         //     PEGASUS_STRING_NO_THROW -- suppresses throwing of exceptions
  58                         //      
  59                         //     PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
  60                         //
  61                         //==============================================================================
  62                         
  63                         //==============================================================================
  64 kumpf         1.39      //
  65 mike          1.111.6.1 // File-scope definitions:
  66 kumpf         1.54      //
  67 mike          1.111.6.1 //==============================================================================
  68                         
   // Lookup table mapping each 8-bit character code to its upper-case form.
   // Codes 0x61..0x7A ('a'..'z') map to 0x41..0x5A ('A'..'Z'); every other code,
   // including all codes at or above 0x80, maps to itself.
   const Uint8 _toUpperTable[256] =
   {
       0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
       0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
       0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
       0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
       0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
       0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
       0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
       0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
       0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
       0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
       0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
       0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
       0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
       0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
       0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
       0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
       0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
       0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
       0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
       0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
       0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
       0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
       0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
       0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
       0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
       0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
       0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
       0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
       0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
       0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
       0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
       0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
   };
 104                         
  // Lookup table mapping each 8-bit character code to its lower-case form.
  // Codes 0x41..0x5A ('A'..'Z') map to 0x61..0x7A ('a'..'z'); every other code,
  // including all codes at or above 0x80, maps to itself.
  const Uint8 _toLowerTable[256] =
  {
      0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
      0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
      0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
      0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
      0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
      0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
      0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
      0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
      0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
      0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
      0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
      0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
      0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
      0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
      0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
      0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
      0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
      0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
      0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
      0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
      0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
      0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
      0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
      0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
      0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
      0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
      0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
      0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
      0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
      0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
      0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
      0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
  };
 140 kumpf         1.54      
 141 mike          1.111.6.1 // Converts 16-bit characters to upper case.
 142 mike          1.111.6.6 inline Uint16 _toUpper(Uint16 x)
 143 kumpf         1.54      {
 144 mike          1.111.6.6     return (x & 0xFF00) ? x : _toUpperTable[x];
 145 kumpf         1.54      }
 146                         
 147 mike          1.111.6.1 // Converts 16-bit characters to lower case.
 148 mike          1.111.6.6 inline Uint16 _toLower(Uint16 x)
 149 kumpf         1.54      {
 150 mike          1.111.6.6     return (x & 0xFF00) ? x : _toLowerTable[x];
 151 mike          1.111.6.1 }
 152 kumpf         1.82      
 153 mike          1.111.6.6 // Rounds x up to the nearest power of two (or just returns 8 if x < 8).
 154 mike          1.111.6.5 static Uint32 _roundUpToPow2(Uint32 x)
 155 mike          1.111.6.1 {
 156 mike          1.111.6.7 #ifndef PEGASUS_STRING_NO_THROW
 157                         
 158 mike          1.111.6.6     if (x > 0x0FFFFFFF)
 159 mike          1.111.6.8         throw PEGASUS_STD(bad_alloc)();
 160 mike          1.111.6.6 
 161 mike          1.111.6.7 #endif
 162                         
 163 mike          1.111.6.1     if (x < 8)
 164 mike          1.111.6.8         return 8;
 165 mike          1.111.6.1 
 166                             x--;
 167                             x |= (x >> 1);
 168                             x |= (x >> 2);
 169                             x |= (x >> 4);
 170                             x |= (x >> 8);
 171                             x |= (x >> 16);
 172                             x++;
 173                         
 174                             return x;
 175                         }
 176                         
 177                         template<class P, class Q>
 178                         static void _copy(P* p, const Q* q, size_t n)
 179                         {
 180                             // Use loop unrolling.
 181                         
 182                             while (n >= 8)
 183 kumpf         1.82          {
 184 mike          1.111.6.8         p[0] = q[0];
 185                                 p[1] = q[1];
 186                                 p[2] = q[2];
 187                                 p[3] = q[3];
 188                                 p[4] = q[4];
 189                                 p[5] = q[5];
 190                                 p[6] = q[6];
 191                                 p[7] = q[7];
 192                                 p += 8;
 193                                 q += 8;
 194                                 n -= 8;
 195 kumpf         1.82          }
 196 mike          1.111.6.1 
 197                             while (n >= 4)
 198                             {
 199 mike          1.111.6.8         p[0] = q[0];
 200                                 p[1] = q[1];
 201                                 p[2] = q[2];
 202                                 p[3] = q[3];
 203                                 p += 4;
 204                                 q += 4;
 205                                 n -= 4;
 206 mike          1.111.6.1     }
 207                         
 208                             while (n--)
 209 mike          1.111.6.8         *p++ = *q++;
 210 kumpf         1.54      }
 211                         
 212 mike          1.111.6.1 static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
 213 kumpf         1.54      {
 214 mike          1.111.6.1     while (n >= 4)
 215                             {
 216 mike          1.111.6.8         if (s[0] == c)
 217                                     return (Uint16*)s;
 218                                 if (s[1] == c)
 219                                     return (Uint16*)&s[1];
 220                                 if (s[2] == c)
 221                                     return (Uint16*)&s[2];
 222                                 if (s[3] == c)
 223                                     return (Uint16*)&s[3];
 224 mike          1.111.6.1 
 225 mike          1.111.6.8         n -= 4;
 226                                 s += 4;
 227 mike          1.111.6.1     }
 228                         
 229                             if (n)
 230                             {
 231 mike          1.111.6.8         if (*s == c)
 232                                     return (Uint16*)s;
 233                                 s++;
 234                                 n--;
 235 mike          1.111.6.1     }
 236                         
 237                             if (n)
 238                             {
 239 mike          1.111.6.8         if (*s == c)
 240                                     return (Uint16*)s;
 241                                 s++;
 242                                 n--;
 243 mike          1.111.6.1     }
 244                         
 245                             if (n && *s == c)
 246 mike          1.111.6.8         return (Uint16*)s;
 247 mike          1.111.6.1 
 248                             // Not found!
 249                             return 0;
 250 kumpf         1.54      }
 251                         
 252 mike          1.111.6.1 static int _compare(const Uint16* s1, const Uint16* s2)
 253 kumpf         1.54      {
 254 mike          1.111.6.1     while (*s1 && *s2)
 255 kumpf         1.82          {
 256 mike          1.111.6.1         int r = *s1++ - *s2++;
 257                         
 258                                 if (r)
 259                                     return r;
 260 kumpf         1.82          }
 261 mike          1.111.6.1 
 262                             if (*s2)
 263                                 return -1;
 264                             else if (*s1)
 265                                 return 1;
 266                         
 267                             return 0;
 268 kumpf         1.54      }
 269                         
 270 mike          1.111.6.6 static int _compareNoUTF8(const Uint16* s1, const char* s2)
 271 kumpf         1.56      {
 272 mike          1.111.6.1     Uint16 c1;
 273                             Uint16 c2;
 274                         
 275                             do
 276 kumpf         1.81          {
 277 mike          1.111.6.8         c1 = *s1++;
 278                                 c2 = *s2++;
 279 mike          1.111.6.1 
 280 mike          1.111.6.8         if (c1 == 0)
 281                                     return c1 - c2;
 282 kumpf         1.81          }
 283 mike          1.111.6.1     while (c1 == c2);
 284                         
 285                             return c1 - c2;
 286 kumpf         1.56      }
 287                         
 288 mike          1.111.6.1 static int _compare(const Uint16* s1, const Uint16* s2, size_t n)
 289 kumpf         1.54      {
 290 mike          1.111.6.1     // This should only be called when s1 and s2 have the same length.
 291                         
 292                             while (n-- && (*s1++ - *s2++) == 0)
 293 mike          1.111.6.8         ;
 294 mike          1.111.6.1 
 295                             // 
 296                         
 297                             return s1[-1] - s2[-1];
 298 kumpf         1.54      }
 299                         
  // Non-template overload of _copy for the Uint16-to-Uint16 case: copies n
  // elements from s2 to s1 with a single memcpy. The regions must not overlap
  // (memcpy requirement).
  static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)
  {
      memcpy(s1, s2, n * sizeof(Uint16));
  }
 304 kumpf         1.39      
  // Unconditionally throws IndexOutOfBoundsException.
  // NOTE(review): the name looks like a typo for "StringThrowOutOfBounds";
  // it is an external symbol, so confirm against the header before renaming.
  void StrinThrowOutOfBounds()
  {
      throw IndexOutOfBoundsException();
  }
 309 mike          1.27      
  // _checkNullPointer(ptr): throws NullPointer when ptr is null. When
  // PEGASUS_STRING_NO_THROW is defined it expands to nothing, so no check is
  // performed and the argument is not evaluated.
  #ifdef PEGASUS_STRING_NO_THROW
  # define _checkNullPointer(ARG) /* empty */
  #else
  template<class T>
  inline void _checkNullPointer(const T* ptr)
  {
      if (!ptr)
          throw NullPointer();
  }
  #endif
 320                         
  // Converts the n bytes at src, interpreted as UTF-8, into 16-bit code units
  // stored at dest, and returns the number of code units written.
  //
  // Leading runs of 7-bit ASCII are copied directly; any other byte sequence is
  // handed to UTF8toUTF16(). Throws Exception (message key
  // "Common.String.BAD_UTF8") when a multi-byte sequence is longer than the
  // remaining input, fails isValid_U8(), or is rejected by UTF8toUTF16().
  //
  // No terminator is written, and the capacity of dest is not checked here;
  // the callers in this file pass a buffer allocated for n elements.
  static size_t _copyFromUTF8(Uint16* dest, const char* src, size_t n)
  {
      Uint16* p = dest;
      const Uint8* q = (const Uint8*)src;

      // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later).
      // Use loop-unrolling.

      // Eight bytes at a time: OR-ing them together and testing bit 7 detects
      // any non-ASCII byte in the group.
      while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0)
      {
          p[0] = q[0];
          p[1] = q[1];
          p[2] = q[2];
          p[3] = q[3];
          p[4] = q[4];
          p[5] = q[5];
          p[6] = q[6];
          p[7] = q[7];
          p += 8;
          q += 8;
          n -= 8;
      }

      // Same test, four bytes at a time.
      while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0)
      {
          p[0] = q[0];
          p[1] = q[1];
          p[2] = q[2];
          p[3] = q[3];
          p += 4;
          q += 4;
          n -= 4;
      }

      // Fast exit when at most three bytes remain and all of them are ASCII.
      // Any other situation (n >= 4, or a non-ASCII byte) falls through to the
      // general loop below.
      switch (n)
      {
          case 0:
              return p - dest;
          case 1:
              if (q[0] < 128)
              {
                  p[0] = q[0];
                  return p + 1 - dest;
              }
              break;
          case 2:
              if (((q[0]|q[1]) & 0x80) == 0)
              {
                  p[0] = q[0];
                  p[1] = q[1];
                  return p + 2 - dest;
              }
              break;
          case 3:
              if (((q[0]|q[1]|q[2]) & 0x80) == 0)
              {
                  p[0] = q[0];
                  p[1] = q[1];
                  p[2] = q[2];
                  return p + 3 - dest;
              }
              break;
      }

      // Process remaining characters.

      while (n)
      {
          // Optimize for 7-bit ASCII case.

          if (*q < 128)
          {
              *p++ = *q++;
              n--;
          }
          else
          {
              // c is the total byte length of the sequence that starts at *q
              // (trail-byte count reported by UTF_8_COUNT_TRAIL_BYTES, plus the
              // lead byte).
              Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1;

              // Reject sequences that run past the input or are malformed.
              // UTF8toUTF16 receives q and p by address; the code below relies
              // on it advancing both (n is reduced by c, q is not touched).
              if (c > n || !isValid_U8(q, c) ||
                  UTF8toUTF16(&q, q + c, &p, p + n) != 0)
              {
                  // $0 in the message is the byte offset of q within src.
                  MessageLoaderParms parms("Common.String.BAD_UTF8",
                      "The byte sequence starting at index $0 "
                      "is not valid UTF-8 encoding.",
                       q - (const Uint8*)src);
                  throw Exception(parms);
              }

              n -= c;
          }
      }

      return p - dest;
  }
 416                         
  // Converts the n 16-bit code units at src into UTF-8 bytes stored at dest and
  // returns the number of bytes written. No terminator is written here.
  //
  // Note: dest must be at least three times src (plus an extra byte for
  // terminator). The capacity passed to UTF16toUTF8() below is 3 * n for the
  // n code units still to be converted.
  static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n)
  {
      const Uint16* q = src;
      Uint8* p = (Uint8*)dest;

      // Copy leading 7-bit ASCII characters directly, four at a time.
      while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
      {
          p[0] = q[0];
          p[1] = q[1];
          p[2] = q[2];
          p[3] = q[3];
          p += 4;
          q += 4;
          n -= 4;
      }

      // Fast exit when at most three characters remain and all are ASCII.
      switch (n)
      {
          case 0:
              return p - (Uint8*)dest;
          case 1:
              if (q[0] < 128)
              {
                  p[0] = q[0];
                  return p + 1 - (Uint8*)dest;
              }
              break;
          case 2:
              if (q[0] < 128 && q[1] < 128)
              {
                  p[0] = q[0];
                  p[1] = q[1];
                  return p + 2 - (Uint8*)dest;
              }
              break;
          case 3:
              if (q[0] < 128 && q[1] < 128 && q[2] < 128)
              {
                  p[0] = q[0];
                  p[1] = q[1];
                  p[2] = q[2];
                  return p + 3 - (Uint8*)dest;
              }
              break;
      }

      // If this line was reached, at least one of the remaining characters is
      // 128 or greater, so the rest goes through the general converter. Its
      // return value is not checked.

      UTF16toUTF8(&q, q + n, &p, p + 3 * n);

      return p - (Uint8*)dest;
  }
 471                         
  // Converts n chars at q into 16-bit code units at p and returns the number of
  // code units produced. With PEGASUS_STRING_NO_UTF8 defined, each char is
  // copied to one code unit and n is returned; otherwise the input is decoded
  // as UTF-8 by _copyFromUTF8(), which may throw on invalid input.
  static inline size_t _convert(Uint16* p, const char* q, size_t n)
  {
  #ifdef PEGASUS_STRING_NO_UTF8
      _copy(p, q, n);
      return n;
  #else
      return _copyFromUTF8(p, q, n);
  #endif
  }
 481                         
 482 mike          1.111.6.1 //==============================================================================
 483 chuck         1.102     //
 484 mike          1.111.6.1 // class CString
 485 chuck         1.102     //
 486 mike          1.111.6.1 //==============================================================================
 487                         
 488                         CString::CString(const CString& cstr) : _rep(0)
 489 chuck         1.102     {
 490 mike          1.111.6.1     if (cstr._rep)
 491 chuck         1.102         {
 492 mike          1.111.6.8         size_t n = strlen(cstr._rep) + 1;
 493 mike          1.111.6.1         _rep = (char*)operator new(n);
 494 mike          1.111.6.8         memcpy(_rep, cstr._rep, n);
 495 mike          1.111.6.1     }
 496                         }
 497                         
 498                         CString& CString::operator=(const CString& cstr)
 499                         {
 500                             if (&cstr != this)
 501                             {
 502                                 if (_rep)
 503 david.dillard 1.105             {
 504 mike          1.111.6.1             operator delete(_rep);
 505                                     _rep = 0;
 506 chuck         1.102             }
 507 mike          1.111.6.1 
 508                                 if (cstr._rep)
 509 chuck         1.102             {
 510 mike          1.111.6.8             size_t n = strlen(cstr._rep) + 1;
 511 mike          1.111.6.1             _rep = (char*)operator new(n);
 512 mike          1.111.6.8             memcpy(_rep, cstr._rep, n);
 513 chuck         1.102             }
 514 mike          1.111.6.1     }
 515 chuck         1.102     
 516 mike          1.111.6.1     return *this;
 517                         }
 518                         
 519                         //==============================================================================
 520                         //
 521                         // class StringRep
 522                         //
 523                         //==============================================================================
 524                         
  // Definition of the static StringRep::_emptyRep member.
  // NOTE(review): presumably the shared representation used by empty strings;
  // confirm against StringRep.h.
  StringRep StringRep::_emptyRep;
 526 chuck         1.102     
 527 mike          1.111.6.1 inline StringRep* StringRep::alloc(size_t cap)
 528                         {
 529 mike          1.111.6.7 #ifndef PEGASUS_STRING_NO_THROW
 530                         
 531 mike          1.111.6.6     // Any string bigger than this is seriously suspect.
 532                             if (cap > 0x0FFFFFFF)
 533 mike          1.111.6.8         throw PEGASUS_STD(bad_alloc)();
 534 mike          1.111.6.6 
 535 mike          1.111.6.7 #endif
 536                         
 537 mike          1.111.6.1     StringRep* rep = (StringRep*)::operator new(
 538 mike          1.111.6.8         sizeof(StringRep) + cap * sizeof(Uint16));
 539 mike          1.111.6.1     rep->cap = cap;
 540                             Atomic_create(&rep->refs, 1);
 541                         
 542                             return rep;
 543 chuck         1.102     }
 544                         
 545 mike          1.111.6.1 static inline void _reserve(StringRep*& rep, Uint32 cap)
 546 kumpf         1.43      {
 547 mike          1.111.6.1     if (cap > rep->cap || Atomic_get(&rep->refs) != 1)
 548                             {
 549 mike          1.111.6.8         size_t n = _roundUpToPow2(cap);
 550                                 StringRep* newRep = StringRep::alloc(n);
 551                                 newRep->size = rep->size;
 552                                 _copy(newRep->data, rep->data, rep->size + 1);
 553                                 StringRep::unref(rep);
 554                                 rep = newRep;
 555 mike          1.111.6.1     }
 556                         }
 557 kumpf         1.43      
 558 mike          1.111.6.1 StringRep* StringRep::create(const Uint16* data, size_t size)
 559                         {
 560                             StringRep* rep = StringRep::alloc(size);
 561                             rep->size = size;
 562                             _copy(rep->data, data, size);
 563                             rep->data[size] = '\0';
 564                             return rep;
 565                         }
 566                         
 567 mike          1.111.6.6 StringRep* StringRep::copyOnWrite(StringRep* rep)
 568 mike          1.111.6.1 {
 569                             // Return a new copy of rep. Release rep.
 570                         
 571 mike          1.111.6.6     StringRep* newRep = StringRep::alloc(rep->size);
 572                             newRep->size = rep->size;
 573                             _copy(newRep->data, rep->data, rep->size);
 574                             newRep->data[newRep->size] = '\0';
 575 mike          1.111.6.1     StringRep::unref(rep);
 576 mike          1.111.6.6     return newRep;
 577 mike          1.111.6.1 }
 578 kumpf         1.43      
 579 mike          1.111.6.1 StringRep* StringRep::create(const char* data, size_t size)
 580 mike          1.27      {
 581 mike          1.111.6.1     StringRep* rep = StringRep::alloc(size);
 582                             rep->size = _convert((Uint16*)rep->data, data, size);
 583                             rep->data[rep->size] = '\0';
 584                         
 585                             return rep;
 586 mike          1.27      }
 587                         
 588 mike          1.111.6.1 StringRep* StringRep::createASCII7(const char* data, size_t size)
 589 mike          1.27      {
 590 mike          1.111.6.1     StringRep* rep = StringRep::alloc(size);
 591                             _copy((Uint16*)rep->data, data, size);
 592                             rep->data[rep->size = size] = '\0';
 593                             return rep;
 594 kumpf         1.39      }
 595 tony          1.66      
 596 mike          1.111.6.1 Uint32 StringRep::length(const Uint16* str)
 597                         {
 598                             // Note: We could unroll this but it is rarely called.
 599                         
 600                             const Uint16* end = (Uint16*)str;
 601                         
 602                             while (*end++)
 603 mike          1.111.6.8         ;
 604 mike          1.111.6.1 
 605                             return end - str - 1;
 606                         }
 607                         
 608                         //==============================================================================
 609                         //
 610                         // class String
 611                         //
 612                         //==============================================================================
 613                         
                             // Definition of the static member String::EMPTY,
                             // built with the String default constructor.
 614                         const String String::EMPTY;
 615 mike          1.27      
 616 kumpf         1.39      String::String(const String& str, Uint32 n)
 617                         {
 618 mike          1.111.6.6     _checkBounds(n, str._rep->size);
 619 mike          1.111.6.1     _rep = StringRep::create(str._rep->data, n);
 620 kumpf         1.39      }
 621                         
 622                         String::String(const Char16* str)
 623                         {
 624 mike          1.111.6.6     _checkNullPointer(str);
 625 mike          1.111.6.1     _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
 626 mike          1.27      }
 627                         
 628 kumpf         1.39      String::String(const Char16* str, Uint32 n)
 629                         {
 630 mike          1.111.6.6     _checkNullPointer(str);
 631 mike          1.111.6.1     _rep = StringRep::create((Uint16*)str, n);
 632 kumpf         1.39      }
 633                         
 634                         String::String(const char* str)
 635 mike          1.27      {
 636 mike          1.111.6.6     _checkNullPointer(str);
 637 mike          1.111.6.1     _rep = StringRep::create(str, strlen(str));
 638                         }
 639 david.dillard 1.105     
 640 mike          1.111.6.1 String::String(const char* str, String::ASCII7Tag tag)
 641                         {
 642 mike          1.111.6.6     _checkNullPointer(str);
 643 mike          1.111.6.1     _rep = StringRep::createASCII7(str, strlen(str));
 644 mike          1.27      }
 645                         
 646 kumpf         1.39      String::String(const char* str, Uint32 n)
 647 mike          1.27      {
 648 mike          1.111.6.6     _checkNullPointer(str);
 649 mike          1.111.6.1     _rep = StringRep::create(str, n);
 650                         }
 651 david.dillard 1.105     
 652 mike          1.111.6.1 String::String(const char* str, size_t n, String::ASCII7Tag tag)
 653                         {
 654 mike          1.111.6.6     _checkNullPointer(str);
 655 mike          1.111.6.1     _rep = StringRep::createASCII7(str, n);
 656 kumpf         1.39      }
 657 mike          1.27      
 658 mike          1.111.6.1 String::String(const String& s1, const String& s2)
 659 kumpf         1.39      {
 660 mike          1.111.6.1     size_t n1 = s1._rep->size;
 661                             size_t n2 = s2._rep->size;
 662                             size_t n = n1 + n2;
 663                             _rep = StringRep::alloc(n);
 664                             _copy(_rep->data, s1._rep->data, n1);
 665                             _copy(_rep->data + n1, s2._rep->data, n2);
 666                             _rep->size = n;
 667                             _rep->data[n] = '\0';
 668 mike          1.27      }
 669                         
 670 mike          1.111.6.1 String::String(const String& s1, const char* s2)
 671 mike          1.27      {
 672 mike          1.111.6.6     _checkNullPointer(s2);
 673 mike          1.111.6.1     size_t n1 = s1._rep->size;
 674                             size_t n2 = strlen(s2);
 675                             _rep = StringRep::alloc(n1 + n2);
 676                             _copy(_rep->data, s1._rep->data, n1);
 677                             _rep->size = n1 + _convert((Uint16*)_rep->data + n1, s2, n2);
 678                             _rep->data[_rep->size] = '\0';
 679 mike          1.27      }
 680                         
 681 mike          1.111.6.1 String::String(const char* s1, const String& s2)
 682 mike          1.27      {
 683 mike          1.111.6.6     _checkNullPointer(s1);
 684 mike          1.111.6.1     size_t n1 = strlen(s1);
 685                             size_t n2 = s2._rep->size;
 686                             _rep = StringRep::alloc(n1 + n2);
 687                             _rep->size = n2 + _convert((Uint16*)_rep->data, s1, n1);
 688                             _copy(_rep->data + n1, s2._rep->data, n2);
 689                             _rep->data[_rep->size] = '\0';
 690 mike          1.27      }
 691                         
 692 mike          1.111.6.1 String& String::assign(const String& str)
 693 mike          1.27      {
 694 mike          1.111.6.1     if (_rep != str._rep)
 695 david.dillard 1.105         {
 696 mike          1.111.6.8         StringRep::unref(_rep);
 697                                 StringRep::ref(_rep = str._rep);
 698 david.dillard 1.105         }
 699                         
 700 mike          1.27          return *this;
 701                         }
 702                         
 703                         String& String::assign(const Char16* str, Uint32 n)
 704                         {
 705 mike          1.111.6.6     _checkNullPointer(str);
 706 mike          1.111.6.1 
 707                             if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)
 708 david.dillard 1.105         {
 709 mike          1.111.6.8         StringRep::unref(_rep);
 710                                 _rep = StringRep::alloc(n);
 711 david.dillard 1.105         }
 712                         
 713 mike          1.111.6.1     _rep->size = n;
 714                             _copy(_rep->data, (Uint16*)str, n);
 715                             _rep->data[n] = '\0';
 716                         
 717 mike          1.27          return *this;
 718                         }
 719                         
 720 mike          1.111.6.1 String& String::assign(const char* str, Uint32 n)
 721 chuck         1.102     {
 722 mike          1.111.6.6     _checkNullPointer(str);
 723 mike          1.111.6.1 
 724                             if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)
 725 david.dillard 1.105         {
 726 mike          1.111.6.8         StringRep::unref(_rep);
 727                                 _rep = StringRep::alloc(n);
 728 david.dillard 1.105         }
 729                         
 730 mike          1.111.6.1     _rep->size = _convert(_rep->data, str, n);
 731                             _rep->data[_rep->size] = 0;
 732                         
 733 chuck         1.102         return *this;
 734                         }
 735                         
 736 mike          1.111.6.1 String& String::assignASCII7(const char* str, Uint32 n)
 737 mike          1.27      {
 738 mike          1.111.6.6     _checkNullPointer(str);
 739 mike          1.111.6.1 
 740                             if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)
 741 david.dillard 1.105         {
 742 mike          1.111.6.8         StringRep::unref(_rep);
 743                                 _rep = StringRep::alloc(n);
 744 david.dillard 1.105         }
 745                         
 746 mike          1.111.6.1     _copy(_rep->data, str, n);
 747                             _rep->data[_rep->size = n] = 0;
 748                         
 749 mike          1.27          return *this;
 750                         }
 751                         
 752 kumpf         1.39      void String::clear()
 753                         {
 754 mike          1.111.6.1     if (_rep->size)
 755                             {
 756 mike          1.111.6.8         if (Atomic_get(&_rep->refs) == 1)
 757                                 {
 758                                     _rep->size = 0;
 759                                     _rep->data[0] = '\0';
 760                                 }
 761                                 else
 762                                 {
 763                                     StringRep::unref(_rep);
 764                                     _rep = &StringRep::_emptyRep;
 765                                 }
 766 mike          1.111.6.1     }
 767 kumpf         1.39      }
 768                         
 769 mike          1.111.6.1 void String::reserveCapacity(Uint32 cap)
 770 kumpf         1.39      {
 771 mike          1.111.6.1     _reserve(_rep, cap);
 772 kumpf         1.39      }
 773                         
 774 mike          1.111.6.1 CString String::getCString() const
 775                         {
 776                         #ifdef PEGASUS_STRING_NO_UTF8
 777                             char* str = (char*)operator new(_rep->size + 1);
 778                             _copy(str, _rep->data, _rep->size);
 779                             str[_rep->size] = '\0';
 780                             return CString(str);
 781 gs.keenan     1.110     #else
 782 mike          1.111.6.1     Uint32 n = 3 * _rep->size;
 783                             char* str = (char*)operator new(n + 1);
 784 mike          1.111.6.6     size_t size = _copyToUTF8(str, _rep->data, _rep->size);
 785 mike          1.111.6.1     str[size] = '\0';
 786                             return CString(str);
 787 gs.keenan     1.110     #endif
 788 kumpf         1.39      }
 789                         
 790 mike          1.111.6.1 String& String::append(const Char16* str, Uint32 n)
 791 kumpf         1.39      {
 792 mike          1.111.6.6     _checkNullPointer(str);
 793 kumpf         1.39      
 794 mike          1.111.6.6     size_t oldSize = _rep->size;
 795                             size_t newSize = oldSize + n;
 796                             _reserve(_rep, newSize);
 797                             _copy(_rep->data + oldSize, (Uint16*)str, n);
 798                             _rep->size = newSize;
 799                             _rep->data[newSize] = '\0';
 800 mike          1.27      
 801 mike          1.111.6.1     return *this;
 802 mike          1.27      }
 803                         
 804 mike          1.111.6.1 String& String::append(const String& str)
 805 mike          1.27      {
 806 mike          1.111.6.1     return append((Char16*)str._rep->data, str._rep->size);
 807 mike          1.27      }
 808                         
 809 mike          1.111.6.1 String& String::append(const char* str, Uint32 size)
 810 kumpf         1.39      {
 811 mike          1.111.6.6     _checkNullPointer(str);
 812 mike          1.111.6.1 
 813 mike          1.111.6.6     size_t oldSize = _rep->size;
 814                             size_t cap = oldSize + size;
 815 mike          1.111.6.1 
 816                             _reserve(_rep, cap);
 817 mike          1.111.6.6     _rep->size += _convert((Uint16*)_rep->data + oldSize, str, size);
 818 mike          1.111.6.1     _rep->data[_rep->size] = '\0';
 819 kumpf         1.39      
 820 mike          1.27          return *this;
 821                         }
 822                         
 823 mike          1.111.6.1 void String::remove(Uint32 index, Uint32 n)
 824 mike          1.27      {
 825 mike          1.111.6.1     if (n == PEG_NOT_FOUND)
 826                                 n = _rep->size - index;
 827 mike          1.27      
 828 mike          1.111.6.6     _checkBounds(index + n, _rep->size);
 829 mike          1.27      
 830 mike          1.111.6.1     if (Atomic_get(&_rep->refs) != 1)
 831 mike          1.111.6.8         _rep = StringRep::copyOnWrite(_rep);
 832 mike          1.27      
 833 mike          1.111.6.1     assert(index + n <= _rep->size);
 834                         
 835                             size_t rem = _rep->size - (index + n);
 836                             Uint16* data = _rep->data;
 837                         
 838                             if (rem)
 839                                 memmove(data + index, data + index + n, rem * sizeof(Uint16));
 840                         
 841                             _rep->size -= n;
 842                             data[_rep->size] = '\0';
 843 mike          1.27      }
 844                         
 845 mike          1.111.6.1 String String::subString(Uint32 index, Uint32 n) const
 846 mike          1.27      {
 847 mike          1.111.6.1     // Note: this implementation is very permissive but used for
 848                             // backwards compatibility.
 849                         
 850                             if (index < _rep->size)
 851 mike          1.27          {
 852 mike          1.111.6.8         if (n == PEG_NOT_FOUND || n > _rep->size - index)
 853                                     n = _rep->size - index;
 854 mike          1.27      
 855 mike          1.111.6.8         return String((Char16*)_rep->data + index, n);
 856 mike          1.27          }
 857 david.dillard 1.105     
 858                             return String();
 859 mike          1.27      }
 860                         
 861                         Uint32 String::find(Char16 c) const
 862                         {
 863 mike          1.111.6.1     Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
 864 mike          1.27      
 865 mike          1.111.6.1     if (p)
 866 mike          1.111.6.8         return p - _rep->data;
 867 mike          1.27      
 868                             return PEG_NOT_FOUND;
 869                         }
 870                         
 871 kumpf         1.53      Uint32 String::find(Uint32 index, Char16 c) const
 872 mike          1.30      {
 873 mike          1.111.6.6     _checkBounds(index, _rep->size);
 874 mike          1.30      
 875 mike          1.111.6.1     if (index >= _rep->size)
 876 mike          1.111.6.8         return PEG_NOT_FOUND;
 877 mike          1.111.6.1 
 878                             Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
 879                         
 880                             if (p)
 881 mike          1.111.6.8         return p - _rep->data;
 882 mike          1.30      
 883                             return PEG_NOT_FOUND;
 884                         }
 885                         
 886 mike          1.111.6.6 Uint32 StringFindAux(
 887 mike          1.111.6.5     const StringRep* _rep, const Char16* s, Uint32 n)
 888 mike          1.27      {
 889 mike          1.111.6.6     _checkNullPointer(s);
 890 mike          1.27      
 891 mike          1.111.6.1     const Uint16* data = _rep->data;
 892                             size_t rem = _rep->size;
 893 mike          1.30      
 894 mike          1.111.6.1     while (n <= rem)
 895 mike          1.27          {
 896 mike          1.111.6.8         Uint16* p = (Uint16*)_find(data, rem, s[0]);
 897 david.dillard 1.105     
 898 mike          1.111.6.8         if (!p)
 899                                     break;
 900 mike          1.111.6.1 
 901 mike          1.111.6.8         if (memcmp(p, s, n * sizeof(Uint16)) == 0)
 902                                     return p - _rep->data;
 903 mike          1.111.6.1 
 904 mike          1.111.6.8         p++;
 905                                 rem -= p - data;
 906                                 data = p;
 907 mike          1.27          }
 908 mike          1.111.6.1 
 909 mike          1.27          return PEG_NOT_FOUND;
 910                         }
 911                         
 912 mike          1.111.6.1 Uint32 String::find(const char* s) const
 913                         {
 914 mike          1.111.6.6     _checkNullPointer(s);
 915 mike          1.111.6.1 
 916                             // Note: could optimize away creation of temporary, but this is rarely
 917                             // called.
 918                             return find(String(s));
 919                         }
 920                         
 921 mike          1.27      Uint32 String::reverseFind(Char16 c) const
 922                         {
 923 mike          1.111.6.1     Uint16 x = c;
 924                             Uint16* p = _rep->data;
 925                             Uint16* q = _rep->data + _rep->size;
 926 mike          1.27      
 927 mike          1.111.6.1     while (q != p)
 928 mike          1.27          {
 929 mike          1.111.6.8         if (*--q == x)
 930                                     return q - p;
 931 mike          1.27          }
 932                         
 933                             return PEG_NOT_FOUND;
 934                         }
 935                         
 936                         void String::toLower()
 937                         {
 938 david         1.69      #ifdef PEGASUS_HAS_ICU
 939 mike          1.111.6.1 
 940 yi.zhou       1.108         if (InitializeICU::initICUSuccessful())
 941 david         1.90          {
 942 mike          1.111.6.1         if (Atomic_get(&_rep->refs) != 1)
 943 mike          1.111.6.8             _rep = StringRep::copyOnWrite(_rep);
 944 mike          1.111.6.1 
 945 yi.zhou       1.108             // This will do a locale-insensitive, but context-sensitive convert.
 946 mike          1.111.6.1         // Since context-sensitive casing looks at adjacent chars, this 
 947                                 // prevents optimizations where the us-ascii is converted before 
 948                                 // calling ICU.
 949 yi.zhou       1.108             // The string may shrink or expand after the convert.
 950                         
 951 mike          1.111.6.8         //// First calculate size of resulting string. u_strToLower() returns
 952                                 //// only the size when zero is passed as the destination size argument.
 953 mike          1.111.6.1 
 954 yi.zhou       1.108             UErrorCode err = U_ZERO_ERROR;
 955                         
 956 mike          1.111.6.6         int32_t newSize = u_strToLower(
 957 mike          1.111.6.8             NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
 958 mike          1.111.6.1         
 959                                 err = U_ZERO_ERROR;
 960 chuck         1.99      
 961 mike          1.111.6.8         //// Reserve enough space for the result.
 962 mike          1.111.6.1 
 963 mike          1.111.6.8         if ((Uint32)newSize > _rep->cap)
 964                                     _reserve(_rep, newSize);
 965 mike          1.111.6.1 
 966 mike          1.111.6.8         //// Perform the conversion (overlapping buffers are allowed).
 967 yi.zhou       1.108     
 968 mike          1.111.6.6         u_strToLower((UChar*)_rep->data, newSize,
 969 mike          1.111.6.8             (UChar*)_rep->data, _rep->size, NULL, &err);
 970 mike          1.111.6.1 
 971 mike          1.111.6.8         _rep->size = newSize;
 972                                 return;
 973 david         1.90          }
 974 mike          1.111.6.1 
 975                         #endif /* PEGASUS_HAS_ICU */
 976                         
 977                             if (Atomic_get(&_rep->refs) != 1)
 978 mike          1.111.6.8         _rep = StringRep::copyOnWrite(_rep);
 979 mike          1.111.6.1 
 980                             Uint16* p = _rep->data;
 981                             size_t n = _rep->size;
 982                         
 983                             for (; n--; p++)
 984 david         1.90          {
 985 mike          1.111.6.8         if (!(*p & 0xFF00))
 986                                     *p = _toLower(*p);
 987 mike          1.27          }
 988 kumpf         1.39      }
 989                         
 990 chuck         1.99      void String::toUpper()
 991 david         1.90      {
 992                         #ifdef PEGASUS_HAS_ICU
 993 mike          1.111.6.1 
 994 yi.zhou       1.108         if (InitializeICU::initICUSuccessful())
 995 chuck         1.99          {
 996 mike          1.111.6.1         if (Atomic_get(&_rep->refs) != 1)
 997 mike          1.111.6.8             _rep = StringRep::copyOnWrite(_rep);
 998 mike          1.111.6.1 
 999 yi.zhou       1.108             // This will do a locale-insensitive, but context-sensitive convert.
1000 mike          1.111.6.1         // Since context-sensitive casing looks at adjacent chars, this 
1001                                 // prevents optimizations where the us-ascii is converted before 
1002                                 // calling ICU.
1003 yi.zhou       1.108             // The string may shrink or expand after the convert.
1004                         
1005 mike          1.111.6.8         //// First calculate size of resulting string. u_strToUpper() returns
1006                                 //// only the size when zero is passed as the destination size argument.
1007 mike          1.111.6.1 
1008 yi.zhou       1.108             UErrorCode err = U_ZERO_ERROR;
1009                         
1010 mike          1.111.6.6         int32_t newSize = u_strToUpper(
1011 mike          1.111.6.8             NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
1012 chuck         1.99      
1013 mike          1.111.6.1         err = U_ZERO_ERROR;
1014 yi.zhou       1.108     
1015 mike          1.111.6.8         //// Reserve enough space for the result.
1016 david         1.90      
1017 mike          1.111.6.8         if ((Uint32)newSize > _rep->cap)
1018                                     _reserve(_rep, newSize);
1019 kumpf         1.39      
1020 mike          1.111.6.8         //// Perform the conversion (overlapping buffers are allowed).
1021 mike          1.27      
1022 mike          1.111.6.6         u_strToUpper((UChar*)_rep->data, newSize,
1023 mike          1.111.6.8             (UChar*)_rep->data, _rep->size, NULL, &err);
1024 mike          1.111.6.1 
1025 mike          1.111.6.8         _rep->size = newSize;
1026 mike          1.111.6.1 
1027 mike          1.111.6.8         return;
1028 mike          1.27          }
1029                         
1030 mike          1.111.6.1 #endif /* PEGASUS_HAS_ICU */
1031                         
1032                             if (Atomic_get(&_rep->refs) != 1)
1033 mike          1.111.6.8         _rep = StringRep::copyOnWrite(_rep);
1034 mike          1.111.6.1 
1035                             Uint16* p = _rep->data;
1036                             size_t n = _rep->size;
1037                         
1038                             for (; n--; p++)
1039 mike          1.111.6.8         *p = _toUpper(*p);
1040 mike          1.27      }
1041                         
1042 mike          1.111.6.1 int String::compare(const String& s1, const String& s2, Uint32 n)
1043 mike          1.30      {
1044 mike          1.111.6.1     assert(n <= s1._rep->size);
1045                             assert(n <= s2._rep->size);
1046 kumpf         1.43      
1047 mike          1.111.6.1     // Ignoring error in which n is greater than s1.size() or s2.size()
1048                             return _compare(s1._rep->data, s2._rep->data, n);
1049                         }
1050 mike          1.30      
1051 mike          1.111.6.1 int String::compare(const String& s1, const String& s2)
1052                         {
1053                             return _compare(s1._rep->data, s2._rep->data);
1054                         }
1055 mike          1.30      
1056 mike          1.111.6.1 int String::compare(const String& s1, const char* s2)
1057                         {
1058 mike          1.111.6.6     _checkNullPointer(s2);
1059 mike          1.30      
1060 mike          1.111.6.1 #ifdef PEGASUS_STRING_NO_UTF8
1061 mike          1.111.6.6     return _compareNoUTF8(s1._rep->data, s2);
1062 mike          1.111.6.1 #else
1063                             // ATTN: optimize this!
1064                             return String::compare(s1, String(s2));
1065                         #endif
1066 mike          1.30      }
1067                         
1068 mike          1.111.6.1 int String::compareNoCase(const String& str1, const String& str2)
1069 kumpf         1.40      {
1070 david         1.69      #ifdef PEGASUS_HAS_ICU
1071 mike          1.111.6.1 
1072 yi.zhou       1.108         if (InitializeICU::initICUSuccessful())
1073                             {
1074 mike          1.111.6.1         return  u_strcasecmp(
1075 mike          1.111.6.8             str1._rep->data, str2._rep->data, U_FOLD_CASE_DEFAULT);
1076 yi.zhou       1.108         }
1077 kumpf         1.40      
1078 mike          1.111.6.1 #endif /* PEGASUS_HAS_ICU */
1079 kumpf         1.40      
1080 mike          1.111.6.1     const Uint16* s1 = str1._rep->data;
1081                             const Uint16* s2 = str2._rep->data;
1082                         
1083                             while (*s1 && *s2)
1084                             {
1085 mike          1.111.6.6         int r = _toLower(*s1++) - _toLower(*s2++);
1086 kumpf         1.40      
1087 david.dillard 1.105             if (r)
1088                                     return r;
1089 kumpf         1.40          }
1090                         
1091 mike          1.111.6.1     if (*s2)
1092 david.dillard 1.105             return -1;
1093 mike          1.111.6.1     else if (*s1)
1094 david.dillard 1.105             return 1;
1095 kumpf         1.40      
1096                             return 0;
1097                         }
1098                         
1099 mike          1.111.6.6 Boolean StringEqualNoCase(const String& s1, const String& s2)
1100 mike          1.27      {
1101 david         1.69      #ifdef PEGASUS_HAS_ICU
1102 kumpf         1.39      
1103 mike          1.111.6.1     return String::compareNoCase(s1, s2) == 0;
1104 kumpf         1.39      
1105 mike          1.111.6.1 #else /* PEGASUS_HAS_ICU */
1106                         
1107 mike          1.111.6.5     Uint16* p = (Uint16*)s1.getChar16Data();
1108                             Uint16* q = (Uint16*)s2.getChar16Data();
1109                             Uint32 n = s2.size();
1110 mike          1.111.6.1 
1111                             while (n >= 8)
1112                             {
1113 mike          1.111.6.8         if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1114                                     ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1115                                     ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1116                                     ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) ||
1117                                     ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) ||
1118                                     ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) ||
1119                                     ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) ||
1120                                     ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7]))))
1121                                 {
1122                                     return false;
1123                                 }
1124                         
1125                                 n -= 8;
1126                                 p += 8;
1127                                 q += 8;
1128 mike          1.111.6.1     }
1129                         
1130                             while (n >= 4)
1131                             {
1132 mike          1.111.6.8         if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1133                                     ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1134                                     ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1135                                     ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
1136                                 {
1137                                     return false;
1138                                 }
1139                         
1140                                 n -= 4;
1141                                 p += 4;
1142                                 q += 4;
1143 mike          1.111.6.1     }
1144 mike          1.27      
1145 kumpf         1.39          while (n--)
1146                             {
1147 mike          1.111.6.8         if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
1148                                     return false;
1149 mike          1.111.6.1 
1150 mike          1.111.6.8         p++;
1151                                 q++;
1152 kumpf         1.39          }
1153 mike          1.28      
1154 kumpf         1.39          return true;
1155 david         1.69      
1156 mike          1.111.6.1 #endif /* PEGASUS_HAS_ICU */
1157                         }
1158 mike          1.27      
1159 mike          1.111.6.1 Boolean String::equalNoCase(const String& s1, const char* s2)
1160 david         1.69      {
1161 mike          1.111.6.6     _checkNullPointer(s2);
1162 david         1.69      
1163 mike          1.111.6.1 #if defined(PEGASUS_HAS_ICU)
1164 david         1.69      
1165 mike          1.111.6.1     return String::equalNoCase(s1, String(s2));
1166 david.dillard 1.105     
1167 mike          1.111.6.1 #elif defined(PEGASUS_STRING_NO_UTF8)
1168 david         1.71      
1169 mike          1.111.6.1     const Uint16* p1 = (Uint16*)s1._rep->data;
1170                             const char* p2 = s2;
1171                             size_t n = s1._rep->size;
1172 kumpf         1.42      
1173 mike          1.111.6.1     while (n--)
1174                             {
1175 mike          1.111.6.8         if (!*p2)
1176                                     return false;
1177 karl          1.36      
1178 mike          1.111.6.8         if (_toUpper(*p1++) != _toUpperTable[int(*p2++)])
1179                                     return false;
1180 mike          1.111.6.1     }
1181 david.dillard 1.105     
1182 mike          1.111.6.1     if (*p2)
1183 mike          1.111.6.8         return false;
1184 mike          1.111.6.1     
1185                             return true;
1186 david.dillard 1.105     
1187 mike          1.111.6.1 #else /* PEGASUS_HAS_ICU */
1188 chuck         1.78      
1189 mike          1.111.6.1     // ATTN: optimize this!
1190                             return String::equalNoCase(s1, String(s2));
1191 david.dillard 1.105     
1192 mike          1.111.6.1 #endif /* PEGASUS_HAS_ICU */
1193                         }
1194 karl          1.36      
1195 mike          1.111.6.1 Boolean String::equal(const String& s1, const String& s2)
1196 karl          1.36      {
1197 mike          1.111.6.1     return s1._rep->size == s2._rep->size && memcmp(s1._rep->data, 
1198 mike          1.111.6.8         s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;
1199 karl          1.36      }
1200                         
1201 mike          1.111.6.1 Boolean String::equal(const String& s1, const char* s2)
1202 karl          1.36      {
1203 mike          1.111.6.1 #ifdef PEGASUS_STRING_NO_UTF8
1204 kumpf         1.39      
1205 mike          1.111.6.6     _checkNullPointer(s2);
1206 kumpf         1.39      
1207 mike          1.111.6.1     const Uint16* p = (Uint16*)s1._rep->data;
1208                             const char* q = s2;
1209 kumpf         1.39      
1210 mike          1.111.6.1     while (*p && *q)
1211                             {
1212 mike          1.111.6.8         if (*p++ != Uint16(*q++))
1213                                     return false;
1214 mike          1.111.6.1     }
1215 kumpf         1.39      
1216 mike          1.111.6.1     return !(*p || *q);
1217 kumpf         1.39      
1218 mike          1.111.6.1 #else /* PEGASUS_STRING_NO_UTF8 */
1219 kumpf         1.39      
1220 mike          1.111.6.1     return String::equal(s1, String(s2));
1221                         
1222                         #endif /* PEGASUS_STRING_NO_UTF8 */
1223 kumpf         1.39      }
1224                         
1225 kumpf         1.47      PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
1226 kumpf         1.39      {
1227 david         1.69      #if defined(PEGASUS_OS_OS400)
1228 mike          1.111.6.1 
1229 david         1.93          CString cstr = str.getCString();
1230 david         1.69          const char* utf8str = cstr;
1231                             os << utf8str;
1232 mike          1.111.6.1     return os;
1233                         #else    
1234                         
1235                         #if defined(PEGASUS_HAS_ICU)
1236 david         1.69      
1237 yi.zhou       1.108         if (InitializeICU::initICUSuccessful())
1238                             {
1239 david.dillard 1.105             char *buf = NULL;
1240                                 const int size = str.size() * 6;
1241 mike          1.111.6.1         UnicodeString UniStr(
1242 mike          1.111.6.8             (const UChar *)str.getChar16Data(), (int32_t)str.size());
1243 david.dillard 1.105             Uint32 bufsize = UniStr.extract(0,size,buf);
1244                                 buf = new char[bufsize+1];
1245                                 UniStr.extract(0,bufsize,buf);
1246                                 os << buf;
1247                                 os.flush();
1248                                 delete [] buf;
1249 mike          1.111.6.1         return os;       
1250 yi.zhou       1.108         }
1251 mike          1.111.6.1 
1252                         #endif  // PEGASUS_HAS_ICU 
1253                         
1254                             for (Uint32 i = 0, n = str.size(); i < n; i++)
1255 yi.zhou       1.108         {
1256 mike          1.111.6.1         Uint16 code = str[i];
1257 david.dillard 1.105     
1258 mike          1.111.6.8         if (code > 0 && !(code & 0xFF00))
1259                                         os << char(code);
1260 mike          1.111.6.1         else
1261 mike          1.111.6.8             {
1262 mike          1.111.6.1             // Print in hex format:
1263                                     char buffer[8];
1264                                     sprintf(buffer, "\\x%04X", code);
1265                                     os << buffer;
1266 david.dillard 1.105             }
1267 yi.zhou       1.108         }
1268 kumpf         1.39      
1269                             return os;
1270 mike          1.111.6.1 #endif // PEGASUS_OS_OS400
1271 kumpf         1.39      }
1272                         
1273 mike          1.111.6.6 void StringAppendCharAux(StringRep*& _rep)
1274 kumpf         1.39      {
1275 mike          1.111.6.1     StringRep* tmp;
1276 kumpf         1.39      
1277 mike          1.111.6.1     if (_rep->cap)
1278                             {
1279 mike          1.111.6.8         tmp = StringRep::alloc(2 * _rep->cap);
1280                                 tmp->size = _rep->size;
1281                                 _copy(tmp->data, _rep->data, _rep->size);
1282 mike          1.111.6.1     }
1283                             else
1284                             {
1285 mike          1.111.6.8         tmp = StringRep::alloc(8);
1286                                 tmp->size = 0;
1287 mike          1.111.6.1     }
1288 kumpf         1.39      
1289 mike          1.111.6.1     StringRep::unref(_rep);
1290                             _rep = tmp;
1291 kumpf         1.39      }
1292                         
1293 mike          1.111.6.1 PEGASUS_NAMESPACE_END
1294 kumpf         1.39      
1295 mike          1.111.6.1 /*
1296                         ================================================================================
1297 kumpf         1.39      
1298 mike          1.111.6.1 String optimizations:
1299                         
1300                             1.  Added mechanism allowing certain functions to be inlined only when
1301 mike          1.111.6.8         used by internal Pegasus modules. External modules (i.e., providers)
1302                                 link to a non-inline version, which allows for binary compatibility.
1303 mike          1.111.6.1 
1304                             2.  Implemented copy-on-write with atomic increment/decrement. This
1305 mike          1.111.6.8         yielded a 10% improvement for the 'gc' benchmark and an 11% improvement
1306                                 for the 'ni1000' benchmark.
1307 mike          1.111.6.1 
1308 mike          1.111.6.8     3.  Employed loop unrolling in several places. For example, see:
1309 mike          1.111.6.1 
1310 mike          1.111.6.8             static Uint16* _find(const Uint16* s, size_t n, Uint16 c);
1311 mike          1.111.6.1 
1312                             4.  Used the "empty-rep" optimization (described in whitepaper from the
1313 mike          1.111.6.8         GCC Developers Summit). This reduced default construction to a simple
1314                                 pointer assignment.
1315 mike          1.111.6.1 
1316 mike          1.111.6.8             inline String::String() : _rep(&_emptyRep) { }
1317 mike          1.111.6.1 
1318                             5.  Implemented Uint16 versions of toupper() and tolower() using tables.
1319 mike          1.111.6.8         For example:
1320 mike          1.111.6.1 
1321 mike          1.111.6.8             static const char _upper[] =
1322                                     {
1323                                         0,1,2,...255
1324                                     };
1325                         
1326                                     inline Uint16 _toUpper(Uint16 x)
1327                                     {
1328                                         return (x & 0xFF00) ? x : _upper[x];
1329                                     }
1330 mike          1.111.6.1 
1331 mike          1.111.6.8         This outperforms the system implementation by avoiding an anding 
1332                                 operation.
1333 mike          1.111.6.1 
1334                             6.  Implemented char* version of the following member functions to 
1335 mike          1.111.6.8         eliminate unnecessary creation of anonymous string objects 
1336                                 (temporaries).
1337 mike          1.111.6.1 
1338 mike          1.111.6.8             String(const String& s1, const char* s2);
1339                                     String(const char* s1, const String& s2);
1340                                     String& String::operator=(const char* str);
1341                                     Uint32 String::find(const char* s) const;
1342                                     bool String::equal(const String& s1, const char* s2);
1343                                     static int String::compare(const String& s1, const char* s2);
1344                                     String& String::append(const char* str);
1345                                     String& String::append(const char* str, Uint32 size);
1346                                     static bool String::equalNoCase(const String& s1, const char* s2);
1347                                     String& operator=(const char* str)
1348                                     String& String::assign(const char* str)
1349                                     String& String::append(const char* str)
1350                                     Boolean operator==(const String& s1, const char* s2)
1351                                     Boolean operator==(const char* s1, const String& s2)
1352                                     Boolean operator!=(const String& s1, const char* s2)
1353                                     Boolean operator!=(const char* s1, const String& s2)
1354                                     Boolean operator<(const String& s1, const char* s2)
1355                                     Boolean operator<(const char* s1, const String& s2)
1356                                     Boolean operator>(const String& s1, const char* s2)
1357                                     Boolean operator>(const char* s1, const String& s2)
1358                                     Boolean operator<=(const String& s1, const char* s2)
1359 mike          1.111.6.8             Boolean operator<=(const char* s1, const String& s2)
1360                                     Boolean operator>=(const String& s1, const char* s2)
1361                                     Boolean operator>=(const char* s1, const String& s2)
1362                                     String operator+(const String& s1, const char* s2)
1363                                     String operator+(const char* s1, const String& s2)
1364 mike          1.111.6.1 
1365 mike          1.111.6.5     7.  Optimized _roundUpToPow2(), used in rounding the capacity to the next 
1366 mike          1.111.6.1         power of two (algorithm from the book "Hacker's Delight").
1367                         
1368 mike          1.111.6.8             static Uint32 _roundUpToPow2(Uint32 x)
1369                                     {
1370                                         if (x < 8)
1371                                             return 8;
1372                         
1373                                         x--;
1374                                         x |= (x >> 1);
1375                                         x |= (x >> 2);
1376                                         x |= (x >> 4);
1377                                         x |= (x >> 8);
1378                                         x |= (x >> 16);
1379                                         x++;
1380 mike          1.111.6.1 
1381 mike          1.111.6.8                 return x;
1382                                     }
1383 mike          1.111.6.1 
1384                             8.  Implemented "concatenating constructors" to eliminate temporaries
1385 mike          1.111.6.8         created by operator+(). This scheme employs the "return-value 
1386                                 optimization" described by Stan Lippman.
1387 mike          1.111.6.1 
1388 mike          1.111.6.8             inline String operator+(const String& s1, const String& s2)
1389                                     {
1390                                         return String(s1, s2, 0);
1391                                     }
1392 mike          1.111.6.1 
1393                             9.  Experimented to find the optimal initial size for a short string.
1394 mike          1.111.6.8         Eight seems to offer the best tradeoff between space and time.
1395 mike          1.111.6.1 
1396                             10. Inlined all members of the Char16 class.
1397                         
1398                             11. Used Uint16 internally in the String class. This showed no improvement
1399 mike          1.111.6.8         since Char16 was already fully inlined and was essentially reduced to
1400                                 Uint16 in any case.
1401 mike          1.111.6.1 
1402                             12. Implemented conditional logic (#if) allowing error checking logic to
1403 mike          1.111.6.8         be excluded to better performance. Examples include bounds checking 
1404                                 and null-pointer checking.
1405 mike          1.111.6.1 
1406                             13. Used memcpy() and memcmp() where possible. These are implemented using
1407 mike          1.111.6.8         the rep family of instructions under Intel and are much faster.
1408 mike          1.111.6.1 
1409                             14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8 
1410 mike          1.111.6.8         copy routine overhead.
1411 mike          1.111.6.1 
1412                             15. Added ASCII7 form of the constructor and assign().
1413                         
1414 mike          1.111.6.8             String s("hello world", String::ASCII7);
1415 mike          1.111.6.1 
1416 mike          1.111.6.8             s.assignASCII7("hello world");
1417 mike          1.111.6.1 
1418 mike          1.111.6.8         This avoids slower UTF8 processing when not needed.
1419 mike          1.111.6.1 
1420 mike          1.111.6.5 ================================================================================
1421                         
1422                         TO-DO:
1423                         
1424 mike          1.111.6.8     (+) [DONE] Use PEGASUS_USE_EXPERIMENTAL_INTERFACES
1425 mike          1.111.6.5 
1426 mike          1.111.6.8     (+) [DONE] Submit BUG-2754 (Windows buffer limit).
1427 mike          1.111.6.5 
1428                             (+) [DONE] Eliminate char versions of find() and append().
1429                         
1430                             (+) [DONE] Remove PEGASUS_MAX_PRINTABLE_CHARACTER from Config.h
1431                         
1432                             (+) [DONE] Change _next_pow_2() to _roundUpToPow2().
1433                         
1434                             (+) [DONE] Change '99' to '2' in StringRep constructor (comment as well).
1435                         
1436                             (+) [DONE] Comment StringRep allocation layout.
1437                         
1438                             (+) [DONE] Conceal private inline functions.
1439 mike          1.111.6.1 
1440 mike          1.111.6.5     (+) [DONE] Shorten inclusion of StringInline.h in String.h.
1441 mike          1.111.6.1 
1442 mike          1.111.6.5     (+) [DONE] Change USE_INTERNAL_INLINE TO DISABLE_INTERNAL_INLINE or get
1443 mike          1.111.6.8         rid of altogether.
1444 mike          1.111.6.1 
1445 mike          1.111.6.6     (+) [DONE] useCamelNotationOnAllFunctionNames.
1446                         
1447 mike          1.111.6.7     (+) [DONE] Check for overflow condition in StringRep::alloc().
1448 mike          1.111.6.1 
1449 mike          1.111.6.9     (+) [DONE] Remove tabs (used vim ":set expandtab" and ":retab").
1450                         
1451 mike          1.111.6.7     -----------
1452 mike          1.111.6.1 
1453 mike          1.111.6.5     (+) Fix throw-related memory leak.
1454 mike          1.111.6.1 
1455 mike          1.111.6.8     (+) DOC++ String.h
1456                                 
1457 mike          1.111.6.5     (+) Look at PEP223 for coding security guidelines.
1458 mike          1.111.6.1 
1459 mike          1.111.6.6     (+) Replace AtomicInt with new Atomic implementation.
1460 mike          1.111.6.1 
1461 mike          1.111.6.5     (+) Implement Atomic operations for HP.
1462 mike          1.111.6.3 
1463 mike          1.111.6.1 ================================================================================
1464                         */

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2