(file) Return to String.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 karl  1.98 //%2005////////////////////////////////////////////////////////////////////////
   2 mike  1.27 //
   3 karl  1.97 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
   4            // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
   5            // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
   6 karl  1.85 // IBM Corp.; EMC Corporation, The Open Group.
   7 karl  1.97 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
   8            // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
   9 karl  1.98 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
  10            // EMC Corporation; VERITAS Software Corporation; The Open Group.
  11 mike  1.27 //
  12            // Permission is hereby granted, free of charge, to any person obtaining a copy
  13 kumpf 1.41 // of this software and associated documentation files (the "Software"), to
  14            // deal in the Software without restriction, including without limitation the
  15            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  16 mike  1.27 // sell copies of the Software, and to permit persons to whom the Software is
  17            // furnished to do so, subject to the following conditions:
  18 david.dillard 1.105 //
  19 kumpf         1.41  // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
  20 mike          1.27  // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
  21                     // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
  22 kumpf         1.41  // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
  23                     // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  24                     // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  25 mike          1.27  // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  26                     // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27                     //
  28                     //==============================================================================
  29                     //
  30                     // Author: Mike Brasher (mbrasher@bmc.com)
  31                     //
  32 mike          1.111.6.1 // Modified By: 
  33                         //     Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
  34                         //     Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297
  35                         //     David Dillard, VERITAS Software Corp. (david.dillard@veritas.com)
  36                         //     Mike Brasher (mike-brasher@austin.rr.com)
  37 mike          1.27      //
  38                         //%/////////////////////////////////////////////////////////////////////////////
  39                         
  40 mike          1.111.6.1 #include <cassert>
  41 kumpf         1.48      #include "InternalException.h"
  42 david         1.69      #include "CommonUTF.h"
  43 mike          1.111.6.1 #include "MessageLoader.h"
  44 mike          1.111.6.2 #include "StringRep.h"
  45 david         1.69      
  46                         #ifdef PEGASUS_HAS_ICU
  47 chuck         1.99      #include <unicode/ustring.h>
  48                         #include <unicode/uchar.h>
  49 david         1.69      #endif
  50                         
  51 mike          1.27      PEGASUS_NAMESPACE_BEGIN
  52                         
  53 mike          1.111.6.1 //==============================================================================
  54                         //
  55                         // Compile-time macros (undefined by default).
  56                         //
  57                         //     PEGASUS_STRING_NO_THROW -- suppresses throwing of exceptions
  58                         //      
  59                         //     PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
  60                         //
  61                         //==============================================================================
  62                         
  63                         //==============================================================================
  64 kumpf         1.39      //
  65 mike          1.111.6.1 // File-scope definitions:
  66 kumpf         1.54      //
  67 mike          1.111.6.1 //==============================================================================
  68                         
  69 mike          1.111.6.6 const Uint8 _toUpperTable[256] = 
  70 mike          1.111.6.1 {
  71                             0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
  72                             0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
  73                             0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
  74                             0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
  75                             0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
  76                             0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
  77                             0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
  78                             0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
  79                             0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
  80                             0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
  81                             0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
  82                             0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
  83                             0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
  84                             0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
  85                             0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
  86                             0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
  87                             0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
  88                             0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
  89                             0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
  90                             0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
  91 mike          1.111.6.1     0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
  92                             0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
  93                             0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
  94                             0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
  95                             0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
  96                             0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
  97                             0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
  98                             0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
  99                             0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
 100                             0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
 101                             0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
 102                             0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
 103                         };
 104                         
 105 mike          1.111.6.6 const Uint8 _toLowerTable[256] = 
 106 mike          1.111.6.1 {
 107                             0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
 108                             0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
 109                             0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
 110                             0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
 111                             0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
 112                             0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
 113                             0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
 114                             0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
 115                             0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
 116                             0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
 117                             0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
 118                             0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
 119                             0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
 120                             0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
 121                             0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
 122                             0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
 123                             0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
 124                             0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
 125                             0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
 126                             0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
 127 mike          1.111.6.1     0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
 128                             0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
 129                             0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
 130                             0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
 131                             0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
 132                             0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
 133                             0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
 134                             0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
 135                             0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
 136                             0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
 137                             0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
 138                             0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
 139                         };
 140 kumpf         1.54      
 141 mike          1.111.6.1 // Converts 16-bit characters to upper case.
 142 mike          1.111.6.6 inline Uint16 _toUpper(Uint16 x)
 143 kumpf         1.54      {
 144 mike          1.111.6.6     return (x & 0xFF00) ? x : _toUpperTable[x];
 145 kumpf         1.54      }
 146                         
 147 mike          1.111.6.1 // Converts 16-bit characters to lower case.
 148 mike          1.111.6.6 inline Uint16 _toLower(Uint16 x)
 149 kumpf         1.54      {
 150 mike          1.111.6.6     return (x & 0xFF00) ? x : _toLowerTable[x];
 151 mike          1.111.6.1 }
 152 kumpf         1.82      
 153 mike          1.111.6.6 // Rounds x up to the nearest power of two (or just returns 8 if x < 8).
 154 mike          1.111.6.5 static Uint32 _roundUpToPow2(Uint32 x)
 155 mike          1.111.6.1 {
 156 mike          1.111.6.6     if (x > 0x0FFFFFFF)
 157                         	throw PEGASUS_STD(bad_alloc)();
 158                         
 159 mike          1.111.6.1     if (x < 8)
 160                         	return 8;
 161                         
 162                             x--;
 163                             x |= (x >> 1);
 164                             x |= (x >> 2);
 165                             x |= (x >> 4);
 166                             x |= (x >> 8);
 167                             x |= (x >> 16);
 168                             x++;
 169                         
 170                             return x;
 171                         }
 172                         
 173                         template<class P, class Q>
 174                         static void _copy(P* p, const Q* q, size_t n)
 175                         {
 176                             // Use loop unrolling.
 177                         
 178                             while (n >= 8)
 179 kumpf         1.82          {
 180 mike          1.111.6.1 	p[0] = q[0];
 181                         	p[1] = q[1];
 182                         	p[2] = q[2];
 183                         	p[3] = q[3];
 184                         	p[4] = q[4];
 185                         	p[5] = q[5];
 186                         	p[6] = q[6];
 187                         	p[7] = q[7];
 188                         	p += 8;
 189                         	q += 8;
 190                         	n -= 8;
 191 kumpf         1.82          }
 192 mike          1.111.6.1 
 193                             while (n >= 4)
 194                             {
 195                         	p[0] = q[0];
 196                         	p[1] = q[1];
 197                         	p[2] = q[2];
 198                         	p[3] = q[3];
 199                         	p += 4;
 200                         	q += 4;
 201                         	n -= 4;
 202                             }
 203                         
 204                             while (n--)
 205                         	*p++ = *q++;
 206 kumpf         1.54      }
 207                         
 208 mike          1.111.6.1 static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
 209 kumpf         1.54      {
 210 mike          1.111.6.1     while (n >= 4)
 211                             {
 212                         	if (s[0] == c)
 213                         	    return (Uint16*)s;
 214                         	if (s[1] == c)
 215                         	    return (Uint16*)&s[1];
 216                         	if (s[2] == c)
 217                         	    return (Uint16*)&s[2];
 218                         	if (s[3] == c)
 219                         	    return (Uint16*)&s[3];
 220                         
 221                         	n -= 4;
 222                         	s += 4;
 223                             }
 224                         
 225                             if (n)
 226                             {
 227                         	if (*s == c)
 228                         	    return (Uint16*)s;
 229                         	s++;
 230                         	n--;
 231 mike          1.111.6.1     }
 232                         
 233                             if (n)
 234                             {
 235                         	if (*s == c)
 236                         	    return (Uint16*)s;
 237                         	s++;
 238                         	n--;
 239                             }
 240                         
 241                             if (n && *s == c)
 242                         	return (Uint16*)s;
 243                         
 244                             // Not found!
 245                             return 0;
 246 kumpf         1.54      }
 247                         
 248 mike          1.111.6.1 static int _compare(const Uint16* s1, const Uint16* s2)
 249 kumpf         1.54      {
 250 mike          1.111.6.1     while (*s1 && *s2)
 251 kumpf         1.82          {
 252 mike          1.111.6.1         int r = *s1++ - *s2++;
 253                         
 254                                 if (r)
 255                                     return r;
 256 kumpf         1.82          }
 257 mike          1.111.6.1 
 258                             if (*s2)
 259                                 return -1;
 260                             else if (*s1)
 261                                 return 1;
 262                         
 263                             return 0;
 264 kumpf         1.54      }
 265                         
 266 mike          1.111.6.6 static int _compareNoUTF8(const Uint16* s1, const char* s2)
 267 kumpf         1.56      {
 268 mike          1.111.6.1     Uint16 c1;
 269                             Uint16 c2;
 270                         
 271                             do
 272 kumpf         1.81          {
 273 mike          1.111.6.1 	c1 = *s1++;
 274                         	c2 = *s2++;
 275                         
 276                         	if (c1 == 0)
 277                         	    return c1 - c2;
 278 kumpf         1.81          }
 279 mike          1.111.6.1     while (c1 == c2);
 280                         
 281                             return c1 - c2;
 282 kumpf         1.56      }
 283                         
 284 mike          1.111.6.1 static int _compare(const Uint16* s1, const Uint16* s2, size_t n)
 285 kumpf         1.54      {
 286 mike          1.111.6.1     // This should only be called when s1 and s2 have the same length.
 287                         
 288                             while (n-- && (*s1++ - *s2++) == 0)
 289                         	;
 290                         
 291                             // 
 292                         
 293                             return s1[-1] - s2[-1];
 294 kumpf         1.54      }
 295                         
 296 mike          1.111.6.1 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)
 297                         {
 298                             memcpy(s1, s2, n * sizeof(Uint16));
 299                         }
 300 kumpf         1.39      
 301 mike          1.111.6.6 void StrinThrowOutOfBounds()
 302 mike          1.111.6.1 {
 303                             throw IndexOutOfBoundsException();
 304                         }
 305 mike          1.27      
 306 mike          1.111.6.1 #ifdef PEGASUS_STRING_NO_THROW
 307 mike          1.111.6.6 # define _checkNullPointer(ARG) /* empty */
 308 mike          1.111.6.1 #else
 309                         template<class T>
 310 mike          1.111.6.6 inline void _checkNullPointer(const T* ptr)
 311 mike          1.27      {
 312 mike          1.111.6.1     if (!ptr)
 313                         	throw NullPointer();
 314                         }
 315                         #endif
 316                         
 317 mike          1.111.6.6 static size_t _copyFromUTF8(Uint16* dest, const char* src, size_t n)
 318 mike          1.111.6.1 {
 319                             Uint16* p = dest;
 320                             const Uint8* q = (const Uint8*)src;
 321 mike          1.27      
 322 mike          1.111.6.1     // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later).
 323                             // Use loop-unrolling.
 324 mike          1.27      
 325 mike          1.111.6.1     while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0)
 326                             {
 327                         	p[0] = q[0];
 328                         	p[1] = q[1];
 329                         	p[2] = q[2];
 330                         	p[3] = q[3];
 331                         	p[4] = q[4];
 332                         	p[5] = q[5];
 333                         	p[6] = q[6];
 334                         	p[7] = q[7];
 335                         	p += 8;
 336                         	q += 8;
 337                         	n -= 8;
 338                             }
 339                         
 340                             while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0)
 341                             {
 342                         	p[0] = q[0];
 343                         	p[1] = q[1];
 344                         	p[2] = q[2];
 345                         	p[3] = q[3];
 346 mike          1.111.6.1 	p += 4;
 347                         	q += 4;
 348                         	n -= 4;
 349                             }
 350                         
 351                             switch (n)
 352                             {
 353                         	case 0:
 354                         	    return p - dest;
 355                         	case 1:
 356                         	    if (q[0] < 128)
 357                         	    {
 358                         		p[0] = q[0];
 359                         		return p + 1 - dest;
 360                         	    }
 361                         	    break;
 362                         	case 2:
 363                         	    if (((q[0]|q[1]) & 0x80) == 0)
 364                         	    {
 365                         		p[0] = q[0];
 366                         		p[1] = q[1];
 367 mike          1.111.6.1 		return p + 2 - dest;
 368                         	    }
 369                         	    break;
 370                         	case 3:
 371                         	    if (((q[0]|q[1]|q[2]) & 0x80) == 0)
 372                         	    {
 373                         		p[0] = q[0];
 374                         		p[1] = q[1];
 375                         		p[2] = q[2];
 376                         		return p + 3 - dest;
 377                         	    }
 378                         	    break;
 379                             }
 380                         
 381                             // Process remaining characters.
 382                         
 383                             while (n)
 384                             {
 385                         	// Optimize for 7-bit ASCII case.
 386                         
 387                         	if (*q < 128)
 388 mike          1.111.6.1 	{
 389                         	    *p++ = *q++;
 390                         	    n--;
 391                         	}
 392                         	else
 393                         	{
 394                         	    Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1;
 395                         
 396                         	    if (c > n || !isValid_U8(q, c) ||
 397                         		UTF8toUTF16(&q, q + c, &p, p + n) != 0)
 398                         	    {
 399                         		MessageLoaderParms parms("Common.String.BAD_UTF8",
 400                         		    "The byte sequence starting at index $0 "
 401                         		    "is not valid UTF-8 encoding.",
 402                         		     q - (const Uint8*)src);
 403                         		throw Exception(parms);
 404                         	    }
 405                         
 406                         	    n -= c;
 407                         	}
 408                             }
 409 mike          1.111.6.1 
 410                             return p - dest;
 411                         }
 412                         
 413                         // Note: dest must be at least three times src (plus an extra byte for 
 414                         // terminator).
 415 mike          1.111.6.6 static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n)
 416 mike          1.111.6.1 {
 417                             const Uint16* q = src;
 418                             Uint8* p = (Uint8*)dest;
 419                         
 420                             while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
 421                             {
 422                         	p[0] = q[0];
 423                         	p[1] = q[1];
 424                         	p[2] = q[2];
 425                         	p[3] = q[3];
 426                         	p += 4;
 427                         	q += 4;
 428                         	n -= 4;
 429                             }
 430                         
 431                             switch (n)
 432                             {
 433                         	case 0:
 434                         	    return p - (Uint8*)dest;
 435                         	case 1:
 436                         	    if (q[0] < 128)
 437 mike          1.111.6.1 	    {
 438                         		p[0] = q[0];
 439                         		return p + 1 - (Uint8*)dest;
 440                         	    }
 441                         	    break;
 442                         	case 2:
 443                         	    if (q[0] < 128 && q[1] < 128)
 444                         	    {
 445                         		p[0] = q[0];
 446                         		p[1] = q[1];
 447                         		return p + 2 - (Uint8*)dest;
 448                         	    }
 449                         	    break;
 450                         	case 3:
 451                         	    if (q[0] < 128 && q[1] < 128 && q[2] < 128)
 452                         	    {
 453                         		p[0] = q[0];
 454                         		p[1] = q[1];
 455                         		p[2] = q[2];
 456                         		return p + 3 - (Uint8*)dest;
 457                         	    }
 458 mike          1.111.6.1 	    break;
 459                             }
 460                         
 461                             // If this line was reached, there must be characters greater than 128.
 462                         
 463                             UTF16toUTF8(&q, q + n, &p, p + 3 * n);
 464 mike          1.27      
 465 mike          1.111.6.1     return p - (Uint8*)dest;
 466                         }
 467                         
 468                         static inline size_t _convert(Uint16* p, const char* q, size_t n)
 469                         {
 470                         #ifdef PEGASUS_STRING_NO_UTF8
 471                             _copy(p, q, n);
 472 mike          1.27          return n;
 473 mike          1.111.6.1 #else
 474 mike          1.111.6.6     return _copyFromUTF8(p, q, n);
 475 mike          1.111.6.1 #endif
 476 mike          1.27      }
 477                         
 478 mike          1.111.6.1 //==============================================================================
 479 chuck         1.102     //
 480 mike          1.111.6.1 // class CString
 481 chuck         1.102     //
 482 mike          1.111.6.1 //==============================================================================
 483                         
 484                         CString::CString(const CString& cstr) : _rep(0)
 485 chuck         1.102     {
 486 mike          1.111.6.1     if (cstr._rep)
 487 chuck         1.102         {
 488 mike          1.111.6.1 	size_t n = strlen(cstr._rep) + 1;
 489                                 _rep = (char*)operator new(n);
 490                         	memcpy(_rep, cstr._rep, n);
 491                             }
 492                         }
 493                         
 494                         CString& CString::operator=(const CString& cstr)
 495                         {
 496                             if (&cstr != this)
 497                             {
 498                                 if (_rep)
 499 david.dillard 1.105             {
 500 mike          1.111.6.1             operator delete(_rep);
 501                                     _rep = 0;
 502 chuck         1.102             }
 503 mike          1.111.6.1 
 504                                 if (cstr._rep)
 505 chuck         1.102             {
 506 mike          1.111.6.1 	    size_t n = strlen(cstr._rep) + 1;
 507                                     _rep = (char*)operator new(n);
 508                         	    memcpy(_rep, cstr._rep, n);
 509 chuck         1.102             }
 510 mike          1.111.6.1     }
 511 chuck         1.102     
 512 mike          1.111.6.1     return *this;
 513                         }
 514                         
 515                         //==============================================================================
 516                         //
 517                         // class StringRep
 518                         //
 519                         //==============================================================================
 520                         
 521 mike          1.111.6.6 StringRep StringRep::_emptyRep;
 522 chuck         1.102     
 523 mike          1.111.6.1 inline StringRep* StringRep::alloc(size_t cap)
 524                         {
 525 mike          1.111.6.6     // Any string bigger than this is seriously suspect.
 526                             if (cap > 0x0FFFFFFF)
 527                         	throw PEGASUS_STD(bad_alloc)();
 528                         
 529 mike          1.111.6.1     StringRep* rep = (StringRep*)::operator new(
 530                         	sizeof(StringRep) + cap * sizeof(Uint16));
 531                             rep->cap = cap;
 532                             Atomic_create(&rep->refs, 1);
 533                         
 534                             return rep;
 535 chuck         1.102     }
 536                         
 537 mike          1.111.6.1 static inline void _reserve(StringRep*& rep, Uint32 cap)
 538 kumpf         1.43      {
 539 mike          1.111.6.1     if (cap > rep->cap || Atomic_get(&rep->refs) != 1)
 540                             {
 541 mike          1.111.6.5 	size_t n = _roundUpToPow2(cap);
 542 mike          1.111.6.6 	StringRep* newRep = StringRep::alloc(n);
 543                         	newRep->size = rep->size;
 544                         	_copy(newRep->data, rep->data, rep->size + 1);
 545 mike          1.111.6.1 	StringRep::unref(rep);
 546 mike          1.111.6.6 	rep = newRep;
 547 mike          1.111.6.1     }
 548                         }
 549 kumpf         1.43      
 550 mike          1.111.6.1 StringRep* StringRep::create(const Uint16* data, size_t size)
 551                         {
 552                             StringRep* rep = StringRep::alloc(size);
 553                             rep->size = size;
 554                             _copy(rep->data, data, size);
 555                             rep->data[size] = '\0';
 556                             return rep;
 557                         }
 558                         
 559 mike          1.111.6.6 StringRep* StringRep::copyOnWrite(StringRep* rep)
 560 mike          1.111.6.1 {
 561                             // Return a new copy of rep. Release rep.
 562                         
 563 mike          1.111.6.6     StringRep* newRep = StringRep::alloc(rep->size);
 564                             newRep->size = rep->size;
 565                             _copy(newRep->data, rep->data, rep->size);
 566                             newRep->data[newRep->size] = '\0';
 567 mike          1.111.6.1     StringRep::unref(rep);
 568 mike          1.111.6.6     return newRep;
 569 mike          1.111.6.1 }
 570 kumpf         1.43      
 571 mike          1.111.6.1 StringRep* StringRep::create(const char* data, size_t size)
 572 mike          1.27      {
 573 mike          1.111.6.1     StringRep* rep = StringRep::alloc(size);
 574                             rep->size = _convert((Uint16*)rep->data, data, size);
 575                             rep->data[rep->size] = '\0';
 576                         
 577                             return rep;
 578 mike          1.27      }
 579                         
 580 mike          1.111.6.1 StringRep* StringRep::createASCII7(const char* data, size_t size)
 581 mike          1.27      {
 582 mike          1.111.6.1     StringRep* rep = StringRep::alloc(size);
 583                             _copy((Uint16*)rep->data, data, size);
 584                             rep->data[rep->size = size] = '\0';
 585                             return rep;
 586 kumpf         1.39      }
 587 tony          1.66      
 588 mike          1.111.6.1 Uint32 StringRep::length(const Uint16* str)
 589                         {
 590                             // Note: We could unroll this but it is rarely called.
 591                         
 592                             const Uint16* end = (Uint16*)str;
 593                         
 594                             while (*end++)
 595                         	;
 596                         
 597                             return end - str - 1;
 598                         }
 599                         
 600                         //==============================================================================
 601                         //
 602                         // class String
 603                         //
 604                         //==============================================================================
 605                         
 606                         const String String::EMPTY;
 607 mike          1.27      
 608 kumpf         1.39      String::String(const String& str, Uint32 n)
 609                         {
 610 mike          1.111.6.6     _checkBounds(n, str._rep->size);
 611 mike          1.111.6.1     _rep = StringRep::create(str._rep->data, n);
 612 kumpf         1.39      }
 613                         
 614                         String::String(const Char16* str)
 615                         {
 616 mike          1.111.6.6     _checkNullPointer(str);
 617 mike          1.111.6.1     _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
 618 mike          1.27      }
 619                         
 620 kumpf         1.39      String::String(const Char16* str, Uint32 n)
 621                         {
 622 mike          1.111.6.6     _checkNullPointer(str);
 623 mike          1.111.6.1     _rep = StringRep::create((Uint16*)str, n);
 624 kumpf         1.39      }
 625                         
 626                         String::String(const char* str)
 627 mike          1.27      {
 628 mike          1.111.6.6     _checkNullPointer(str);
 629 mike          1.111.6.1     _rep = StringRep::create(str, strlen(str));
 630                         }
 631 david.dillard 1.105     
 632 mike          1.111.6.1 String::String(const char* str, String::ASCII7Tag tag)
 633                         {
 634 mike          1.111.6.6     _checkNullPointer(str);
 635 mike          1.111.6.1     _rep = StringRep::createASCII7(str, strlen(str));
 636 mike          1.27      }
 637                         
 638 kumpf         1.39      String::String(const char* str, Uint32 n)
 639 mike          1.27      {
 640 mike          1.111.6.6     _checkNullPointer(str);
 641 mike          1.111.6.1     _rep = StringRep::create(str, n);
 642                         }
 643 david.dillard 1.105     
 644 mike          1.111.6.1 String::String(const char* str, size_t n, String::ASCII7Tag tag)
 645                         {
 646 mike          1.111.6.6     _checkNullPointer(str);
 647 mike          1.111.6.1     _rep = StringRep::createASCII7(str, n);
 648 kumpf         1.39      }
 649 mike          1.27      
 650 mike          1.111.6.1 String::String(const String& s1, const String& s2)
 651 kumpf         1.39      {
 652 mike          1.111.6.1     size_t n1 = s1._rep->size;
 653                             size_t n2 = s2._rep->size;
 654                             size_t n = n1 + n2;
 655                             _rep = StringRep::alloc(n);
 656                             _copy(_rep->data, s1._rep->data, n1);
 657                             _copy(_rep->data + n1, s2._rep->data, n2);
 658                             _rep->size = n;
 659                             _rep->data[n] = '\0';
 660 mike          1.27      }
 661                         
 662 mike          1.111.6.1 String::String(const String& s1, const char* s2)
 663 mike          1.27      {
 664 mike          1.111.6.6     _checkNullPointer(s2);
 665 mike          1.111.6.1     size_t n1 = s1._rep->size;
 666                             size_t n2 = strlen(s2);
 667                             _rep = StringRep::alloc(n1 + n2);
 668                             _copy(_rep->data, s1._rep->data, n1);
 669                             _rep->size = n1 + _convert((Uint16*)_rep->data + n1, s2, n2);
 670                             _rep->data[_rep->size] = '\0';
 671 mike          1.27      }
 672                         
 673 mike          1.111.6.1 String::String(const char* s1, const String& s2)
 674 mike          1.27      {
 675 mike          1.111.6.6     _checkNullPointer(s1);
 676 mike          1.111.6.1     size_t n1 = strlen(s1);
 677                             size_t n2 = s2._rep->size;
 678                             _rep = StringRep::alloc(n1 + n2);
 679                             _rep->size = n2 + _convert((Uint16*)_rep->data, s1, n1);
 680                             _copy(_rep->data + n1, s2._rep->data, n2);
 681                             _rep->data[_rep->size] = '\0';
 682 mike          1.27      }
 683                         
 684 mike          1.111.6.1 String& String::assign(const String& str)
 685 mike          1.27      {
 686 mike          1.111.6.1     if (_rep != str._rep)
 687 david.dillard 1.105         {
 688 mike          1.111.6.1 	StringRep::unref(_rep);
 689                         	StringRep::ref(_rep = str._rep);
 690 david.dillard 1.105         }
 691                         
 692 mike          1.27          return *this;
 693                         }
 694                         
 695                         String& String::assign(const Char16* str, Uint32 n)
 696                         {
 697 mike          1.111.6.6     _checkNullPointer(str);
 698 mike          1.111.6.1 
 699                             if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)
 700 david.dillard 1.105         {
 701 mike          1.111.6.1 	StringRep::unref(_rep);
 702                         	_rep = StringRep::alloc(n);
 703 david.dillard 1.105         }
 704                         
 705 mike          1.111.6.1     _rep->size = n;
 706                             _copy(_rep->data, (Uint16*)str, n);
 707                             _rep->data[n] = '\0';
 708                         
 709 mike          1.27          return *this;
 710                         }
 711                         
 712 mike          1.111.6.1 String& String::assign(const char* str, Uint32 n)
 713 chuck         1.102     {
 714 mike          1.111.6.6     _checkNullPointer(str);
 715 mike          1.111.6.1 
 716                             if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)
 717 david.dillard 1.105         {
 718 mike          1.111.6.1 	StringRep::unref(_rep);
 719                         	_rep = StringRep::alloc(n);
 720 david.dillard 1.105         }
 721                         
 722 mike          1.111.6.1     _rep->size = _convert(_rep->data, str, n);
 723                             _rep->data[_rep->size] = 0;
 724                         
 725 chuck         1.102         return *this;
 726                         }
 727                         
 728 mike          1.111.6.1 String& String::assignASCII7(const char* str, Uint32 n)
 729 mike          1.27      {
 730 mike          1.111.6.6     _checkNullPointer(str);
 731 mike          1.111.6.1 
 732                             if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)
 733 david.dillard 1.105         {
 734 mike          1.111.6.1 	StringRep::unref(_rep);
 735                         	_rep = StringRep::alloc(n);
 736 david.dillard 1.105         }
 737                         
 738 mike          1.111.6.1     _copy(_rep->data, str, n);
 739                             _rep->data[_rep->size = n] = 0;
 740                         
 741 mike          1.27          return *this;
 742                         }
 743                         
 744 kumpf         1.39      void String::clear()
 745                         {
 746 mike          1.111.6.1     if (_rep->size)
 747                             {
 748                         	if (Atomic_get(&_rep->refs) == 1)
 749 mike          1.111.6.4 	{
 750 mike          1.111.6.1 	    _rep->size = 0;
 751 mike          1.111.6.4 	    _rep->data[0] = '\0';
 752                         	}
 753 mike          1.111.6.1 	else
 754                         	{
 755                         	    StringRep::unref(_rep);
 756 mike          1.111.6.6 	    _rep = &StringRep::_emptyRep;
 757 mike          1.111.6.1 	}
 758                             }
 759 kumpf         1.39      }
 760                         
 761 mike          1.111.6.1 void String::reserveCapacity(Uint32 cap)
 762 kumpf         1.39      {
 763 mike          1.111.6.1     _reserve(_rep, cap);
 764 kumpf         1.39      }
 765                         
 766 mike          1.111.6.1 CString String::getCString() const
 767                         {
 768                         #ifdef PEGASUS_STRING_NO_UTF8
 769                             char* str = (char*)operator new(_rep->size + 1);
 770                             _copy(str, _rep->data, _rep->size);
 771                             str[_rep->size] = '\0';
 772                             return CString(str);
 773 gs.keenan     1.110     #else
 774 mike          1.111.6.1     Uint32 n = 3 * _rep->size;
 775                             char* str = (char*)operator new(n + 1);
 776 mike          1.111.6.6     size_t size = _copyToUTF8(str, _rep->data, _rep->size);
 777 mike          1.111.6.1     str[size] = '\0';
 778                             return CString(str);
 779 gs.keenan     1.110     #endif
 780 kumpf         1.39      }
 781                         
 782 mike          1.111.6.1 String& String::append(const Char16* str, Uint32 n)
 783 kumpf         1.39      {
 784 mike          1.111.6.6     _checkNullPointer(str);
 785 kumpf         1.39      
 786 mike          1.111.6.6     size_t oldSize = _rep->size;
 787                             size_t newSize = oldSize + n;
 788                             _reserve(_rep, newSize);
 789                             _copy(_rep->data + oldSize, (Uint16*)str, n);
 790                             _rep->size = newSize;
 791                             _rep->data[newSize] = '\0';
 792 mike          1.27      
 793 mike          1.111.6.1     return *this;
 794 mike          1.27      }
 795                         
 796 mike          1.111.6.1 String& String::append(const String& str)
 797 mike          1.27      {
 798 mike          1.111.6.1     return append((Char16*)str._rep->data, str._rep->size);
 799 mike          1.27      }
 800                         
 801 mike          1.111.6.1 String& String::append(const char* str, Uint32 size)
 802 kumpf         1.39      {
 803 mike          1.111.6.6     _checkNullPointer(str);
 804 mike          1.111.6.1 
 805 mike          1.111.6.6     size_t oldSize = _rep->size;
 806                             size_t cap = oldSize + size;
 807 mike          1.111.6.1 
 808                             _reserve(_rep, cap);
 809 mike          1.111.6.6     _rep->size += _convert((Uint16*)_rep->data + oldSize, str, size);
 810 mike          1.111.6.1     _rep->data[_rep->size] = '\0';
 811 kumpf         1.39      
 812 mike          1.27          return *this;
 813                         }
 814                         
 815 mike          1.111.6.1 void String::remove(Uint32 index, Uint32 n)
 816 mike          1.27      {
 817 mike          1.111.6.1     if (n == PEG_NOT_FOUND)
 818                                 n = _rep->size - index;
 819 mike          1.27      
 820 mike          1.111.6.6     _checkBounds(index + n, _rep->size);
 821 mike          1.27      
 822 mike          1.111.6.1     if (Atomic_get(&_rep->refs) != 1)
 823 mike          1.111.6.6 	_rep = StringRep::copyOnWrite(_rep);
 824 mike          1.27      
 825 mike          1.111.6.1     assert(index + n <= _rep->size);
 826                         
 827                             size_t rem = _rep->size - (index + n);
 828                             Uint16* data = _rep->data;
 829                         
 830                             if (rem)
 831                                 memmove(data + index, data + index + n, rem * sizeof(Uint16));
 832                         
 833                             _rep->size -= n;
 834                             data[_rep->size] = '\0';
 835 mike          1.27      }
 836                         
 837 mike          1.111.6.1 String String::subString(Uint32 index, Uint32 n) const
 838 mike          1.27      {
 839 mike          1.111.6.1     // Note: this implementation is very permissive but used for
 840                             // backwards compatibility.
 841                         
 842                             if (index < _rep->size)
 843 mike          1.27          {
 844 mike          1.111.6.1 	if (n == PEG_NOT_FOUND || n > _rep->size - index)
 845                         	    n = _rep->size - index;
 846 mike          1.27      
 847 mike          1.111.6.1 	return String((Char16*)_rep->data + index, n);
 848 mike          1.27          }
 849 david.dillard 1.105     
 850                             return String();
 851 mike          1.27      }
 852                         
 853                         Uint32 String::find(Char16 c) const
 854                         {
 855 mike          1.111.6.1     Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
 856 mike          1.27      
 857 mike          1.111.6.1     if (p)
 858                         	return p - _rep->data;
 859 mike          1.27      
 860                             return PEG_NOT_FOUND;
 861                         }
 862                         
 863 kumpf         1.53      Uint32 String::find(Uint32 index, Char16 c) const
 864 mike          1.30      {
 865 mike          1.111.6.6     _checkBounds(index, _rep->size);
 866 mike          1.30      
 867 mike          1.111.6.1     if (index >= _rep->size)
 868                         	return PEG_NOT_FOUND;
 869                         
 870                             Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
 871                         
 872                             if (p)
 873                         	return p - _rep->data;
 874 mike          1.30      
 875                             return PEG_NOT_FOUND;
 876                         }
 877                         
 878 mike          1.111.6.6 Uint32 StringFindAux(
 879 mike          1.111.6.5     const StringRep* _rep, const Char16* s, Uint32 n)
 880 mike          1.27      {
 881 mike          1.111.6.6     _checkNullPointer(s);
 882 mike          1.27      
 883 mike          1.111.6.1     const Uint16* data = _rep->data;
 884                             size_t rem = _rep->size;
 885 mike          1.30      
 886 mike          1.111.6.1     while (n <= rem)
 887 mike          1.27          {
 888 mike          1.111.6.1 	Uint16* p = (Uint16*)_find(data, rem, s[0]);
 889 david.dillard 1.105     
 890 mike          1.111.6.1 	if (!p)
 891                         	    break;
 892                         
 893                         	if (memcmp(p, s, n * sizeof(Uint16)) == 0)
 894                         	    return p - _rep->data;
 895                         
 896                         	p++;
 897                         	rem -= p - data;
 898                         	data = p;
 899 mike          1.27          }
 900 mike          1.111.6.1 
 901 mike          1.27          return PEG_NOT_FOUND;
 902                         }
 903                         
 904 mike          1.111.6.1 Uint32 String::find(const char* s) const
 905                         {
 906 mike          1.111.6.6     _checkNullPointer(s);
 907 mike          1.111.6.1 
 908                             // Note: could optimize away creation of temporary, but this is rarely
 909                             // called.
 910                             return find(String(s));
 911                         }
 912                         
 913 mike          1.27      Uint32 String::reverseFind(Char16 c) const
 914                         {
 915 mike          1.111.6.1     Uint16 x = c;
 916                             Uint16* p = _rep->data;
 917                             Uint16* q = _rep->data + _rep->size;
 918 mike          1.27      
 919 mike          1.111.6.1     while (q != p)
 920 mike          1.27          {
 921 mike          1.111.6.1 	if (*--q == x)
 922                         	    return q - p;
 923 mike          1.27          }
 924                         
 925                             return PEG_NOT_FOUND;
 926                         }
 927                         
 928                         void String::toLower()
 929                         {
 930 david         1.69      #ifdef PEGASUS_HAS_ICU
 931 mike          1.111.6.1 
 932 yi.zhou       1.108         if (InitializeICU::initICUSuccessful())
 933 david         1.90          {
 934 mike          1.111.6.1         if (Atomic_get(&_rep->refs) != 1)
 935 mike          1.111.6.6 	    _rep = StringRep::copyOnWrite(_rep);
 936 mike          1.111.6.1 
 937 yi.zhou       1.108             // This will do a locale-insensitive, but context-sensitive convert.
 938 mike          1.111.6.1         // Since context-sensitive casing looks at adjacent chars, this 
 939                                 // prevents optimizations where the us-ascii is converted before 
 940                                 // calling ICU.
 941 yi.zhou       1.108             // The string may shrink or expand after the convert.
 942                         
 943 mike          1.111.6.1 	//// First calculate size of resulting string. u_strToLower() returns
 944                         	//// only the size when zero is passed as the destination size argument.
 945                         
 946 yi.zhou       1.108             UErrorCode err = U_ZERO_ERROR;
 947                         
 948 mike          1.111.6.6         int32_t newSize = u_strToLower(
 949 mike          1.111.6.1 	    NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
 950                                 
 951                                 err = U_ZERO_ERROR;
 952 chuck         1.99      
 953 mike          1.111.6.1 	//// Reserve enough space for the result.
 954                         
 955 mike          1.111.6.6 	if ((Uint32)newSize > _rep->cap)
 956                         	    _reserve(_rep, newSize);
 957 mike          1.111.6.1 
 958                         	//// Perform the conversion (overlapping buffers are allowed).
 959 yi.zhou       1.108     
 960 mike          1.111.6.6         u_strToLower((UChar*)_rep->data, newSize,
 961 mike          1.111.6.1 	    (UChar*)_rep->data, _rep->size, NULL, &err);
 962                         
 963 mike          1.111.6.6 	_rep->size = newSize;
 964 mike          1.111.6.1 	return;
 965 david         1.90          }
 966 mike          1.111.6.1 
 967                         #endif /* PEGASUS_HAS_ICU */
 968                         
 969                             if (Atomic_get(&_rep->refs) != 1)
 970 mike          1.111.6.6 	_rep = StringRep::copyOnWrite(_rep);
 971 mike          1.111.6.1 
 972                             Uint16* p = _rep->data;
 973                             size_t n = _rep->size;
 974                         
 975                             for (; n--; p++)
 976 david         1.90          {
 977 mike          1.111.6.1 	if (!(*p & 0xFF00))
 978 mike          1.111.6.6 	    *p = _toLower(*p);
 979 mike          1.27          }
 980 kumpf         1.39      }
 981                         
 982 chuck         1.99      void String::toUpper()
 983 david         1.90      {
 984                         #ifdef PEGASUS_HAS_ICU
 985 mike          1.111.6.1 
 986 yi.zhou       1.108         if (InitializeICU::initICUSuccessful())
 987 chuck         1.99          {
 988 mike          1.111.6.1         if (Atomic_get(&_rep->refs) != 1)
 989 mike          1.111.6.6 	    _rep = StringRep::copyOnWrite(_rep);
 990 mike          1.111.6.1 
 991 yi.zhou       1.108             // This will do a locale-insensitive, but context-sensitive convert.
 992 mike          1.111.6.1         // Since context-sensitive casing looks at adjacent chars, this 
 993                                 // prevents optimizations where the us-ascii is converted before 
 994                                 // calling ICU.
 995 yi.zhou       1.108             // The string may shrink or expand after the convert.
 996                         
 997 mike          1.111.6.1 	//// First calculate size of resulting string. u_strToUpper() returns
 998                         	//// only the size when zero is passed as the destination size argument.
 999                         
1000 yi.zhou       1.108             UErrorCode err = U_ZERO_ERROR;
1001                         
1002 mike          1.111.6.6         int32_t newSize = u_strToUpper(
1003 mike          1.111.6.1 	    NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
1004 chuck         1.99      
1005 mike          1.111.6.1         err = U_ZERO_ERROR;
1006 yi.zhou       1.108     
1007 mike          1.111.6.1 	//// Reserve enough space for the result.
1008 david         1.90      
1009 mike          1.111.6.6 	if ((Uint32)newSize > _rep->cap)
1010                         	    _reserve(_rep, newSize);
1011 kumpf         1.39      
1012 mike          1.111.6.1 	//// Perform the conversion (overlapping buffers are allowed).
1013 mike          1.27      
1014 mike          1.111.6.6         u_strToUpper((UChar*)_rep->data, newSize,
1015 mike          1.111.6.1 	    (UChar*)_rep->data, _rep->size, NULL, &err);
1016                         
1017 mike          1.111.6.6 	_rep->size = newSize;
1018 mike          1.111.6.1 
1019                         	return;
1020 mike          1.27          }
1021                         
1022 mike          1.111.6.1 #endif /* PEGASUS_HAS_ICU */
1023                         
1024                             if (Atomic_get(&_rep->refs) != 1)
1025 mike          1.111.6.6 	_rep = StringRep::copyOnWrite(_rep);
1026 mike          1.111.6.1 
1027                             Uint16* p = _rep->data;
1028                             size_t n = _rep->size;
1029                         
1030                             for (; n--; p++)
1031 mike          1.111.6.6 	*p = _toUpper(*p);
1032 mike          1.27      }
1033                         
1034 mike          1.111.6.1 int String::compare(const String& s1, const String& s2, Uint32 n)
1035 mike          1.30      {
1036 mike          1.111.6.1     assert(n <= s1._rep->size);
1037                             assert(n <= s2._rep->size);
1038 kumpf         1.43      
1039 mike          1.111.6.1     // Ignoring error in which n is greater than s1.size() or s2.size()
1040                             return _compare(s1._rep->data, s2._rep->data, n);
1041                         }
1042 mike          1.30      
1043 mike          1.111.6.1 int String::compare(const String& s1, const String& s2)
1044                         {
1045                             return _compare(s1._rep->data, s2._rep->data);
1046                         }
1047 mike          1.30      
1048 mike          1.111.6.1 int String::compare(const String& s1, const char* s2)
1049                         {
1050 mike          1.111.6.6     _checkNullPointer(s2);
1051 mike          1.30      
1052 mike          1.111.6.1 #ifdef PEGASUS_STRING_NO_UTF8
1053 mike          1.111.6.6     return _compareNoUTF8(s1._rep->data, s2);
1054 mike          1.111.6.1 #else
1055                             // ATTN: optimize this!
1056                             return String::compare(s1, String(s2));
1057                         #endif
1058 mike          1.30      }
1059                         
1060 mike          1.111.6.1 int String::compareNoCase(const String& str1, const String& str2)
1061 kumpf         1.40      {
1062 david         1.69      #ifdef PEGASUS_HAS_ICU
1063 mike          1.111.6.1 
1064 yi.zhou       1.108         if (InitializeICU::initICUSuccessful())
1065                             {
1066 mike          1.111.6.1         return  u_strcasecmp(
1067                         	    str1._rep->data, str2._rep->data, U_FOLD_CASE_DEFAULT);
1068 yi.zhou       1.108         }
1069 kumpf         1.40      
1070 mike          1.111.6.1 #endif /* PEGASUS_HAS_ICU */
1071 kumpf         1.40      
1072 mike          1.111.6.1     const Uint16* s1 = str1._rep->data;
1073                             const Uint16* s2 = str2._rep->data;
1074                         
1075                             while (*s1 && *s2)
1076                             {
1077 mike          1.111.6.6         int r = _toLower(*s1++) - _toLower(*s2++);
1078 kumpf         1.40      
1079 david.dillard 1.105             if (r)
1080                                     return r;
1081 kumpf         1.40          }
1082                         
1083 mike          1.111.6.1     if (*s2)
1084 david.dillard 1.105             return -1;
1085 mike          1.111.6.1     else if (*s1)
1086 david.dillard 1.105             return 1;
1087 kumpf         1.40      
1088                             return 0;
1089                         }
1090                         
1091 mike          1.111.6.6 Boolean StringEqualNoCase(const String& s1, const String& s2)
1092 mike          1.27      {
1093 david         1.69      #ifdef PEGASUS_HAS_ICU
1094 kumpf         1.39      
1095 mike          1.111.6.1     return String::compareNoCase(s1, s2) == 0;
1096 kumpf         1.39      
1097 mike          1.111.6.1 #else /* PEGASUS_HAS_ICU */
1098                         
1099 mike          1.111.6.5     Uint16* p = (Uint16*)s1.getChar16Data();
1100                             Uint16* q = (Uint16*)s2.getChar16Data();
1101                             Uint32 n = s2.size();
1102 mike          1.111.6.1 
1103                             while (n >= 8)
1104                             {
1105 mike          1.111.6.6 	if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1106                         	    ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1107                         	    ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1108                         	    ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) ||
1109                         	    ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) ||
1110                         	    ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) ||
1111                         	    ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) ||
1112                         	    ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7]))))
1113 mike          1.111.6.1 	{
1114                         	    return false;
1115                         	}
1116                         
1117                         	n -= 8;
1118                         	p += 8;
1119                         	q += 8;
1120                             }
1121                         
1122                             while (n >= 4)
1123                             {
1124 mike          1.111.6.6 	if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1125                         	    ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1126                         	    ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1127                         	    ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
1128 mike          1.111.6.1 	{
1129                         	    return false;
1130                         	}
1131                         
1132                         	n -= 4;
1133                         	p += 4;
1134                         	q += 4;
1135                             }
1136 mike          1.27      
1137 kumpf         1.39          while (n--)
1138                             {
1139 mike          1.111.6.6 	if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
1140 mike          1.111.6.1 	    return false;
1141                         
1142                         	p++;
1143                         	q++;
1144 kumpf         1.39          }
1145 mike          1.28      
1146 kumpf         1.39          return true;
1147 david         1.69      
1148 mike          1.111.6.1 #endif /* PEGASUS_HAS_ICU */
1149                         }
1150 mike          1.27      
1151 mike          1.111.6.1 Boolean String::equalNoCase(const String& s1, const char* s2)
1152 david         1.69      {
1153 mike          1.111.6.6     _checkNullPointer(s2);
1154 david         1.69      
1155 mike          1.111.6.1 #if defined(PEGASUS_HAS_ICU)
1156 david         1.69      
1157 mike          1.111.6.1     return String::equalNoCase(s1, String(s2));
1158 david.dillard 1.105     
1159 mike          1.111.6.1 #elif defined(PEGASUS_STRING_NO_UTF8)
1160 david         1.71      
1161 mike          1.111.6.1     const Uint16* p1 = (Uint16*)s1._rep->data;
1162                             const char* p2 = s2;
1163                             size_t n = s1._rep->size;
1164 kumpf         1.42      
1165 mike          1.111.6.1     while (n--)
1166                             {
1167                         	if (!*p2)
1168                         	    return false;
1169 karl          1.36      
1170 mike          1.111.6.6 	if (_toUpper(*p1++) != _toUpperTable[int(*p2++)])
1171 mike          1.111.6.1 	    return false;
1172                             }
1173 david.dillard 1.105     
1174 mike          1.111.6.1     if (*p2)
1175                         	return false;
1176                             
1177                             return true;
1178 david.dillard 1.105     
1179 mike          1.111.6.1 #else /* PEGASUS_HAS_ICU */
1180 chuck         1.78      
1181 mike          1.111.6.1     // ATTN: optimize this!
1182                             return String::equalNoCase(s1, String(s2));
1183 david.dillard 1.105     
1184 mike          1.111.6.1 #endif /* PEGASUS_HAS_ICU */
1185                         }
1186 karl          1.36      
1187 mike          1.111.6.1 Boolean String::equal(const String& s1, const String& s2)
1188 karl          1.36      {
1189 mike          1.111.6.1     return s1._rep->size == s2._rep->size && memcmp(s1._rep->data, 
1190                         	s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;
1191 karl          1.36      }
1192                         
1193 mike          1.111.6.1 Boolean String::equal(const String& s1, const char* s2)
1194 karl          1.36      {
1195 mike          1.111.6.1 #ifdef PEGASUS_STRING_NO_UTF8
1196 kumpf         1.39      
1197 mike          1.111.6.6     _checkNullPointer(s2);
1198 kumpf         1.39      
1199 mike          1.111.6.1     const Uint16* p = (Uint16*)s1._rep->data;
1200                             const char* q = s2;
1201 kumpf         1.39      
1202 mike          1.111.6.1     while (*p && *q)
1203                             {
1204                         	if (*p++ != Uint16(*q++))
1205                         	    return false;
1206                             }
1207 kumpf         1.39      
1208 mike          1.111.6.1     return !(*p || *q);
1209 kumpf         1.39      
1210 mike          1.111.6.1 #else /* PEGASUS_STRING_NO_UTF8 */
1211 kumpf         1.39      
1212 mike          1.111.6.1     return String::equal(s1, String(s2));
1213                         
1214                         #endif /* PEGASUS_STRING_NO_UTF8 */
1215 kumpf         1.39      }
1216                         
1217 kumpf         1.47      PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
1218 kumpf         1.39      {
1219 david         1.69      #if defined(PEGASUS_OS_OS400)
1220 mike          1.111.6.1 
1221 david         1.93          CString cstr = str.getCString();
1222 david         1.69          const char* utf8str = cstr;
1223                             os << utf8str;
1224 mike          1.111.6.1     return os;
1225                         #else    
1226                         
1227                         #if defined(PEGASUS_HAS_ICU)
1228 david         1.69      
1229 yi.zhou       1.108         if (InitializeICU::initICUSuccessful())
1230                             {
1231 david.dillard 1.105             char *buf = NULL;
1232                                 const int size = str.size() * 6;
1233 mike          1.111.6.1         UnicodeString UniStr(
1234                         	    (const UChar *)str.getChar16Data(), (int32_t)str.size());
1235 david.dillard 1.105             Uint32 bufsize = UniStr.extract(0,size,buf);
1236                                 buf = new char[bufsize+1];
1237                                 UniStr.extract(0,bufsize,buf);
1238                                 os << buf;
1239                                 os.flush();
1240                                 delete [] buf;
1241 mike          1.111.6.1         return os;       
1242 yi.zhou       1.108         }
1243 mike          1.111.6.1 
1244                         #endif  // PEGASUS_HAS_ICU 
1245                         
1246                             for (Uint32 i = 0, n = str.size(); i < n; i++)
1247 yi.zhou       1.108         {
1248 mike          1.111.6.1         Uint16 code = str[i];
1249 david.dillard 1.105     
1250 mike          1.111.6.1        	if (code > 0 && !(code & 0xFF00))
1251                            	        os << char(code);
1252                                 else
1253                            	    {
1254                                     // Print in hex format:
1255                                     char buffer[8];
1256                                     sprintf(buffer, "\\x%04X", code);
1257                                     os << buffer;
1258 david.dillard 1.105             }
1259 yi.zhou       1.108         }
1260 kumpf         1.39      
1261                             return os;
1262 mike          1.111.6.1 #endif // PEGASUS_OS_OS400
1263 kumpf         1.39      }
1264                         
1265 mike          1.111.6.6 void StringAppendCharAux(StringRep*& _rep)
1266 kumpf         1.39      {
1267 mike          1.111.6.1     StringRep* tmp;
1268 kumpf         1.39      
1269 mike          1.111.6.1     if (_rep->cap)
1270                             {
1271                         	tmp = StringRep::alloc(2 * _rep->cap);
1272                         	tmp->size = _rep->size;
1273                         	_copy(tmp->data, _rep->data, _rep->size);
1274                             }
1275                             else
1276                             {
1277                         	tmp = StringRep::alloc(8);
1278                         	tmp->size = 0;
1279                             }
1280 kumpf         1.39      
1281 mike          1.111.6.1     StringRep::unref(_rep);
1282                             _rep = tmp;
1283 kumpf         1.39      }
1284                         
1285 mike          1.111.6.1 PEGASUS_NAMESPACE_END
1286 kumpf         1.39      
1287 mike          1.111.6.1 /*
1288                         ================================================================================
1289 kumpf         1.39      
1290 mike          1.111.6.1 String optimizations:
1291                         
1292                             1.  Added mechanism allowing certain functions to be inlined only when
1293                         	used by internal Pegasus modules. External modules (i.e., providers)
1294                         	link to a non-inline version, which allows for binary compatibility.
1295                         
1296                             2.  Implemented copy-on-write with atomic increment/decrement. This
1297                         	yielded a 10% improvement for the 'gc' benchmark and an 11% improvement
1298                         	for the 'ni1000' benchmark.
1299                         
1300                             3.	Employed loop unrolling in several places. For example, see:
1301                         
1302                         	    static Uint16* _find(const Uint16* s, size_t n, Uint16 c);
1303                         
1304                             4.  Used the "empty-rep" optimization (described in whitepaper from the
1305                         	GCC Developers Summit). This reduced default construction to a simple
1306                         	pointer assignment.
1307                         
1308 mike          1.111.6.6 	    inline String::String() : _rep(&_emptyRep) { }
1309 mike          1.111.6.1 
1310                             5.  Implemented Uint16 versions of toupper() and tolower() using tables.
1311                         	For example:
1312                         
1313                         	    static const char _upper[] =
1314                         	    {
1315                         		0,1,2,...255
1316                         	    };
1317                         
1318 mike          1.111.6.6 	    inline Uint16 _toUpper(Uint16 x)
1319 mike          1.111.6.1 	    {
1320                         		return (x & 0xFF00) ? x : _upper[x];
1321                         	    }
1322                         
1323                         	This outperforms the system implementation by avoiding an anding 
1324                         	operation.
1325                         
1326                             6.  Implemented char* version of the following member functions to 
1327                         	eliminate unnecessary creation of anonymous string objects 
1328                         	(temporaries).
1329                         
1330                         	    String(const String& s1, const char* s2);
1331                         	    String(const char* s1, const String& s2);
1332                         	    String& String::operator=(const char* str);
1333                         	    Uint32 String::find(const char* s) const;
1334                         	    bool String::equal(const String& s1, const char* s2);
1335                         	    static int String::compare(const String& s1, const char* s2);
1336                         	    String& String::append(const char* str);
1337                         	    String& String::append(const char* str, Uint32 size);
1338                         	    static bool String::equalNoCase(const String& s1, const char* s2);
1339                         	    String& operator=(const char* str)
1340 mike          1.111.6.1 	    String& String::assign(const char* str)
1341                         	    String& String::append(const char* str)
1342                         	    Boolean operator==(const String& s1, const char* s2)
1343                         	    Boolean operator==(const char* s1, const String& s2)
1344                         	    Boolean operator!=(const String& s1, const char* s2)
1345                         	    Boolean operator!=(const char* s1, const String& s2)
1346                         	    Boolean operator<(const String& s1, const char* s2)
1347                         	    Boolean operator<(const char* s1, const String& s2)
1348                         	    Boolean operator>(const String& s1, const char* s2)
1349                         	    Boolean operator>(const char* s1, const String& s2)
1350                         	    Boolean operator<=(const String& s1, const char* s2)
1351                         	    Boolean operator<=(const char* s1, const String& s2)
1352                         	    Boolean operator>=(const String& s1, const char* s2)
1353                         	    Boolean operator>=(const char* s1, const String& s2)
1354                         	    String operator+(const String& s1, const char* s2)
1355                         	    String operator+(const char* s1, const String& s2)
1356                         
1357 mike          1.111.6.5     7.  Optimized _roundUpToPow2(), used in rounding the capacity to the next 
1358 mike          1.111.6.1         power of two (algorithm from the book "Hacker's Delight").
1359                         
1360 mike          1.111.6.5 	    static Uint32 _roundUpToPow2(Uint32 x)
1361 mike          1.111.6.1 	    {
1362                         		if (x < 8)
1363                         		    return 8;
1364                         
1365                         		x--;
1366                         		x |= (x >> 1);
1367                         		x |= (x >> 2);
1368                         		x |= (x >> 4);
1369                         		x |= (x >> 8);
1370                         		x |= (x >> 16);
1371                         		x++;
1372                         
1373                         		return x;
1374                         	    }
1375                         
1376                             8.  Implemented "concatenating constructors" to eliminate temporaries
1377                         	created by operator+(). This scheme employs the "return-value 
1378                         	optimization" described by Stan Lippman.
1379                         
1380                         	    inline String operator+(const String& s1, const String& s2)
1381                         	    {
1382 mike          1.111.6.1 		return String(s1, s2, 0);
1383                         	    }
1384                         
1385                             9.  Experimented to find the optimal initial size for a short string.
1386                         	Eight seems to offer the best tradeoff between space and time.
1387                         
1388                             10. Inlined all members of the Char16 class.
1389                         
1390                             11. Used Uint16 internally in the String class. This showed no improvement
1391                         	since Char16 was already fully inlined and was essentially reduced to
1392                         	Uint16 in any case.
1393                         
1394                             12. Implemented conditional logic (#if) allowing error checking logic to
1395                         	be excluded for better performance. Examples include bounds checking 
1396                         	and null-pointer checking.
1397                         
1398                             13. Used memcpy() and memcmp() where possible. These are implemented using
1399                         	the rep family of instructions under Intel and are much faster.
1400                         
1401                             14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8 
1402                         	copy routine overhead.
1403 mike          1.111.6.1 
1404                             15. Added ASCII7 form of the constructor and assign().
1405                         
1406                         	    String s("hello world", String::ASCII7);
1407                         
1408                         	    s.assignASCII7("hello world");
1409                         
1410                         	This avoids slower UTF8 processing when not needed.
1411                         
1412 mike          1.111.6.5 ================================================================================
1413                         
1414                         TO-DO:
1415                         
1416                             (+)	[DONE] Use PEGASUS_USE_EXPERIMENTAL_INTERFACES
1417                         
1418                             (+)	[DONE] Submit BUG-2754 (Windows buffer limit).
1419                         
1420                             (+) [DONE] Eliminate char versions of find() and append().
1421                         
1422                             (+) [DONE] Remove PEGASUS_MAX_PRINTABLE_CHARACTER from Config.h
1423                         
1424                             (+) [DONE] Change _next_pow_2() to _roundUpToPow2().
1425                         
1426                             (+) [DONE] Change '99' to '2' in StringRep constructor (comment as well).
1427                         
1428                             (+) [DONE] Comment StringRep allocation layout.
1429                         
1430                             (+) [DONE] Conceal private inline functions.
1431 mike          1.111.6.1 
1432 mike          1.111.6.5     (+) [DONE] Shorten inclusion of StringInline.h in String.h.
1433 mike          1.111.6.1 
1434 mike          1.111.6.5     (+) [DONE] Change USE_INTERNAL_INLINE TO DISABLE_INTERNAL_INLINE or get
1435                         	rid of altogether.
1436 mike          1.111.6.1 
1437 mike          1.111.6.6     (+) [DONE] useCamelNotationOnAllFunctionNames.
1438                         
1439 mike          1.111.6.5     -----------
1440 mike          1.111.6.1 
1441 mike          1.111.6.5     (+) Check for overflow condition in StringRep::alloc().
1442 mike          1.111.6.1 
1443 mike          1.111.6.5     (+) Fix throw-related memory leak.
1444 mike          1.111.6.1 
1445 mike          1.111.6.5     (+)	DOC++ String.h
1446 mike          1.111.6.1 	
1447 mike          1.111.6.5     (+) Look at PEP223 for coding security guidelines.
1448 mike          1.111.6.1 
1449 mike          1.111.6.6     (+) Replace AtomicInt with new Atomic implementation.
1450 mike          1.111.6.1 
1451 mike          1.111.6.5     (+) Implement Atomic operations for HP.
1452 mike          1.111.6.3 
1453 mike          1.111.6.5     (+) Remove tabs.
1454 mike          1.111.6.1 
1455                         ================================================================================
1456                         */

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2