(file) Return to String.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

Diff for /pegasus/src/Pegasus/Common/String.cpp between version 1.111.2.6 and 1.111.6.15

version 1.111.2.6, 2005/09/29 15:10:55 version 1.111.6.15, 2005/10/14 14:09:29
Line 27 
Line 27 
 // //
 //============================================================================== //==============================================================================
 // //
 // Author: Mike Brasher (mbrasher@bmc.com)  // Author: Mike Brasher (mbrasher@austin.rr.com)
   //
   // Modified By:
   //     Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
   //     Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297
   //     David Dillard, VERITAS Software Corp. (david.dillard@veritas.com)
   //     Mike Brasher (mike-brasher@austin.rr.com)
 // //
 //%///////////////////////////////////////////////////////////////////////////// //%/////////////////////////////////////////////////////////////////////////////
  
 #define PEGASUS_USE_INTERNAL_INLINES  
 #include "String.h"  
 #include <cassert> #include <cassert>
 #include "InternalException.h" #include "InternalException.h"
 #include "CommonUTF.h" #include "CommonUTF.h"
 #include "CharSet.h"  #include "MessageLoader.h"
   #include "StringRep.h"
  
 #ifdef PEGASUS_STRING_ENABLE_ICU  #ifdef PEGASUS_HAS_ICU
 #include <unicode/ustring.h> #include <unicode/ustring.h>
 #include <unicode/uchar.h> #include <unicode/uchar.h>
 #endif #endif
Line 47 
Line 52 
  
 //============================================================================== //==============================================================================
 // //
 // Compile-time switches (defined macros).  // Compile-time macros (undefined by default).
 //  
 //     PEGASUS_STRING_ENABLE_ICU -- enables use of ICU package.  
 // //
 //     PEGASUS_STRING_NO_THROW -- suppresses throwing of exceptions //     PEGASUS_STRING_NO_THROW -- suppresses throwing of exceptions
 // //
 //     PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code. //     PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
 // //
 //     PEGASUS_USE_INTERNAL_INLINES -- enables internal inlining feature.  
 //  
 //============================================================================== //==============================================================================
  
 //============================================================================== //==============================================================================
Line 65 
Line 66 
 // //
 //============================================================================== //==============================================================================
  
 // Converts 16-bit characters to upper case.  // Note: this table is much faster than the system toupper(). Please do not
 inline Uint16 _to_upper(Uint16 x)  // change.
   
   // Upper-case lookup table indexed by an 8-bit character code.
   // Entries 0x61-0x7A ('a'-'z') hold 0x41-0x5A ('A'-'Z'); every other entry
   // (including all of 0x80-0xFF) maps to itself, so only 7-bit ASCII letters
   // are converted.
   const Uint8 _toUpperTable[256] =
   {
       0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
       0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
       0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
       0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
       0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
       0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
       0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
       0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
       0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
       0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
       0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
       0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
       // 0x60-0x7F: lower-case letters are folded to upper case here.
       0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
       0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
       0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
       0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
       // 0x80-0xFF: identity mapping (no case conversion above 7-bit ASCII).
       0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
       0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
       0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
       0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
       0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
       0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
       0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
       0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
       0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
       0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
       0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
       0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
       0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
       0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
       0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
       0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
   };
   
   // Note: this table is much faster than the system tolower(). Please do not
   // change.
   
   const Uint8 _toLowerTable[256] =
 { {
     return (x & 0xFF00) ? x : CharSet::to_upper(x);      0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
       0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
       0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
       0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
       0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
       0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
       0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
       0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
       0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
       0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
       0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
       0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
       0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
       0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
       0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
       0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
       0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
       0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
       0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
       0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
       0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
       0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
       0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
       0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
       0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
       0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
       0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
       0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
       0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
       0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
       0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
       0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
   };
   
   // Converts 16-bit characters to upper case. This routine is faster than the
   // system toupper(). Please do not change.
   inline Uint16 _toUpper(Uint16 x)
   {
       return (x & 0xFF00) ? x : _toUpperTable[x];
 } }
  
 // Converts 16-bit characters to lower case.  // Converts 16-bit characters to lower case. This routine is faster than the
 inline Uint16 _to_lower(Uint16 x)  // system tolower(). Please do not change.
   inline Uint16 _toLower(Uint16 x)
 { {
     return (x & 0xFF00) ? x : CharSet::to_lower(x);      return (x & 0xFF00) ? x : _toLowerTable[x];
 } }
  
 // Rounds x to the next power of two (or just returns 8 if x < 8).  // Rounds x up to the nearest power of two (or just returns 8 if x < 8).
 static Uint32 _next_pow_2(Uint32 x)  static Uint32 _roundUpToPow2(Uint32 x)
 { {
   #ifndef PEGASUS_STRING_NO_THROW
   
       if (x > 0x0FFFFFFF)
           throw PEGASUS_STD(bad_alloc)();
   
   #endif
   
     if (x < 8)     if (x < 8)
         return 8;         return 8;
  
Line 97 
Line 185 
 template<class P, class Q> template<class P, class Q>
 static void _copy(P* p, const Q* q, size_t n) static void _copy(P* p, const Q* q, size_t n)
 { {
     // Use loop unrolling.      // The following employs loop unrolling for efficiency. Please do not
       // eliminate.
  
     while (n >= 8)     while (n >= 8)
     {     {
Line 131 
Line 220 
  
 static Uint16* _find(const Uint16* s, size_t n, Uint16 c) static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
 { {
       // The following employs loop unrolling for efficiency. Please do not
       // eliminate.
   
     while (n >= 4)     while (n >= 4)
     {     {
         if (s[0] == c)         if (s[0] == c)
Line 187 
Line 279 
     return 0;     return 0;
 } }
  
 static int _compare_no_utf8(const Uint16* s1, const char* s2)  static int _compareNoUTF8(const Uint16* s1, const char* s2)
 { {
     Uint16 c1;     Uint16 c1;
     Uint16 c2;     Uint16 c2;
Line 220 
Line 312 
     memcpy(s1, s2, n * sizeof(Uint16));     memcpy(s1, s2, n * sizeof(Uint16));
 } }
  
 void String_throw_out_of_bounds()  void StringThrowOutOfBounds()
 { {
     throw IndexOutOfBoundsException();     throw IndexOutOfBoundsException();
 } }
  
 #ifdef PEGASUS_STRING_NO_THROW  inline void _checkNullPointer(const void* ptr)
 # define _check_null_pointer(ARG) /* empty */  
 #else  
 template<class T>  
 inline void _check_null_pointer(const T* ptr)  
 { {
       // Throws NullPointer when ptr is null; the check is compiled in or out
       // depending on PEGASUS_STRING_NO_THROW.
       //
       // NOTE(review): the guard below looks inverted. The removed revision
       // compiled the check OUT when PEGASUS_STRING_NO_THROW was defined, and
       // every other throw site in this revision is wrapped in
       // "#ifndef PEGASUS_STRING_NO_THROW". With "#ifdef" the null check only
       // runs when throwing is supposed to be suppressed, and is skipped in
       // the default build. Confirm and change to "#ifndef".
   #ifdef PEGASUS_STRING_NO_THROW
   
     if (!ptr)     if (!ptr)
         throw NullPointer();         throw NullPointer();
 }  
 #endif #endif
   }
   
   // Reports an invalid UTF-8 input by throwing an Exception built from a
   // MessageLoaderParms object. Never returns normally.
   //
   // index: offset of the offending byte sequence in the source buffer; the
   //     callers in this file pass the utf8_error_index filled in by
   //     _convert().
   static void _StringThrowBadUTF8(Uint32 index)
   {
       // Presumably the first string is the message key and the second is the
       // default text, with $0 replaced by index -- confirm against
       // MessageLoaderParms.
       MessageLoaderParms parms(
           "Common.String.BAD_UTF8",
           "The byte sequence starting at index $0 "
           "is not valid UTF-8 encoding.",
           index);
       throw Exception(parms);
   }
  
 static size_t _copy_from_utf8(Uint16* dest, const char* src, size_t n)  static size_t _copyFromUTF8(
       Uint16* dest,
       const char* src,
       size_t n,
       size_t& utf8_error_index)
 { {
     Uint16* p = dest;     Uint16* p = dest;
     const Uint8* q = (const Uint8*)src;     const Uint8* q = (const Uint8*)src;
  
     // Process leading 7-bit ASCII characters (to avoid UTF8 overhead below      // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later).
     // this loop). Use factor-four loop-unrolling.      // Use loop-unrolling.
  
     while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)      while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0)
       {
           p[0] = q[0];
           p[1] = q[1];
           p[2] = q[2];
           p[3] = q[3];
           p[4] = q[4];
           p[5] = q[5];
           p[6] = q[6];
           p[7] = q[7];
           p += 8;
           q += 8;
           n -= 8;
       }
   
       while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0)
     {     {
         p[0] = q[0];         p[0] = q[0];
         p[1] = q[1];         p[1] = q[1];
Line 267 
Line 387 
             }             }
             break;             break;
         case 2:         case 2:
             if (q[0] < 128 && q[1] < 128)              if (((q[0]|q[1]) & 0x80) == 0)
             {             {
                 p[0] = q[0];                 p[0] = q[0];
                 p[1] = q[1];                 p[1] = q[1];
Line 275 
Line 395 
             }             }
             break;             break;
         case 3:         case 3:
             if (q[0] < 128 && q[1] < 128 && q[2] < 128)              if (((q[0]|q[1]|q[2]) & 0x80) == 0)
             {             {
                 p[0] = q[0];                 p[0] = q[0];
                 p[1] = q[1];                 p[1] = q[1];
Line 303 
Line 423 
             if (c > n || !isValid_U8(q, c) ||             if (c > n || !isValid_U8(q, c) ||
                 UTF8toUTF16(&q, q + c, &p, p + n) != 0)                 UTF8toUTF16(&q, q + c, &p, p + n) != 0)
             {             {
                 throw Exception("Bad UTF8 encoding");                  utf8_error_index = q - (const Uint8*)src;
                   return size_t(-1);
             }             }
  
             n -= c;             n -= c;
Line 315 
Line 436 
  
 // Note: dest must be at least three times src (plus an extra byte for // Note: dest must be at least three times src (plus an extra byte for
 // terminator). // terminator).
 static inline size_t _copy_to_utf8(char* dest, const Uint16* src, size_t n)  static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n)
 { {
       // The following employs loop unrolling for efficiency. Please do not
       // eliminate.
   
     const Uint16* q = src;     const Uint16* q = src;
     Uint8* p = (Uint8*)dest;     Uint8* p = (Uint8*)dest;
  
Line 368 
Line 492 
     return p - (Uint8*)dest;     return p - (Uint8*)dest;
 } }
  
 static inline size_t _convert(Uint16* p, const char* q, size_t n)  static inline size_t _convert(
       Uint16* p, const char* q, size_t n, size_t& utf8_error_index)
 { {
 #ifdef PEGASUS_STRING_NO_UTF8 #ifdef PEGASUS_STRING_NO_UTF8
     _copy(p, q, n);     _copy(p, q, n);
     return n;     return n;
 #else #else
     return _copy_from_utf8(p, q, n);      return _copyFromUTF8(p, q, n, utf8_error_index);
 #endif #endif
 } }
  
Line 421 
Line 546 
 // //
 //============================================================================== //==============================================================================
  
 StringRep StringRep::_empty_rep;  StringRep StringRep::_emptyRep;
  
 inline StringRep* StringRep::alloc(size_t cap) inline StringRep* StringRep::alloc(size_t cap)
 { {
   #ifndef PEGASUS_STRING_NO_THROW
   
       // Any string bigger than this is seriously suspect.
       if (cap > 0x0FFFFFFF)
           throw PEGASUS_STD(bad_alloc)();
   
   #endif
   
     StringRep* rep = (StringRep*)::operator new(     StringRep* rep = (StringRep*)::operator new(
         sizeof(StringRep) + cap * sizeof(Uint16));         sizeof(StringRep) + cap * sizeof(Uint16));
     rep->cap = cap;     rep->cap = cap;
     Atomic_create(&rep->refs, 1);      new(&rep->refs) AtomicInt(1);
  
     return rep;     return rep;
 } }
  
 static inline void _reserve(StringRep*& rep, Uint32 cap) static inline void _reserve(StringRep*& rep, Uint32 cap)
 { {
     if (cap > rep->cap || Atomic_get(&rep->refs) != 1)      if (cap > rep->cap || rep->refs.value() != 1)
     {     {
         size_t n = _next_pow_2(cap);          size_t n = _roundUpToPow2(cap);
         StringRep* new_rep = StringRep::alloc(n);          StringRep* newRep = StringRep::alloc(n);
         new_rep->size = rep->size;          newRep->size = rep->size;
         _copy(new_rep->data, rep->data, rep->size + 1);          _copy(newRep->data, rep->data, rep->size + 1);
         StringRep::unref(rep);         StringRep::unref(rep);
         rep = new_rep;          rep = newRep;
     }     }
 } }
  
Line 455 
Line 588 
     return rep;     return rep;
 } }
  
 StringRep* StringRep::copy_on_write(StringRep* rep)  StringRep* StringRep::copyOnWrite(StringRep* rep)
 { {
     // Return a new copy of rep. Release rep.     // Return a new copy of rep. Release rep.
  
     StringRep* new_rep = StringRep::alloc(rep->size);      StringRep* newRep = StringRep::alloc(rep->size);
     new_rep->size = rep->size;      newRep->size = rep->size;
     _copy(new_rep->data, rep->data, rep->size);      _copy(newRep->data, rep->data, rep->size);
     new_rep->data[new_rep->size] = '\0';      newRep->data[newRep->size] = '\0';
     StringRep::unref(rep);     StringRep::unref(rep);
     return new_rep;      return newRep;
 } }
  
 StringRep* StringRep::create(const char* data, size_t size) StringRep* StringRep::create(const char* data, size_t size)
 { {
     StringRep* rep = StringRep::alloc(size);     StringRep* rep = StringRep::alloc(size);
     rep->size = _convert((Uint16*)rep->data, data, size);      size_t utf8_error_index;
     rep->data[rep->size] = '\0';      rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index);
  
     return rep;  #ifndef PEGASUS_STRING_NO_THROW
       if (rep->size == size_t(-1))
       {
           StringRep::free(rep);
           _StringThrowBadUTF8(utf8_error_index);
 } }
   #endif
   
       rep->data[rep->size] = '\0';
  
 StringRep* StringRep::createASCII7(const char* data, size_t size)  
 {  
     StringRep* rep = StringRep::alloc(size);  
     _copy((Uint16*)rep->data, data, size);  
     rep->data[rep->size = size] = '\0';  
     return rep;     return rep;
 } }
  
Line 506 
Line 641 
  
 String::String(const String& str, Uint32 n) String::String(const String& str, Uint32 n)
 { {
     _check_bounds(n, str._rep->size);      _checkBounds(n, str._rep->size);
     _rep = StringRep::create(str._rep->data, n);     _rep = StringRep::create(str._rep->data, n);
 } }
  
 String::String(const Char16* str) String::String(const Char16* str)
 { {
     _check_null_pointer(str);      _checkNullPointer(str);
     _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));     _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
 } }
  
 String::String(const Char16* str, Uint32 n) String::String(const Char16* str, Uint32 n)
 { {
     _check_null_pointer(str);      _checkNullPointer(str);
     _rep = StringRep::create((Uint16*)str, n);     _rep = StringRep::create((Uint16*)str, n);
 } }
  
 String::String(const char* str) String::String(const char* str)
 { {
     _check_null_pointer(str);      _checkNullPointer(str);
     _rep = StringRep::create(str, strlen(str));  
 }  
  
 String::String(const char* str, String::ASCII7Tag tag)      // Set this just in case create() throws an exception.
 {      _rep = &StringRep::_emptyRep;
     _check_null_pointer(str);      _rep = StringRep::create(str, strlen(str));
     _rep = StringRep::createASCII7(str, strlen(str));  
 } }
  
 String::String(const char* str, Uint32 n) String::String(const char* str, Uint32 n)
 { {
     _check_null_pointer(str);      _checkNullPointer(str);
     _rep = StringRep::create(str, n);  
 }  
  
 String::String(const char* str, size_t n, String::ASCII7Tag tag)      // Set this just in case create() throws an exception.
 {      _rep = &StringRep::_emptyRep;
     _check_null_pointer(str);      _rep = StringRep::create(str, n);
     _rep = StringRep::createASCII7(str, n);  
 } }
  
 String::String(const String& s1, const String& s2) String::String(const String& s1, const String& s2)
Line 560 
Line 689 
  
 String::String(const String& s1, const char* s2) String::String(const String& s1, const char* s2)
 { {
     _check_null_pointer(s2);      _checkNullPointer(s2);
     size_t n1 = s1._rep->size;     size_t n1 = s1._rep->size;
     size_t n2 = strlen(s2);     size_t n2 = strlen(s2);
     _rep = StringRep::alloc(n1 + n2);     _rep = StringRep::alloc(n1 + n2);
     _copy(_rep->data, s1._rep->data, n1);     _copy(_rep->data, s1._rep->data, n1);
     _rep->size = n1 + _convert((Uint16*)_rep->data + n1, s2, n2);      size_t utf8_error_index;
       size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index);
   
   #ifndef PEGASUS_STRING_NO_THROW
       if (tmp == size_t(-1))
       {
           StringRep::free(_rep);
           _rep = &StringRep::_emptyRep;
           _StringThrowBadUTF8(utf8_error_index);
       }
   #endif
   
       _rep->size = n1 + tmp;
     _rep->data[_rep->size] = '\0';     _rep->data[_rep->size] = '\0';
 } }
  
 String::String(const char* s1, const String& s2) String::String(const char* s1, const String& s2)
 { {
     _check_null_pointer(s1);      _checkNullPointer(s1);
     size_t n1 = strlen(s1);     size_t n1 = strlen(s1);
     size_t n2 = s2._rep->size;     size_t n2 = s2._rep->size;
     _rep = StringRep::alloc(n1 + n2);     _rep = StringRep::alloc(n1 + n2);
     _rep->size = n2 + _convert((Uint16*)_rep->data, s1, n1);      size_t utf8_error_index;
       size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index);
   
   #ifndef PEGASUS_STRING_NO_THROW
       if (tmp ==  size_t(-1))
       {
           StringRep::free(_rep);
           _rep = &StringRep::_emptyRep;
           _StringThrowBadUTF8(utf8_error_index);
       }
   #endif
   
       _rep->size = n2 + tmp;
     _copy(_rep->data + n1, s2._rep->data, n2);     _copy(_rep->data + n1, s2._rep->data, n2);
     _rep->data[_rep->size] = '\0';     _rep->data[_rep->size] = '\0';
 } }
Line 593 
Line 746 
  
 String& String::assign(const Char16* str, Uint32 n) String& String::assign(const Char16* str, Uint32 n)
 { {
     _check_null_pointer(str);      _checkNullPointer(str);
  
     if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)      if (n > _rep->cap || _rep->refs.value() != 1)
     {     {
         StringRep::unref(_rep);         StringRep::unref(_rep);
         _rep = StringRep::alloc(n);         _rep = StringRep::alloc(n);
Line 610 
Line 763 
  
 String& String::assign(const char* str, Uint32 n) String& String::assign(const char* str, Uint32 n)
 { {
     _check_null_pointer(str);      _checkNullPointer(str);
  
     if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)      if (n > _rep->cap || _rep->refs.value() != 1)
     {     {
         StringRep::unref(_rep);         StringRep::unref(_rep);
         _rep = StringRep::alloc(n);         _rep = StringRep::alloc(n);
     }     }
  
     _rep->size = _convert(_rep->data, str, n);      size_t utf8_error_index;
     _rep->data[_rep->size] = 0;      _rep->size = _convert(_rep->data, str, n, utf8_error_index);
   
     return *this;  
 }  
  
 String& String::assignASCII7(const char* str, Uint32 n)  #ifndef PEGASUS_STRING_NO_THROW
       if (_rep->size ==  size_t(-1))
 { {
     _check_null_pointer(str);          StringRep::free(_rep);
           _rep = &StringRep::_emptyRep;
     if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)          _StringThrowBadUTF8(utf8_error_index);
     {  
         StringRep::unref(_rep);  
         _rep = StringRep::alloc(n);  
     }     }
   #endif
  
     _copy(_rep->data, str, n);      _rep->data[_rep->size] = 0;
     _rep->data[_rep->size = n] = 0;  
  
     return *this;     return *this;
 } }
Line 644 
Line 792 
 { {
     if (_rep->size)     if (_rep->size)
     {     {
         if (Atomic_get(&_rep->refs) == 1)          if (_rep->refs.value() == 1)
           {
             _rep->size = 0;             _rep->size = 0;
               _rep->data[0] = '\0';
           }
         else         else
         {         {
             StringRep::unref(_rep);             StringRep::unref(_rep);
             _rep = &StringRep::_empty_rep;              _rep = &StringRep::_emptyRep;
         }         }
     }     }
 } }
Line 661 
Line 812 
  
 CString String::getCString() const CString String::getCString() const
 { {
       // A UTF8 string can need up to three bytes for each 16-bit character of
       // its UTF16 counterpart, so we allocate extra memory for the worst case.
       // In the best case, we may need only one third of the memory allocated.
       // But downsizing the string afterwards is expensive and unnecessary since
       // CString objects are usually short-lived (disappearing after only a few
       // instructions). CString objects are typically created on the stack as
       // a means to obtain a char* pointer.
   
 #ifdef PEGASUS_STRING_NO_UTF8 #ifdef PEGASUS_STRING_NO_UTF8
     char* str = (char*)operator new(_rep->size + 1);     char* str = (char*)operator new(_rep->size + 1);
     _copy(str, _rep->data, _rep->size);     _copy(str, _rep->data, _rep->size);
Line 669 
Line 828 
 #else #else
     Uint32 n = 3 * _rep->size;     Uint32 n = 3 * _rep->size;
     char* str = (char*)operator new(n + 1);     char* str = (char*)operator new(n + 1);
     size_t size = _copy_to_utf8(str, _rep->data, _rep->size);      size_t size = _copyToUTF8(str, _rep->data, _rep->size);
     str[size] = '\0';     str[size] = '\0';
     return CString(str);     return CString(str);
 #endif #endif
Line 677 
Line 836 
  
 String& String::append(const Char16* str, Uint32 n) String& String::append(const Char16* str, Uint32 n)
 { {
     _check_null_pointer(str);      _checkNullPointer(str);
  
     size_t old_size = _rep->size;      size_t oldSize = _rep->size;
     size_t new_size = old_size + n;      size_t newSize = oldSize + n;
     _reserve(_rep, new_size);      _reserve(_rep, newSize);
     _copy(_rep->data + old_size, (Uint16*)str, n);      _copy(_rep->data + oldSize, (Uint16*)str, n);
     _rep->size = new_size;      _rep->size = newSize;
     _rep->data[new_size] = '\0';      _rep->data[newSize] = '\0';
  
     return *this;     return *this;
 } }
Line 696 
Line 855 
  
 String& String::append(const char* str, Uint32 size) String& String::append(const char* str, Uint32 size)
 { {
     _check_null_pointer(str);      _checkNullPointer(str);
  
     size_t old_size = _rep->size;      size_t oldSize = _rep->size;
     size_t cap = old_size + size;      size_t cap = oldSize + size;
  
     _reserve(_rep, cap);     _reserve(_rep, cap);
     _rep->size += _convert((Uint16*)_rep->data + old_size, str, size);      size_t utf8_error_index;
       size_t tmp = _convert(
           (Uint16*)_rep->data + oldSize, str, size, utf8_error_index);
   
   #ifndef PEGASUS_STRING_NO_THROW
       if (tmp ==  size_t(-1))
       {
           StringRep::free(_rep);
           _rep = &StringRep::_emptyRep;
           _StringThrowBadUTF8(utf8_error_index);
       }
   #endif
   
       _rep->size += tmp;
     _rep->data[_rep->size] = '\0';     _rep->data[_rep->size] = '\0';
  
     return *this;     return *this;
Line 713 
Line 885 
     if (n == PEG_NOT_FOUND)     if (n == PEG_NOT_FOUND)
         n = _rep->size - index;         n = _rep->size - index;
  
     _check_bounds(index + n, _rep->size);      _checkBounds(index + n, _rep->size);
  
     if (Atomic_get(&_rep->refs) != 1)      if (_rep->refs.value() != 1)
         _rep = StringRep::copy_on_write(_rep);          _rep = StringRep::copyOnWrite(_rep);
  
     assert(index + n <= _rep->size);     assert(index + n <= _rep->size);
  
Line 758 
Line 930 
  
 Uint32 String::find(Uint32 index, Char16 c) const Uint32 String::find(Uint32 index, Char16 c) const
 { {
     _check_bounds(index, _rep->size);      _checkBounds(index, _rep->size);
  
     if (index >= _rep->size)     if (index >= _rep->size)
         return PEG_NOT_FOUND;         return PEG_NOT_FOUND;
Line 771 
Line 943 
     return PEG_NOT_FOUND;     return PEG_NOT_FOUND;
 } }
  
 Uint32 String::_find_aux(const Char16* s, Uint32 n) const  Uint32 StringFindAux(
       const StringRep* _rep, const Char16* s, Uint32 n)
 { {
     _check_null_pointer(s);      _checkNullPointer(s);
  
     const Uint16* data = _rep->data;     const Uint16* data = _rep->data;
     size_t rem = _rep->size;     size_t rem = _rep->size;
Line 798 
Line 971 
  
 Uint32 String::find(const char* s) const Uint32 String::find(const char* s) const
 { {
     _check_null_pointer(s);      _checkNullPointer(s);
  
     // Note: could optimize away creation of temporary, but this is rarely     // Note: could optimize away creation of temporary, but this is rarely
     // called.     // called.
Line 822 
Line 995 
  
 void String::toLower() void String::toLower()
 { {
 #ifdef PEGASUS_STRING_ENABLE_ICU  #ifdef PEGASUS_HAS_ICU
  
     if (InitializeICU::initICUSuccessful())     if (InitializeICU::initICUSuccessful())
     {     {
           if (_rep->refs.value() != 1)
               _rep = StringRep::copyOnWrite(_rep);
   
           // This will do a locale-insensitive, but context-sensitive convert.
           // Since context-sensitive casing looks at adjacent chars, this
           // prevents optimizations where the us-ascii is converted before
           // calling ICU.
           // The string may shrink or expand after the convert.
   
         //// First calculate size of resulting string. u_strToLower() returns         //// First calculate size of resulting string. u_strToLower() returns
         //// only the size when zero is passed as the destination size argument.         //// only the size when zero is passed as the destination size argument.
  
         UErrorCode err = U_ZERO_ERROR;         UErrorCode err = U_ZERO_ERROR;
  
         int32_t new_size = u_strToLower(          int32_t newSize = u_strToLower(
             NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);             NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
  
           err = U_ZERO_ERROR;
   
         //// Reserve enough space for the result.         //// Reserve enough space for the result.
  
         if ((Uint32)new_size > _rep->cap)          if ((Uint32)newSize > _rep->cap)
             _reserve(_rep, new_size);              _reserve(_rep, newSize);
  
         //// Perform the conversion (overlapping buffers are allowed).         //// Perform the conversion (overlapping buffers are allowed).
  
         u_strToLower((UChar*)_rep->data, new_size,          u_strToLower((UChar*)_rep->data, newSize,
             (UChar*)_rep->data, _rep->size, NULL, &err);             (UChar*)_rep->data, _rep->size, NULL, &err);
  
         _rep->size = new_size;          _rep->size = newSize;
           return;
     }     }
  
 #endif /* PEGASUS_STRING_ENABLE_ICU */  #endif /* PEGASUS_HAS_ICU */
  
     if (Atomic_get(&_rep->refs) != 1)      if (_rep->refs.value() != 1)
         _rep = StringRep::copy_on_write(_rep);          _rep = StringRep::copyOnWrite(_rep);
  
     Uint16* p = _rep->data;     Uint16* p = _rep->data;
     size_t n = _rep->size;     size_t n = _rep->size;
Line 858 
Line 1043 
     for (; n--; p++)     for (; n--; p++)
     {     {
         if (!(*p & 0xFF00))         if (!(*p & 0xFF00))
             *p = _to_lower(*p);              *p = _toLower(*p);
     }     }
 } }
  
 void String::toUpper() void String::toUpper()
 { {
 #ifdef PEGASUS_STRING_ENABLE_ICU  #ifdef PEGASUS_HAS_ICU
  
     if (InitializeICU::initICUSuccessful())     if (InitializeICU::initICUSuccessful())
     {     {
           if (_rep->refs.value() != 1)
               _rep = StringRep::copyOnWrite(_rep);
   
           // This will do a locale-insensitive, but context-sensitive convert.
           // Since context-sensitive casing looks at adjacent chars, this
           // prevents optimizations where the us-ascii is converted before
           // calling ICU.
           // The string may shrink or expand after the convert.
   
         //// First calculate size of resulting string. u_strToUpper() returns         //// First calculate size of resulting string. u_strToUpper() returns
         //// only the size when zero is passed as the destination size argument.         //// only the size when zero is passed as the destination size argument.
  
         UErrorCode err = U_ZERO_ERROR;         UErrorCode err = U_ZERO_ERROR;
  
         int32_t new_size = u_strToUpper(          int32_t newSize = u_strToUpper(
             NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);             NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
  
           err = U_ZERO_ERROR;
   
         //// Reserve enough space for the result.         //// Reserve enough space for the result.
  
         if ((Uint32)new_size > _rep->cap)          if ((Uint32)newSize > _rep->cap)
             _reserve(_rep, new_size);              _reserve(_rep, newSize);
  
         //// Perform the conversion (overlapping buffers are allowed).         //// Perform the conversion (overlapping buffers are allowed).
  
         u_strToUpper((UChar*)_rep->data, new_size,          u_strToUpper((UChar*)_rep->data, newSize,
             (UChar*)_rep->data, _rep->size, NULL, &err);             (UChar*)_rep->data, _rep->size, NULL, &err);
  
         _rep->size = new_size;          _rep->size = newSize;
   
           return;
     }     }
  
 #endif /* PEGASUS_STRING_ENABLE_ICU */  #endif /* PEGASUS_HAS_ICU */
  
     if (Atomic_get(&_rep->refs) != 1)      if (_rep->refs.value() != 1)
         _rep = StringRep::copy_on_write(_rep);          _rep = StringRep::copyOnWrite(_rep);
  
     Uint16* p = _rep->data;     Uint16* p = _rep->data;
     size_t n = _rep->size;     size_t n = _rep->size;
  
     for (; n--; p++)     for (; n--; p++)
         *p = _to_upper(*p);          *p = _toUpper(*p);
 } }
  
 int String::compare(const String& s1, const String& s2, Uint32 n) int String::compare(const String& s1, const String& s2, Uint32 n)
Line 917 
Line 1115 
  
 int String::compare(const String& s1, const char* s2) int String::compare(const String& s1, const char* s2)
 { {
     _check_null_pointer(s2);      _checkNullPointer(s2);
  
 #ifdef PEGASUS_STRING_NO_UTF8 #ifdef PEGASUS_STRING_NO_UTF8
     return _compare_no_utf8(s1._rep->data, s2);      return _compareNoUTF8(s1._rep->data, s2);
 #else #else
     // ATTN: optimize this!     // ATTN: optimize this!
     return String::compare(s1, String(s2));     return String::compare(s1, String(s2));
Line 929 
Line 1127 
  
 int String::compareNoCase(const String& str1, const String& str2) int String::compareNoCase(const String& str1, const String& str2)
 { {
 #ifdef PEGASUS_STRING_ENABLE_ICU  #ifdef PEGASUS_HAS_ICU
  
     if (InitializeICU::initICUSuccessful())     if (InitializeICU::initICUSuccessful())
     {     {
Line 937 
Line 1135 
             str1._rep->data, str2._rep->data, U_FOLD_CASE_DEFAULT);             str1._rep->data, str2._rep->data, U_FOLD_CASE_DEFAULT);
     }     }
  
 #endif /* PEGASUS_STRING_ENABLE_ICU */  #endif /* PEGASUS_HAS_ICU */
  
     const Uint16* s1 = str1._rep->data;     const Uint16* s1 = str1._rep->data;
     const Uint16* s2 = str2._rep->data;     const Uint16* s2 = str2._rep->data;
  
     while (*s1 && *s2)     while (*s1 && *s2)
     {     {
         int r = _to_lower(*s1++) - _to_lower(*s2++);          int r = _toLower(*s1++) - _toLower(*s2++);
  
         if (r)         if (r)
             return r;             return r;
Line 958 
Line 1156 
     return 0;     return 0;
 } }
  
 Boolean String::equalNoCase_aux(const String& s1, const String& s2)  Boolean StringEqualNoCase(const String& s1, const String& s2)
 { {
 #ifdef PEGASUS_STRING_ENABLE_ICU  #ifdef PEGASUS_HAS_ICU
  
     return String::compareNoCase(s1, s2) == 0;     return String::compareNoCase(s1, s2) == 0;
  
 #else /* PEGASUS_STRING_ENABLE_ICU */  #else /* PEGASUS_HAS_ICU */
   
       // The following employs loop unrolling for efficiency. Please do not
       // eliminate.
  
     Uint16* p = (Uint16*)s1._rep->data;      Uint16* p = (Uint16*)s1.getChar16Data();
     Uint16* q = (Uint16*)s2._rep->data;      Uint16* q = (Uint16*)s2.getChar16Data();
     Uint32 n = s2._rep->size;      Uint32 n = s2.size();
  
     while (n >= 8)     while (n >= 8)
     {     {
         if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0]))) ||          if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
             ((p[1] - q[1]) && (_to_upper(p[1]) - _to_upper(q[1]))) ||              ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
             ((p[2] - q[2]) && (_to_upper(p[2]) - _to_upper(q[2]))) ||              ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
             ((p[3] - q[3]) && (_to_upper(p[3]) - _to_upper(q[3]))) ||              ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) ||
             ((p[4] - q[4]) && (_to_upper(p[4]) - _to_upper(q[4]))) ||              ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) ||
             ((p[5] - q[5]) && (_to_upper(p[5]) - _to_upper(q[5]))) ||              ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) ||
             ((p[6] - q[6]) && (_to_upper(p[6]) - _to_upper(q[6]))) ||              ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) ||
             ((p[7] - q[7]) && (_to_upper(p[7]) - _to_upper(q[7]))))              ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7]))))
         {         {
             return false;             return false;
         }         }
Line 991 
Line 1192 
  
     while (n >= 4)     while (n >= 4)
     {     {
         if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0]))) ||          if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
             ((p[1] - q[1]) && (_to_upper(p[1]) - _to_upper(q[1]))) ||              ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
             ((p[2] - q[2]) && (_to_upper(p[2]) - _to_upper(q[2]))) ||              ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
             ((p[3] - q[3]) && (_to_upper(p[3]) - _to_upper(q[3]))))              ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
         {         {
             return false;             return false;
         }         }
Line 1006 
Line 1207 
  
     while (n--)     while (n--)
     {     {
         if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0]))))          if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
             return false;             return false;
  
         p++;         p++;
Line 1015 
Line 1216 
  
     return true;     return true;
  
 #endif /* PEGASUS_STRING_ENABLE_ICU */  #endif /* PEGASUS_HAS_ICU */
 } }
  
 Boolean String::equalNoCase(const String& s1, const char* s2) Boolean String::equalNoCase(const String& s1, const char* s2)
 { {
     _check_null_pointer(s2);      _checkNullPointer(s2);
  
 #if defined(PEGASUS_STRING_ENABLE_ICU)  #if defined(PEGASUS_HAS_ICU)
  
     return String::equalNoCase(s1, String(s2));     return String::equalNoCase(s1, String(s2));
  
Line 1037 
Line 1238 
         if (!*p2)         if (!*p2)
             return false;             return false;
  
         if (_to_upper(*p1++) != CharSet::to_upper(int(*p2++)))          if (_toUpper(*p1++) != _toUpperTable[int(*p2++)])
             return false;             return false;
     }     }
  
       if (*p2)
           return false;
   
     return true;     return true;
  
 #else /* PEGASUS_STRING_ENABLE_ICU */  #else /* PEGASUS_HAS_ICU */
  
     // ATTN: optimize this!     // ATTN: optimize this!
     return String::equalNoCase(s1, String(s2));     return String::equalNoCase(s1, String(s2));
  
 #endif /* PEGASUS_STRING_ENABLE_ICU */  #endif /* PEGASUS_HAS_ICU */
 } }
  
 Boolean String::equal(const String& s1, const String& s2) Boolean String::equal(const String& s1, const String& s2)
Line 1061 
Line 1265 
 { {
 #ifdef PEGASUS_STRING_NO_UTF8 #ifdef PEGASUS_STRING_NO_UTF8
  
     _check_null_pointer(s2);      _checkNullPointer(s2);
  
     const Uint16* p = (Uint16*)s1._rep->data;     const Uint16* p = (Uint16*)s1._rep->data;
     const char* q = s2;     const char* q = s2;
Line 1088 
Line 1292 
     CString cstr = str.getCString();     CString cstr = str.getCString();
     const char* utf8str = cstr;     const char* utf8str = cstr;
     os << utf8str;     os << utf8str;
       return os;
   #else
  
 #elif defined(PEGASUS_STRING_ENABLE_ICU)  #if defined(PEGASUS_HAS_ICU)
  
     if (InitializeICU::initICUSuccessful())     if (InitializeICU::initICUSuccessful())
     {     {
Line 1103 
Line 1309 
         os << buf;         os << buf;
         os.flush();         os.flush();
         delete [] buf;         delete [] buf;
           return os;
     }     }
  
 #endif /* PEGASUS_OS_OS400 */  #endif  // PEGASUS_HAS_ICU
  
     for (Uint32 i = 0, n = str.size(); i < n; i++)     for (Uint32 i = 0, n = str.size(); i < n; i++)
     {     {
Line 1123 
Line 1330 
     }     }
  
     return os;     return os;
   #endif // PEGASUS_OS_OS400
 } }
  
 void String::_append_char_aux()  void StringAppendCharAux(StringRep*& _rep)
 { {
     StringRep* tmp;     StringRep* tmp;
  
Line 1168 
Line 1376 
         GCC Developers Summit). This reduced default construction to a simple         GCC Developers Summit). This reduced default construction to a simple
         pointer assignment.         pointer assignment.
  
             inline String::String() : _rep(&_empty_rep) { }              inline String::String() : _rep(&_emptyRep) { }
  
     5.  Implemented Uint16 versions of toupper() and tolower() using tables.     5.  Implemented Uint16 versions of toupper() and tolower() using tables.
         For example:         For example:
Line 1178 
Line 1386 
                 0,1,2,...255                 0,1,2,...255
             };             };
  
             inline Uint16 _to_upper(Uint16 x)              inline Uint16 _toUpper(Uint16 x)
             {             {
                 return (x & 0xFF00) ? x : _upper[x];                 return (x & 0xFF00) ? x : _upper[x];
             }             }
Line 1217 
Line 1425 
             String operator+(const String& s1, const char* s2)             String operator+(const String& s1, const char* s2)
             String operator+(const char* s1, const String& s2)             String operator+(const char* s1, const String& s2)
  
     7.  Optimized _next_pow_2(), used in rounding the capacity to the next      7.  Optimized _roundUpToPow2(), used in rounding the capacity to the next
         power of two (algorithm from the book "Hacker's Delight").         power of two (algorithm from the book "Hacker's Delight").
  
             static Uint32 _next_pow_2(Uint32 x)              static Uint32 _roundUpToPow2(Uint32 x)
             {             {
                 if (x < 8)                 if (x < 8)
                     return 8;                     return 8;
Line 1246 
Line 1454 
             }             }
  
     9.  Experimented to find the optimal initial size for a short string.     9.  Experimented to find the optimal initial size for a short string.
         Eight seems to offer the best tradoff between space and time.          Eight seems to offer the best tradeoff between space and time.
  
     10. Inlined all members of the Char16 class.     10. Inlined all members of the Char16 class.
  
Line 1273 
Line 1481 
         This avoids slower UTF8 processing when not needed.         This avoids slower UTF8 processing when not needed.
  
 ================================================================================ ================================================================================
   
   TO-DO:
   
       (+) [DONE] Use PEGASUS_USE_EXPERIMENTAL_INTERFACES
   
       (+) [DONE] Submit BUG-2754 (Windows buffer limit).
   
       (+) [DONE] Eliminate char versions of find() and append().
   
       (+) [DONE] Remove PEGASUS_MAX_PRINTABLE_CHARACTER from Config.h
   
       (+) [DONE] Change _next_pow_2() to _roundUpToPow2().
   
       (+) [DONE] Change '99' to '2' in StringRep constructor (comment as well).
   
       (+) [DONE] Comment StringRep allocation layout.
   
       (+) [DONE] Conceal private inline functions.
   
       (+) [DONE] Shorten inclusion of StringInline.h in String.h.
   
       (+) [DONE] Change USE_INTERNAL_INLINE to DISABLE_INTERNAL_INLINE or get
           rid of it altogether.
   
       (+) [DONE] useCamelNotationOnAllFunctionNames.
   
       (+) [DONE] Check for overflow condition in StringRep::alloc().
   
       (+) [DONE] Remove tabs (used vim ":set expandtab" and ":retab").
   
       (+) [DONE] Fix throw-related memory leak.
   
       (+) [DONE] Look at PEP223 for coding security guidelines.
   
       (+) [DONE] Use old AtomicInt for now (split new AtomicInt into another
           bug).
   
       (+) [DONE] Removed appendASCII() and the ASCII form of the constructor.
   
       -----------
   
       (+) DOC++ String.h
   
   ================================================================================
 */ */


Legend:
Removed from v.1.111.2.6  
changed lines
  Added in v.1.111.6.15

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2