(file) Return to String.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

Diff for /pegasus/src/Pegasus/Common/String.cpp between version 1.111.2.7 and 1.111.6.3

version 1.111.2.7, 2005/09/30 12:57:15 version 1.111.6.3, 2005/10/08 02:16:01
Line 29 
Line 29 
 // //
 // Author: Mike Brasher (mbrasher@bmc.com) // Author: Mike Brasher (mbrasher@bmc.com)
 // //
   // Modified By:
   //     Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
   //     Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297
   //     David Dillard, VERITAS Software Corp. (david.dillard@veritas.com)
   //     Mike Brasher (mike-brasher@austin.rr.com)
   //
 //%///////////////////////////////////////////////////////////////////////////// //%/////////////////////////////////////////////////////////////////////////////
  
 #define PEGASUS_USE_INTERNAL_INLINES  
 #include "String.h"  
 #include <cassert> #include <cassert>
 #include "InternalException.h" #include "InternalException.h"
 #include "CommonUTF.h" #include "CommonUTF.h"
 #include "CharSet.h"  #include "MessageLoader.h"
   #include "StringRep.h"
  
 #ifdef PEGASUS_STRING_ENABLE_ICU  #ifdef PEGASUS_HAS_ICU
 #include <unicode/ustring.h> #include <unicode/ustring.h>
 #include <unicode/uchar.h> #include <unicode/uchar.h>
 #endif #endif
Line 49 
Line 54 
 // //
 // Compile-time macros (undefined by default). // Compile-time macros (undefined by default).
 // //
 //     PEGASUS_STRING_ENABLE_ICU -- enables use of ICU package  
 //  
 //     PEGASUS_STRING_NO_THROW -- suppresses throwing of exceptions //     PEGASUS_STRING_NO_THROW -- suppresses throwing of exceptions
 // //
 //     PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code. //     PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
Line 65 
Line 68 
 // //
 //============================================================================== //==============================================================================
  
   // Upper-case lookup table indexed by an 8-bit character value.
   // Entries 0x61-0x7A ('a'-'z') hold 0x41-0x5A ('A'-'Z'); every other entry
   // holds its own index, so only the ASCII letters are case-mapped and
   // values 0x80-0xFF pass through unchanged.
   const Uint8 _to_upper_tbl[256] =
   {
       0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
       0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
       0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
       0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
       0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
       0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
       0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
       0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
       0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
       0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
       0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
       0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
       0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
       0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
       0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
       0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
       0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
       0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
       0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
       0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
       0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
       0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
       0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
       0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
       0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
       0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
       0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
       0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
       0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
       0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
       0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
       0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
   };
   
   // Lower-case lookup table indexed by an 8-bit character value.
   // Entries 0x41-0x5A ('A'-'Z') hold 0x61-0x7A ('a'-'z'); every other entry
   // holds its own index, so only the ASCII letters are case-mapped and
   // values 0x80-0xFF pass through unchanged.
   const Uint8 _to_lower_tbl[256] =
   {
       0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
       0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
       0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
       0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
       0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
       0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
       0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
       0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
       0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
       0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
       0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
       0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
       0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
       0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
       0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
       0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
       0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
       0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
       0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
       0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
       0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
       0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
       0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
       0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
       0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
       0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
       0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
       0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
       0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
       0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
       0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
       0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
   };
   
 // Converts 16-bit characters to upper case. // Converts 16-bit characters to upper case.
 inline Uint16 _to_upper(Uint16 x) inline Uint16 _to_upper(Uint16 x)
 { {
     return (x & 0xFF00) ? x : CharSet::to_upper(x);      return (x & 0xFF00) ? x : _to_upper_tbl[x];
 } }
  
 // Converts 16-bit characters to lower case. // Converts 16-bit characters to lower case.
 inline Uint16 _to_lower(Uint16 x) inline Uint16 _to_lower(Uint16 x)
 { {
     return (x & 0xFF00) ? x : CharSet::to_lower(x);      return (x & 0xFF00) ? x : _to_lower_tbl[x];
 } }
  
 // Rounds x to the next power of two (or just returns 8 if x < 8). // Rounds x to the next power of two (or just returns 8 if x < 8).
Line 212 
Line 287 
     while (n-- && (*s1++ - *s2++) == 0)     while (n-- && (*s1++ - *s2++) == 0)
         ;         ;
  
       //
   
     return s1[-1] - s2[-1];     return s1[-1] - s2[-1];
 } }
  
Line 241 
Line 318 
     Uint16* p = dest;     Uint16* p = dest;
     const Uint8* q = (const Uint8*)src;     const Uint8* q = (const Uint8*)src;
  
     // Process leading 7-bit ASCII characters (to avoid UTF8 overhead below      // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later).
     // this loop). Use factor-four loop-unrolling.      // Use loop-unrolling.
  
     while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)      while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0)
       {
           p[0] = q[0];
           p[1] = q[1];
           p[2] = q[2];
           p[3] = q[3];
           p[4] = q[4];
           p[5] = q[5];
           p[6] = q[6];
           p[7] = q[7];
           p += 8;
           q += 8;
           n -= 8;
       }
   
       while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0)
     {     {
         p[0] = q[0];         p[0] = q[0];
         p[1] = q[1];         p[1] = q[1];
Line 267 
Line 359 
             }             }
             break;             break;
         case 2:         case 2:
             if (q[0] < 128 && q[1] < 128)              if (((q[0]|q[1]) & 0x80) == 0)
             {             {
                 p[0] = q[0];                 p[0] = q[0];
                 p[1] = q[1];                 p[1] = q[1];
Line 275 
Line 367 
             }             }
             break;             break;
         case 3:         case 3:
             if (q[0] < 128 && q[1] < 128 && q[2] < 128)              if (((q[0]|q[1]|q[2]) & 0x80) == 0)
             {             {
                 p[0] = q[0];                 p[0] = q[0];
                 p[1] = q[1];                 p[1] = q[1];
Line 303 
Line 395 
             if (c > n || !isValid_U8(q, c) ||             if (c > n || !isValid_U8(q, c) ||
                 UTF8toUTF16(&q, q + c, &p, p + n) != 0)                 UTF8toUTF16(&q, q + c, &p, p + n) != 0)
             {             {
                 throw Exception("Bad UTF8 encoding");                  MessageLoaderParms parms("Common.String.BAD_UTF8",
                       "The byte sequence starting at index $0 "
                       "is not valid UTF-8 encoding.",
                        q - (const Uint8*)src);
                   throw Exception(parms);
             }             }
  
             n -= c;             n -= c;
Line 822 
Line 918 
  
 void String::toLower() void String::toLower()
 { {
 #ifdef PEGASUS_STRING_ENABLE_ICU  #ifdef PEGASUS_HAS_ICU
  
     if (InitializeICU::initICUSuccessful())     if (InitializeICU::initICUSuccessful())
     {     {
           if (Atomic_get(&_rep->refs) != 1)
                   _rep = StringRep::copy_on_write(_rep);
   
           // This performs a locale-insensitive but context-sensitive
           // conversion. Because context-sensitive casing examines adjacent
           // characters, the US-ASCII characters cannot be converted
           // separately as an optimization before calling ICU.
           // The string may shrink or grow as a result of the conversion.
   
         //// First calculate size of resulting string. u_strToLower() returns         //// First calculate size of resulting string. u_strToLower() returns
         //// only the size when zero is passed as the destination size argument.         //// only the size when zero is passed as the destination size argument.
  
Line 834 
Line 939 
         int32_t new_size = u_strToLower(         int32_t new_size = u_strToLower(
             NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);             NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
  
           err = U_ZERO_ERROR;
   
         //// Reserve enough space for the result.         //// Reserve enough space for the result.
  
         if ((Uint32)new_size > _rep->cap)         if ((Uint32)new_size > _rep->cap)
Line 845 
Line 952 
             (UChar*)_rep->data, _rep->size, NULL, &err);             (UChar*)_rep->data, _rep->size, NULL, &err);
  
         _rep->size = new_size;         _rep->size = new_size;
           return;
     }     }
  
 #endif /* PEGASUS_STRING_ENABLE_ICU */  #endif /* PEGASUS_HAS_ICU */
  
     if (Atomic_get(&_rep->refs) != 1)     if (Atomic_get(&_rep->refs) != 1)
         _rep = StringRep::copy_on_write(_rep);         _rep = StringRep::copy_on_write(_rep);
Line 864 
Line 972 
  
 void String::toUpper() void String::toUpper()
 { {
 #ifdef PEGASUS_STRING_ENABLE_ICU  #ifdef PEGASUS_HAS_ICU
  
     if (InitializeICU::initICUSuccessful())     if (InitializeICU::initICUSuccessful())
     {     {
           if (Atomic_get(&_rep->refs) != 1)
               _rep = StringRep::copy_on_write(_rep);
   
           // This performs a locale-insensitive but context-sensitive
           // conversion. Because context-sensitive casing examines adjacent
           // characters, the US-ASCII characters cannot be converted
           // separately as an optimization before calling ICU.
           // The string may shrink or grow as a result of the conversion.
   
         //// First calculate size of resulting string. u_strToUpper() returns         //// First calculate size of resulting string. u_strToUpper() returns
         //// only the size when zero is passed as the destination size argument.         //// only the size when zero is passed as the destination size argument.
  
Line 876 
Line 993 
         int32_t new_size = u_strToUpper(         int32_t new_size = u_strToUpper(
             NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);             NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
  
           err = U_ZERO_ERROR;
   
         //// Reserve enough space for the result.         //// Reserve enough space for the result.
  
         if ((Uint32)new_size > _rep->cap)         if ((Uint32)new_size > _rep->cap)
Line 887 
Line 1006 
             (UChar*)_rep->data, _rep->size, NULL, &err);             (UChar*)_rep->data, _rep->size, NULL, &err);
  
         _rep->size = new_size;         _rep->size = new_size;
   
           return;
     }     }
  
 #endif /* PEGASUS_STRING_ENABLE_ICU */  #endif /* PEGASUS_HAS_ICU */
  
     if (Atomic_get(&_rep->refs) != 1)     if (Atomic_get(&_rep->refs) != 1)
         _rep = StringRep::copy_on_write(_rep);         _rep = StringRep::copy_on_write(_rep);
Line 929 
Line 1050 
  
 int String::compareNoCase(const String& str1, const String& str2) int String::compareNoCase(const String& str1, const String& str2)
 { {
 #ifdef PEGASUS_STRING_ENABLE_ICU  #ifdef PEGASUS_HAS_ICU
  
     if (InitializeICU::initICUSuccessful())     if (InitializeICU::initICUSuccessful())
     {     {
Line 937 
Line 1058 
             str1._rep->data, str2._rep->data, U_FOLD_CASE_DEFAULT);             str1._rep->data, str2._rep->data, U_FOLD_CASE_DEFAULT);
     }     }
  
 #endif /* PEGASUS_STRING_ENABLE_ICU */  #endif /* PEGASUS_HAS_ICU */
  
     const Uint16* s1 = str1._rep->data;     const Uint16* s1 = str1._rep->data;
     const Uint16* s2 = str2._rep->data;     const Uint16* s2 = str2._rep->data;
Line 960 
Line 1081 
  
 Boolean String::equalNoCase_aux(const String& s1, const String& s2) Boolean String::equalNoCase_aux(const String& s1, const String& s2)
 { {
 #ifdef PEGASUS_STRING_ENABLE_ICU  #ifdef PEGASUS_HAS_ICU
  
     return String::compareNoCase(s1, s2) == 0;     return String::compareNoCase(s1, s2) == 0;
  
 #else /* PEGASUS_STRING_ENABLE_ICU */  #else /* PEGASUS_HAS_ICU */
  
     Uint16* p = (Uint16*)s1._rep->data;     Uint16* p = (Uint16*)s1._rep->data;
     Uint16* q = (Uint16*)s2._rep->data;     Uint16* q = (Uint16*)s2._rep->data;
Line 1015 
Line 1136 
  
     return true;     return true;
  
 #endif /* PEGASUS_STRING_ENABLE_ICU */  #endif /* PEGASUS_HAS_ICU */
 } }
  
 Boolean String::equalNoCase(const String& s1, const char* s2) Boolean String::equalNoCase(const String& s1, const char* s2)
 { {
     _check_null_pointer(s2);     _check_null_pointer(s2);
  
 #if defined(PEGASUS_STRING_ENABLE_ICU)  #if defined(PEGASUS_HAS_ICU)
  
     return String::equalNoCase(s1, String(s2));     return String::equalNoCase(s1, String(s2));
  
Line 1037 
Line 1158 
         if (!*p2)         if (!*p2)
             return false;             return false;
  
         if (_to_upper(*p1++) != CharSet::to_upper(int(*p2++)))          if (_to_upper(*p1++) != _to_upper_tbl[int(*p2++)])
             return false;             return false;
     }     }
  
       if (*p2)
           return false;
   
     return true;     return true;
  
 #else /* PEGASUS_STRING_ENABLE_ICU */  #else /* PEGASUS_HAS_ICU */
  
     // ATTN: optimize this!     // ATTN: optimize this!
     return String::equalNoCase(s1, String(s2));     return String::equalNoCase(s1, String(s2));
  
 #endif /* PEGASUS_STRING_ENABLE_ICU */  #endif /* PEGASUS_HAS_ICU */
 } }
  
 Boolean String::equal(const String& s1, const String& s2) Boolean String::equal(const String& s1, const String& s2)
Line 1088 
Line 1212 
     CString cstr = str.getCString();     CString cstr = str.getCString();
     const char* utf8str = cstr;     const char* utf8str = cstr;
     os << utf8str;     os << utf8str;
       return os;
   #else
  
 #elif defined(PEGASUS_STRING_ENABLE_ICU)  #if defined(PEGASUS_HAS_ICU)
  
     if (InitializeICU::initICUSuccessful())     if (InitializeICU::initICUSuccessful())
     {     {
Line 1103 
Line 1229 
         os << buf;         os << buf;
         os.flush();         os.flush();
         delete [] buf;         delete [] buf;
           return os;
     }     }
  
 #endif /* PEGASUS_OS_OS400 */  #endif  // PEGASUS_HAS_ICU
  
     for (Uint32 i = 0, n = str.size(); i < n; i++)     for (Uint32 i = 0, n = str.size(); i < n; i++)
     {     {
Line 1123 
Line 1250 
     }     }
  
     return os;     return os;
   #endif // PEGASUS_OS_OS400
 } }
  
 void String::_append_char_aux() void String::_append_char_aux()
Line 1246 
Line 1374 
             }             }
  
     9.  Experimented to find the optimal initial size for a short string.     9.  Experimented to find the optimal initial size for a short string.
         Eight seems to offer the best tradoff between space and time.          Eight seems to offer the best tradeoff between space and time.
  
     10. Inlined all members of the Char16 class.     10. Inlined all members of the Char16 class.
  
Line 1272 
Line 1400 
  
         This avoids slower UTF8 processing when not needed.         This avoids slower UTF8 processing when not needed.
  
   BUG-4200 Review notes:
   
       1.  Use PEGASUS_USE_EXPERIMENTAL_INTERFACES instead of
           PEGASUS_STRING_EXTENSIONS.
   
           Status: done
   
       2.  Doc++ String.h
   
           Status: pending
   
       3.  Look at PEP223 for security coding guidelines for strings.
   
           Status: pending
   
       4.  Increasing the number of objects may break Windows 2000 build
           (limit of 2048 bytes for command line). See BUG-2754
   
           Status: submitted patch for bug 2754
   
       5.  Concerns about whether generating inlines and non-inline versions
           of functions will work with all compilers.
   
           Status: tested on Windows. Also showed how inlining can be disabled
           on platforms that don't support it.
   
       6.  Atomic.h -- more to come
   
 ================================================================================ ================================================================================
 */ */


Legend:
Removed from v.1.111.2.7  
changed lines
  Added in v.1.111.6.3

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2