1 martin 1.10 //%LICENSE////////////////////////////////////////////////////////////////
|
2 martin 1.11 //
|
3 martin 1.10 // Licensed to The Open Group (TOG) under one or more contributor license
4 // agreements. Refer to the OpenPegasusNOTICE.txt file distributed with
5 // this work for additional information regarding copyright ownership.
6 // Each contributor licenses this file to you under the OpenPegasus Open
7 // Source License; you may not use this file except in compliance with the
8 // License.
|
9 martin 1.11 //
|
10 martin 1.10 // Permission is hereby granted, free of charge, to any person obtaining a
11 // copy of this software and associated documentation files (the "Software"),
12 // to deal in the Software without restriction, including without limitation
13 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
14 // and/or sell copies of the Software, and to permit persons to whom the
15 // Software is furnished to do so, subject to the following conditions:
|
16 martin 1.11 //
|
17 martin 1.10 // The above copyright notice and this permission notice shall be included
18 // in all copies or substantial portions of the Software.
|
19 martin 1.11 //
|
20 martin 1.10 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
21 martin 1.11 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22 martin 1.10 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24 // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25 // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26 // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27 martin 1.11 //
|
28 martin 1.10 //////////////////////////////////////////////////////////////////////////
|
29 mike 1.2 //
30 //%/////////////////////////////////////////////////////////////////////////////
31
32 #ifndef _Pegasus_StringRep_h
33 #define _Pegasus_StringRep_h
34
|
35 mike 1.4 #include <Pegasus/Common/Config.h>
36 #include <Pegasus/Common/AtomicInt.h>
|
37 thilo.boehm 1.12 #include <Pegasus/Common/CommonUTF.h>
|
38 mike 1.2 #include <new>
39
40 PEGASUS_NAMESPACE_BEGIN
41
|
// Reference-counted character storage for a string. The header fields
// (size, cap, refs) are followed by the UTF-16 code units themselves: data[]
// is declared with one element and additional characters live in extra space
// allocated past the end of the structure.
42 mike 1.7 struct PEGASUS_COMMON_LINKAGE StringRep
|
43 mike 1.2 {
// Builds an empty representation (size 0, cap 0, refs 2, data[0] == 0).
// According to the inline definition in this header it is only used for
// _emptyRep.
44 StringRep();
45
// Empty destructor; only runs for _emptyRep (free() does not invoke it).
46 ~StringRep();
47
// Presumably allocates a representation with room for cap characters.
// Defined outside this header -- TODO confirm the exact contract.
48 static StringRep* alloc(size_t cap);
49
// Destroys rep->refs and releases the memory with ::operator delete.
50 static void free(StringRep* rep);
51
// Presumably builds a new representation from size UTF-16 code units.
// Defined outside this header -- TODO confirm.
52 static StringRep* create(const Uint16* data, size_t size);
53
// Presumably builds a new representation from size bytes of 8-bit text.
// Defined outside this header -- TODO confirm the expected encoding.
54 static StringRep* create(const char* data, size_t size);
55
// Defined outside this header; by its name it presumably returns a
// representation that the caller may modify -- TODO confirm.
56 static StringRep* copyOnWrite(StringRep* rep);
57
// Presumably the number of code units before the null terminator of str.
// Defined outside this header -- TODO confirm.
58 static Uint32 length(const Uint16* str);
59
// Increments rep->refs unless rep is _emptyRep.
60 static void ref(const StringRep* rep);
61
// Decrements rep->refs unless rep is _emptyRep and calls free() on the
// representation once the count reaches zero.
62 static void unref(const StringRep* rep);
63
// Shared representation of the empty string. ref() and unref() never touch
// its reference count.
64 mike 1.2 static StringRep _emptyRep;
65
66 // Number of characters in this string, excluding the null terminator.
67 size_t size;
68
69 // Number of characters this representation has room for. This is
70 // greater than or equal to size.
71 size_t cap;
72
73 // Number of strings referring to this StringRep (1, 2, etc).
74 AtomicInt refs;
75
76 // The first character in the string. Extra space is allocated off the
77 // end of this structure for additional characters.
78 Uint16 data[1];
79 };
80
81 inline void StringRep::free(StringRep* rep)
82 {
83 rep->refs.~AtomicInt();
84 ::operator delete(rep);
85 mike 1.2 }
86
87 inline StringRep::StringRep() : size(0), cap(0), refs(2)
88 {
89 // Only called on _emptyRep. We set the reference count to two to
90 // keep a String from modifying it (if the reference count were one,
91 // a string would think it was the sole owner of the StringRep object).
92 data[0] = 0;
93 }
94
95 inline StringRep::~StringRep()
96 {
97 // Only called on _emptyRep.
98 }
99
100 inline void StringRep::ref(const StringRep* rep)
101 {
102 if (rep != &StringRep::_emptyRep)
103 ((StringRep*)rep)->refs++;
104 }
105
106 mike 1.2 inline void StringRep::unref(const StringRep* rep)
107 {
|
108 kumpf 1.9 if (rep != &StringRep::_emptyRep &&
109 ((StringRep*)rep)->refs.decAndTestIfZero())
|
110 mike 1.2 StringRep::free((StringRep*)rep);
111 }
112
// Out-of-line helpers declared here; their definitions are not part of this
// header.

// Invoked by _checkBounds() when an index is larger than the string size.
// Presumably throws an out-of-bounds exception (inferred from the name --
// TODO confirm against the implementation).
113 PEGASUS_COMMON_LINKAGE void StringThrowOutOfBounds();
114
// Helper for appending a character. The representation pointer is passed by
// reference, so the callee is able to replace it -- presumably when the
// storage has to grow or be unshared; TODO confirm.
115 PEGASUS_COMMON_LINKAGE void StringAppendCharAux(StringRep*& _rep);
116
// Presumably compares s1 and s2 for equality ignoring case (inferred from
// the name -- TODO confirm).
117 PEGASUS_COMMON_LINKAGE Boolean StringEqualNoCase(
118 const String& s1, const String& s2);
119
// Presumably searches _rep for the n characters starting at s and returns
// the position found; the "not found" value is not visible here -- TODO
// confirm.
120 PEGASUS_COMMON_LINKAGE Uint32 StringFindAux(
121 const StringRep* _rep, const Char16* s, Uint32 n);
122
123 inline void _checkBounds(size_t index, size_t size)
124 {
125 if (index > size)
|
126 dl.meetei 1.13 {
|
127 mike 1.2 StringThrowOutOfBounds();
|
128 dl.meetei 1.13 }
|
129 mike 1.2 }
130
|
// Copies n elements from q to p, front to back, converting each element from
// Q to P by plain assignment.
//
// The copy is unrolled by hand for efficiency (eight elements per pass, then
// four, then single elements). Please keep the unrolled form.
template<class P, class Q>
static void _copy(P* p, const Q* q, size_t n)
{
    // Blocks of eight elements.
    for (; n >= 8; n -= 8, p += 8, q += 8)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
        p[4] = q[4];
        p[5] = q[5];
        p[6] = q[6];
        p[7] = q[7];
    }

    // Blocks of four elements.
    for (; n >= 4; n -= 4, p += 4, q += 4)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
    }

    // Fewer than four elements remain; copy them one at a time.
    for (; n != 0; --n)
        *p++ = *q++;
}
166
167 static size_t _copyFromUTF8(
168 Uint16* dest,
169 const char* src,
170 size_t n,
171 size_t& utf8_error_index)
172 {
173 thilo.boehm 1.12 Uint16* p = dest;
174 const Uint8* q = (const Uint8*)src;
175
176 // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later).
177 // Use loop-unrolling.
178
|
179 dl.meetei 1.13 while ( (n >=8) && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0)
|
180 thilo.boehm 1.12 {
181 p[0] = q[0];
182 p[1] = q[1];
183 p[2] = q[2];
184 p[3] = q[3];
185 p[4] = q[4];
186 p[5] = q[5];
187 p[6] = q[6];
188 p[7] = q[7];
189 p += 8;
190 q += 8;
191 n -= 8;
192 }
193
|
194 dl.meetei 1.13 while ((n >=4) && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0)
|
195 thilo.boehm 1.12 {
196 p[0] = q[0];
197 p[1] = q[1];
198 p[2] = q[2];
199 p[3] = q[3];
200 p += 4;
201 q += 4;
202 n -= 4;
203 }
204
205 switch (n)
206 {
207 case 0:
208 return p - dest;
209 case 1:
210 if (q[0] < 128)
211 {
212 p[0] = q[0];
213 return p + 1 - dest;
214 }
215 break;
216 thilo.boehm 1.12 case 2:
217 if (((q[0]|q[1]) & 0x80) == 0)
218 {
219 p[0] = q[0];
220 p[1] = q[1];
221 return p + 2 - dest;
222 }
223 break;
224 case 3:
225 if (((q[0]|q[1]|q[2]) & 0x80) == 0)
226 {
227 p[0] = q[0];
228 p[1] = q[1];
229 p[2] = q[2];
230 return p + 3 - dest;
231 }
232 break;
233 }
234
235 // Process remaining characters.
236
237 thilo.boehm 1.12 while (n)
238 {
239 // Optimize for 7-bit ASCII case.
240
241 if (*q < 128)
242 {
243 *p++ = *q++;
244 n--;
245 }
246 else
247 {
248 Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1;
249
250 if (c > n || !isValid_U8(q, c) ||
251 UTF8toUTF16(&q, q + c, &p, p + n) != 0)
252 {
253 utf8_error_index = q - (const Uint8*)src;
254 return size_t(-1);
255 }
256
257 n -= c;
258 thilo.boehm 1.12 }
259 }
260
261 return p - dest;
262 }
263
264 static inline size_t _convert(
265 Uint16* p, const char* q, size_t n, size_t& utf8_error_index)
266 {
267 #ifdef PEGASUS_STRING_NO_UTF8
268 _copy(p, q, n);
269 return n;
270 #else
271 return _copyFromUTF8(p, q, n, utf8_error_index);
272 #endif
273 }
274
|
275 mike 1.2 PEGASUS_NAMESPACE_END
276
277 #endif /* _Pegasus_StringRep_h */
|