1 david 1.1 //%/////////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (c) 2003 BMC Software, Hewlett-Packard Company, IBM,
4 // The Open Group, Tivoli Systems
5 //
6 // Permission is hereby granted, free of charge, to any person obtaining a copy
7 // of this software and associated documentation files (the "Software"), to
8 // deal in the Software without restriction, including without limitation the
9 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 // sell copies of the Software, and to permit persons to whom the Software is
11 // furnished to do so, subject to the following conditions:
12 //
13 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
14 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
15 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
17 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
18 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 //
22 david 1.1 //==============================================================================
23 //
24 // Author: Dave Rosckes (rosckes@us.ibm.com)
25 //
26 //
27 //%/////////////////////////////////////////////////////////////////////////////
28
29 #ifndef Pegasus_CommonUTF_h
30 #define Pegasus_CommonUTF_h
31 #include <Pegasus/Common/Config.h>
32
33 PEGASUS_NAMESPACE_BEGIN
34
// Unicode range constants used by the UTF-8 / UTF-16 conversion code.
//
// Every expansion is wrapped in its own parentheses so that the cast
// cannot bind to neighbouring tokens at the point of use (for example
// `sizeof MAX_UTF16` only parses when the expansion is parenthesized).

// Bounds of the high (leading) surrogate range.
#define FIRST_HIGH_SURROGATE  ((Uint32)0xD800)
#define LAST_HIGH_SURROGATE   ((Uint32)0xDBFF)

// Bounds of the low (trailing) surrogate range.
#define FIRST_LOW_SURROGATE   ((Uint32)0xDC00)
#define LAST_LOW_SURROGATE    ((Uint32)0xDFFF)

// U+FFFD, the Unicode replacement character.
#define REPLACEMENT_CHARACTER ((Uint32)0x0000FFFD)

// NOTE: despite its name this is 0xFFFF (the largest value that fits in a
// single 16-bit unit), not 0xFF.
#define MAX_BYTE              ((Uint32)0x0000FFFF)

// 0x10FFFF, the largest code point representable with a surrogate pair.
#define MAX_UTF16             ((Uint32)0x0010FFFF)
42
43 david 1.1 static const Uint32 halfBase = 0x0010000UL;
44 static const Uint32 halfMask = 0x3FFUL;
45 static const int halfShift = 10;
46 static const Uint8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
47
48 static const Uint32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
49 0x03C82080UL, 0xFA082080UL, 0x82082080UL };
50
// Number of bytes that follow a given first byte of a UTF-8 sequence,
// indexed by the value of that first byte (0x00 - 0xFF).
// The table only maps byte values to counts; it makes no validity
// judgement (0x80 - 0xBF map to 0, and 0xF8 - 0xFF still map to 4 and 5).
static const char trailingBytesForUTF8[256] = {
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    // 0x00 - 0x0F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    // 0x10 - 0x1F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    // 0x20 - 0x2F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    // 0x30 - 0x3F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    // 0x40 - 0x4F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    // 0x50 - 0x5F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    // 0x60 - 0x6F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    // 0x70 - 0x7F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    // 0x80 - 0x8F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    // 0x90 - 0x9F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    // 0xA0 - 0xAF
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    // 0xB0 - 0xBF
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,    // 0xC0 - 0xCF
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,    // 0xD0 - 0xDF
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,    // 0xE0 - 0xEF
    3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5     // 0xF0 - 0xFF
};
61
62 #define UTF_8_COUNT_TRAIL_BYTES(leadByte) (trailingBytesForUTF8[(Uint8)leadByte])
63
// Clears, in place, the high bits of a UTF-8 lead byte so that only its
// low (6 - countTrailBytes) bits remain: mask 0x1F for 1 trailing byte,
// 0x0F for 2, 0x07 for 3.
// leadByte must be a modifiable lvalue; the macro yields the masked value.
#define UTF_8_MASK_LEAD_BYTE(leadByte, countTrailBytes) \
    ((leadByte) &= ((1 << (6 - (countTrailBytes))) - 1))
65
66 #define UTF8_NEXT(s, i, c) { \
67 (c)=(s)[(i)++]; \
68 if((Uint8)((c)-0xc0)<0x35) { \
69 Uint8 __count=UTF_8_COUNT_TRAIL_BYTES(c); \
70 UTF_8_MASK_LEAD_BYTE(c, __count); \
71 switch(__count) { \
72 case 3: \
73 (c)=((c)<<6)|((s)[(i)++]&0x3f); \
74 case 2: \
75 (c)=((c)<<6)|((s)[(i)++]&0x3f); \
76 case 1: \
77 (c)=((c)<<6)|((s)[(i)++]&0x3f); \
78 break; \
79 } \
80 } \
81 }
82
83 int isValid_U8(const Uint8 *src,int size);
84 int UTF16toUTF8(const Uint16** srcHead,
85 david 1.1 const Uint16* srcEnd,
86 Uint8** tgtHead,
87 Uint8* tgtEnd);
88
89 int UTF8toUTF16 (const Uint8** srcHead,
90 const Uint8* srcEnd,
91 Uint16** tgtHead,
92 Uint16* tgtEnd);
93
94 PEGASUS_NAMESPACE_END
95 #endif
|