9 humberto 1.8 //
10 // Permission is hereby granted, free of charge, to any person obtaining a copy
11 // of this software and associated documentation files (the "Software"), to
12 // deal in the Software without restriction, including without limitation the
13 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
14 // sell copies of the Software, and to permit persons to whom the Software is
15 // furnished to do so, subject to the following conditions:
16 //
17 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
18 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
19 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
20 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
21 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
23 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 //
26 //==============================================================================
27 //
28 // Author: Humberto Rivero (hurivero@us.ibm.com)
29 //
30 humberto 1.8 // Modified By:
31 //
32 //%/////////////////////////////////////////////////////////////////////////////
33
34 #include <Pegasus/Common/LanguageParser.h>
35 #include <Pegasus/Common/InternalException.h>
36 #include <Pegasus/Common/Tracer.h>
37 #include <Pegasus/Common/MessageLoader.h> //l10n
38 #include <cstring>
39
40 //PEGASUS_USING_STD;
41 PEGASUS_NAMESPACE_BEGIN
42
43 const LanguageParser LanguageParser::EMPTY = LanguageParser();
44
45 void LanguageParser::parseHdr(Array<String> &values, String & hdr){
46 // look for ',' which designates distict (Accept/Content)-Language fields
47 // the form: [languagetag, languagetag, languagetag] so whitespace removal
48 // may be necessary.
49 // then store them in the array
50 PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseHdr");
51 humberto 1.8 Uint32 i = 0;
52 while( i != PEG_NOT_FOUND ){
53 i = hdr.find(",");
54 if( i != PEG_NOT_FOUND ){
55 values.append(hdr.subString(0,i));
56 while(hdr[i+1] == ' ') i++; // get rid of whitespace after ","
57 hdr = hdr.subString(i+1);
58 }
59 else{ // only one field, build an object with it
60 values.append(hdr);
61 }
62 }
63 PEG_METHOD_EXIT();
64 }
65
66 Real32 LanguageParser::parseAcceptLanguageValue(String &language_tag, String & hdr){
67 // look for ';' in hdr, that means we have a quality value to capture
68 // if not, we only have a language
69
70 // if hdr begins with "x-" then we have a non-IANA (private) language tag
71 PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseAcceptLanguageValue");
72 humberto 1.8 Uint32 i;
73 Boolean validate_length = true;
74 if((( i = hdr.find("x-")) != PEG_NOT_FOUND ) && (i == 0)){
75 hdr = convertPrivateLanguageTag(hdr);
76 validate_length = false;
77 }
78
79 // get rid of any beginning or trailing whitespaces
80 Uint32 j;
81 while( (j = hdr.find(" ")) != PEG_NOT_FOUND ){
82 hdr.remove(j,1);
83 }
84
85 Real32 quality = 1;
86 i = hdr.find(";");
87 if(i != PEG_NOT_FOUND){ // extract and store language and quality
88 if(isValid(hdr.subString(0,i), validate_length)){
89 language_tag = hdr.subString(0,i);
90 if(hdr.size() > i + 3)
91 hdr.remove(0,i+3); // remove everything but the quality value
92 else{
93 humberto 1.8 MessageLoaderParms parms("Common.LanguageParser.INVALID_QUALITY_VALUE",
94 "AcceptLanguage contains an invalid quality value");
95 throw InvalidAcceptLanguageHeader(MessageLoader::getMessage(parms));
96 }
97 }
98 else{
99 //l10n
100 //throw InvalidAcceptLanguageHeader(
101 //"AcceptLanguage contains too many characters or non-alpha characters");
102 MessageLoaderParms parms("Common.LanguageParser.TOO_MANY_OR_NON_ALPHA_CHARACTERS_AL",
103 "AcceptLanguage contains too many characters or non-alpha characters");
104 throw InvalidAcceptLanguageHeader(MessageLoader::getMessage(parms));
105 }
106 //validate quality
107 quality = atof(hdr.getCString());
108 if(quality > 1.0 || quality < 0.0){
109 //l10n
110 //throw InvalidAcceptLanguageHeader(
111 //"AcceptLanguage contains an invalid quality value");
112 MessageLoaderParms parms("Common.LanguageParser.INVALID_QUALITY_VALUE",
113 "AcceptLanguage contains an invalid quality value");
114 humberto 1.8 throw InvalidAcceptLanguageHeader(MessageLoader::getMessage(parms));
115 }
116 }
117 else{ // extract and store language, quality defaults to 1.0
118 if(isValid(hdr, validate_length)) language_tag = hdr;
119 else{
120 //l10n
121 //throw InvalidAcceptLanguageHeader(
122 //"AcceptLanguage contains too many characters or non-alpha characters");
123 MessageLoaderParms parms("Common.LanguageParser.TOO_MANY_OR_NON_ALPHA_CHARACTERS_AL",
124 "AcceptLanguage contains too many characters or non-alpha characters");
125 throw InvalidAcceptLanguageHeader(MessageLoader::getMessage(parms));
126 }
127 }
128
129 PEG_METHOD_EXIT();
130 return quality;
131 }
132
133 String LanguageParser::parseContentLanguageValue(String & hdr){
134 // we are looking for the language part of the hdr only,
135 humberto 1.8 // according to the RFC, there may be parenthesized strings
136 // that describe the purpose of the language, we need to ignore those
137 PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseContentLanguageValue");
138 String value = hdr;
139 Uint32 i,j;
140 while((i = value.find("(")) != PEG_NOT_FOUND){ // get rid of anything in parenthesis in hdr if found
141 if((j = value.find(")")) != PEG_NOT_FOUND)
142 value.remove(i, (j-i)+1);
143 else{
144 //l10n
145 //throw InvalidContentLanguageHeader(
146 //"ContentLanguage does not contain terminating ) character");
147 MessageLoaderParms parms("Common.LanguageParser.DOES_NOT_CONTAIN_TERMINATING",
148 "ContentLanguage does not contain terminating ) character");
149 throw InvalidContentLanguageHeader(MessageLoader::getMessage(parms));
150 }
151 }
152 // get rid of any beginning or trailing whitespaces
153 while( (i = value.find(" ")) != PEG_NOT_FOUND ){
154 value.remove(i,1);
155 }
156 humberto 1.8 if(!isValid(value)){
157 //l10n
158 //throw InvalidContentLanguageHeader(
159 //"ContentLanguage contains too many characters or non-alpha characters");
160 MessageLoaderParms parms("Common.LanguageParser.TOO_MANY_OR_NON_ALPHA_CHARACTERS_CL",
161 "ContentLanguage contains too many characters or non-alpha characters");
162 throw InvalidContentLanguageHeader(MessageLoader::getMessage(parms));
163
164 }
165 PEG_METHOD_EXIT();
166 return value;
167 }
168
169 String LanguageParser::getLanguage(String & language_tag){
170 // given a language_tag: en-US-mn we want to return "en"
171 Uint32 i;
172 if((i = language_tag.find(findSeparator(language_tag.getCString()))) != PEG_NOT_FOUND)
173 return language_tag.subString(0,i);
174 return String(language_tag);
175 }
176
177 humberto 1.8 String LanguageParser::getCountry(String & language_tag){
178 // given a language_tag: en-US-mn we want to return "US"
179 Uint32 i,j;
180 if( (i = language_tag.find(findSeparator(language_tag.getCString()))) != PEG_NOT_FOUND )
181 if( (j = language_tag.find(i+1, findSeparator(language_tag.getCString()))) != PEG_NOT_FOUND )
182 return language_tag.subString(i+1, j-(i+1));
183 else
184 return language_tag.subString(i+1);
185 return String::EMPTY;
186 }
187
188 String LanguageParser::getVariant(String & language_tag){
189 // given a language_tag: en-US-mn we want to return "mn"
190 Uint32 i;
191 if( (i = language_tag.find(findSeparator(language_tag.getCString()))) != PEG_NOT_FOUND )
192 if( (i = language_tag.find(i+1, findSeparator(language_tag.getCString()))) != PEG_NOT_FOUND )
193 return language_tag.subString(i+1);
194 return String::EMPTY;
195 }
196
197 void LanguageParser::parseLanguageSubtags(Array<String> &subtags, String language_tag){
198 humberto 1.8 PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseLanguageSubtags");
199 Uint32 i;
200 char separator = findSeparator(language_tag.getCString());
201 while( (i = language_tag.find(Char16(separator))) != PEG_NOT_FOUND ){
202 subtags.append(language_tag.subString(0,i));
203 language_tag.remove(0,i + 1);
204 }
205 if(language_tag.size() > 0)
206 subtags.append(language_tag);
207 PEG_METHOD_EXIT();
208 }
209
210 Boolean LanguageParser::isValid(String language_tag, Boolean validate_length){
211 //break the String down into parts(subtags), then validate each part
212
213 if(language_tag == "*") return true;
214
215 Array<String> subtags;
216 parseLanguageSubtags(subtags, language_tag);
217 if(subtags.size() > 0){
218 for(Uint32 i = 0; i < subtags.size(); i++){
219 humberto 1.8 //length should be 8 or less AND all characters should be A-Z or a-z
220 if((validate_length && subtags[i].size() > 8) || !checkAlpha(subtags[i].getCString()))
221 return false;
222 }
223 }
224 else{ //nothing back from parseLanguageSubtags
225 return false;
226 }
227 return true;
228 }
229
230 String LanguageParser::convertPrivateLanguageTag(String language_tag){
231 // figure out if its a unix style locale or windows locale
232 Uint32 i;
233 if(( i = language_tag.find("pegasus-")) != PEG_NOT_FOUND ){
234 language_tag = language_tag.subString(i+5); // capture the remainder of the string
235 return String(replaceSeparator(language_tag.getCString(), '-'));
236 }
237 //else if( (i = language_tag.find("win-")) != PEG_NOT_FOUND ){
238 // return language_tag.subString(i+4); // capture the remainder of the string
239 // call windows ID to ICU convert routine or locmap.c function here
240 humberto 1.8 //}
241 else{
242 return language_tag;
243 }
244 }
245
246 Boolean LanguageParser::checkAlpha(CString _str){
|