1 martin 1.35 //%LICENSE////////////////////////////////////////////////////////////////
|
2 martin 1.36 //
|
3 martin 1.35 // Licensed to The Open Group (TOG) under one or more contributor license
4 // agreements. Refer to the OpenPegasusNOTICE.txt file distributed with
5 // this work for additional information regarding copyright ownership.
6 // Each contributor licenses this file to you under the OpenPegasus Open
7 // Source License; you may not use this file except in compliance with the
8 // License.
|
9 martin 1.36 //
|
10 martin 1.35 // Permission is hereby granted, free of charge, to any person obtaining a
11 // copy of this software and associated documentation files (the "Software"),
12 // to deal in the Software without restriction, including without limitation
13 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
14 // and/or sell copies of the Software, and to permit persons to whom the
15 // Software is furnished to do so, subject to the following conditions:
|
16 martin 1.36 //
|
17 martin 1.35 // The above copyright notice and this permission notice shall be included
18 // in all copies or substantial portions of the Software.
|
19 martin 1.36 //
|
20 martin 1.35 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
21 martin 1.36 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22 martin 1.35 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24 // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25 // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26 // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27 martin 1.36 //
|
28 martin 1.35 //////////////////////////////////////////////////////////////////////////
|
29 humberto 1.8 //
30 //%/////////////////////////////////////////////////////////////////////////////
31
32 #include <Pegasus/Common/LanguageParser.h>
33 #include <Pegasus/Common/InternalException.h>
34 #include <Pegasus/Common/Tracer.h>
|
35 kumpf 1.30 #include <Pegasus/Common/MessageLoader.h>
|
36 humberto 1.8 #include <cstring>
37
|
38 kumpf 1.21 #ifdef PEGASUS_HAS_ICU
39 # include <unicode/locid.h>
40 #endif
41
|
42 humberto 1.8 PEGASUS_NAMESPACE_BEGIN
43
|
// Character that separates the subtags of a language tag, e.g. "en-US-mn".
// Never modified, so it is declared const.
static const char LANGUAGE_TAG_SEPARATOR_CHAR = '-';

// Character that convertLocaleIdToLanguageTag() replaces with
// LANGUAGE_TAG_SEPARATOR_CHAR when turning a locale ID into a language tag.
static const char LOCALE_ID_SEPARATOR_CHAR = '_';
|
46 kumpf 1.21
|
47 kumpf 1.23 AcceptLanguageList LanguageParser::parseAcceptLanguageHeader(
|
48 kumpf 1.21 const String& acceptLanguageHeader)
49 {
50 PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseAcceptLanguageHeader");
51
|
52 kumpf 1.23 AcceptLanguageList acceptLanguages;
|
53 kumpf 1.21
54 try
55 {
56 Array<String> languageElements;
57 LanguageParser::_parseLanguageHeader(
58 acceptLanguageHeader,
59 languageElements);
60
61 for (Uint32 i = 0; i < languageElements.size(); i++)
62 {
63 String languageTagString;
64 Real32 qualityValue;
65 LanguageParser::_parseAcceptLanguageElement(
66 languageElements[i], languageTagString, qualityValue);
|
67 kumpf 1.30 acceptLanguages.insert(
68 LanguageTag(languageTagString), qualityValue);
|
69 kumpf 1.21 }
70 }
71 catch (Exception& e)
72 {
73 throw InvalidAcceptLanguageHeader(e.getMessage());
74 }
75
76 PEG_METHOD_EXIT();
77 return acceptLanguages;
78 }
79
|
80 kumpf 1.23 ContentLanguageList LanguageParser::parseContentLanguageHeader(
|
81 kumpf 1.21 const String& contentLanguageHeader)
82 {
83 PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseContentLanguageHeader");
84
|
85 kumpf 1.23 ContentLanguageList contentLanguages;
|
86 kumpf 1.21
87 try
88 {
89 Array<String> languageElements;
90 LanguageParser::_parseLanguageHeader(
91 contentLanguageHeader,
92 languageElements);
93
94 for (Uint32 i = 0; i < languageElements.size(); i++)
95 {
96 contentLanguages.append(LanguageTag(languageElements[i]));
97 }
98 }
99 catch (Exception& e)
100 {
101 throw InvalidContentLanguageHeader(e.getMessage());
102 }
103
104 PEG_METHOD_EXIT();
105 return contentLanguages;
106 }
107 kumpf 1.21
108 void LanguageParser::parseLanguageTag(
|
109 kumpf 1.24 const String& languageTagString,
|
110 kumpf 1.21 String& language,
111 String& country,
112 String& variant)
113 {
114 PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseLanguageTag");
115
116 language.clear();
117 country.clear();
118 variant.clear();
119
120 if (languageTagString == "*")
121 {
122 // Parsing and validation is complete
123 PEG_METHOD_EXIT();
124 return;
125 }
126
127 Boolean isStandardFormat = true; // RFC 3066 (ISO 639, ISO 3166)
128 Array<String> subtags;
129
130 _parseLanguageSubtags(subtags, languageTagString);
131 kumpf 1.21
132 // _parseLanguageSubtags() always returns at least one subtag.
133 PEGASUS_ASSERT(subtags.size() > 0);
134
135 // Validate the primary subtag.
136 // Given a languageTagString "en-US-mn" the language is "en".
137
138 language = subtags[0];
139
140 if ((language == "i") || (language == "x"))
141 {
142 // These primary tags are allowed, but are not ISO 639 compliant
143 isStandardFormat = false;
144 language.clear();
145 }
146 else if ((language.size() != 2) && (language.size() != 3))
147 {
148 // Except for "i" and "x", primary tags must be 2 or 3 characters,
149 // according to RFC 3066.
|
150 kumpf 1.33
151 // Do not localize this message; it could cause recursion.
|
152 kumpf 1.21 PEG_METHOD_EXIT();
|
153 kumpf 1.33 throw Exception(Formatter::format(
154 "Invalid language tag \"$0\".",
155 languageTagString));
|
156 kumpf 1.21 }
157
158 if (subtags.size() == 1)
159 {
160 // If only the primary subtag is present, we are done!
161 PEG_METHOD_EXIT();
162 return;
163 }
164
165 // Validate the second subtag.
166 // Given a languageTagString "en-US-mn" the country is "US".
167
168 if (subtags[1].size() == 1)
169 {
170 // The second subtag may not be a single character according to
171 // RFC 3066.
|
172 kumpf 1.33
173 // Do not localize this message; it could cause recursion.
|
174 kumpf 1.21 PEG_METHOD_EXIT();
|
175 kumpf 1.33 throw Exception(Formatter::format(
176 "Invalid language tag \"$0\".",
177 languageTagString));
|
178 kumpf 1.21 }
179
180 if (isStandardFormat)
181 {
182 Uint32 variantIndex = 1;
183
184 if (subtags[1].size() == 2)
185 {
186 country = subtags[1];
187 variantIndex = 2;
188 }
189
190 Uint32 numSubtags = subtags.size();
191
192 if (variantIndex < numSubtags)
193 {
194 variant = subtags[variantIndex++];
195
196 while (variantIndex < numSubtags)
197 {
198 variant.append(LANGUAGE_TAG_SEPARATOR_CHAR);
199 kumpf 1.21 variant.append(subtags[variantIndex++]);
200 }
201 }
202 }
203
204 PEG_METHOD_EXIT();
205 }
206
207 void LanguageParser::validateQualityValue(Real32 quality)
208 {
209 if ((quality > 1.0) || (quality < 0.0))
210 {
211 MessageLoaderParms parms(
212 "Common.LanguageParser.INVALID_QUALITY_VALUE",
213 "AcceptLanguage contains an invalid quality value");
214 throw InvalidAcceptLanguageHeader(MessageLoader::getMessage(parms));
215 }
216 }
217
218 String LanguageParser::buildAcceptLanguageHeader(
|
219 kumpf 1.23 const AcceptLanguageList& acceptLanguages)
|
220 kumpf 1.21 {
221 String alString;
222 Uint32 numAcceptLanguages = acceptLanguages.size();
223
224 for (Uint32 i = 0; i < numAcceptLanguages; i++)
225 {
226 alString.append(acceptLanguages.getLanguageTag(i).toString());
227
228 Real32 q = acceptLanguages.getQualityValue(i);
229 if (q != 1.0)
230 {
231 char qValueString[6];
232 sprintf(qValueString, "%4.3f", q);
233 alString.append(";q=");
234 alString.append(qValueString);
235 }
236
237 if (i < numAcceptLanguages - 1)
238 {
239 alString.append(",");
240 }
241 kumpf 1.21 }
242
243 return alString;
244 }
245
246 String LanguageParser::buildContentLanguageHeader(
|
247 kumpf 1.23 const ContentLanguageList& contentLanguages)
|
248 kumpf 1.21 {
249 String clString;
250 Uint32 numContentLanguages = contentLanguages.size();
251
252 for (Uint32 i = 0; i < numContentLanguages; i++)
253 {
254 clString.append(contentLanguages.getLanguageTag(i).toString());
255
256 if (i < numContentLanguages - 1)
257 {
258 clString.append(",");
259 }
260 }
261
262 return clString;
263 }
264
|
265 kumpf 1.26 #ifdef PEGASUS_HAS_ICU
266 String& LanguageParser::convertLocaleIdToLanguageTag(String& localeId)
267 {
268 Uint32 index = 0;
269 while ((index = localeId.find(index, LOCALE_ID_SEPARATOR_CHAR)) !=
270 PEG_NOT_FOUND)
271 {
272 localeId[index] = LANGUAGE_TAG_SEPARATOR_CHAR;
273 }
274 return localeId;
275 }
276 #endif
277
|
278 kumpf 1.23 AcceptLanguageList LanguageParser::getDefaultAcceptLanguages()
|
279 kumpf 1.21 {
280 #if defined(PEGASUS_HAS_MESSAGES) && defined(PEGASUS_HAS_ICU)
281 Locale default_loc = Locale::getDefault();
282
|
283 kumpf 1.26 String localeId = default_loc.getName();
284
|
285 kumpf 1.21 try
286 {
|
287 kumpf 1.26 return LanguageParser::parseAcceptLanguageHeader(
288 convertLocaleIdToLanguageTag(localeId));
|
289 kumpf 1.21 }
290 catch (const InvalidAcceptLanguageHeader& e)
291 {
292 Logger::put_l(Logger::ERROR_LOG, System::CIMSERVER, Logger::SEVERE,
|
293 kumpf 1.34 MessageLoaderParms(
294 "src.Server.cimserver.FAILED_TO_SET_PROCESS_LOCALE",
295 "Cannot convert the system process locale into a valid "
296 "Accept-Language format."));
|
297 kumpf 1.21 Logger::put(Logger::ERROR_LOG, System::CIMSERVER, Logger::SEVERE,
298 e.getMessage());
|
299 kumpf 1.23 AcceptLanguageList al;
|
300 kumpf 1.22 al.insert(LanguageTag("*"), 1);
301 return al;
|
302 kumpf 1.21 }
|
303 kumpf 1.22 #else
|
304 kumpf 1.23 return AcceptLanguageList();
|
305 kumpf 1.21 #endif
306 }
307
308 void LanguageParser::_parseLanguageHeader(
309 const String& languageHeaderValue,
310 Array<String>& languageElements)
311 {
312 PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseLanguageHeader");
313
314 languageElements.clear();
315 String element;
316
317 for (Uint32 i=0, len=languageHeaderValue.size(); i<len; i++)
318 {
319 Char16 nextChar = languageHeaderValue[i];
320
321 if (isascii(nextChar) && isspace(nextChar))
322 {
323 // Ignore whitespace
324 }
325 else if (nextChar == '(')
326 kumpf 1.21 {
327 // Ignore comments
328 while (i < len)
329 {
330 // Search for the closing parenthesis
331 if (languageHeaderValue[i] == ')')
332 {
333 break;
334 }
335
336 // Skip over escape characters
337 if (languageHeaderValue[i] == '\\')
338 {
339 i++;
340 }
341
342 i++;
343 }
344
345 // Check for a missing closing parenthesis
346 if (i >= len)
347 kumpf 1.21 {
348 MessageLoaderParms parms(
349 "Common.LanguageParser.DOES_NOT_CONTAIN_TERMINATING",
350 "Closing \")\" character is missing.");
351 throw Exception(MessageLoader::getMessage(parms));
352 }
353 }
354 else if (nextChar == ',')
355 {
356 // Check for the end of the element
357 languageElements.append(element);
358 element.clear();
359 }
360 else
361 {
362 // Unescape an escape character
363 if ((nextChar == '\\') && (i < len-1))
364 {
365 nextChar = languageHeaderValue[++i];
366 }
367
368 kumpf 1.21 // Include this character in the value
369 element.append(nextChar);
370 }
371 }
372
373 // Include the last element in the languageElements array
374 languageElements.append(element);
375
376 PEG_METHOD_EXIT();
377 }
378
379 void LanguageParser::_parseAcceptLanguageElement(
380 const String& acceptLanguageElement,
381 String& languageTag,
382 Real32& quality)
383 {
384 PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseAcceptLanguageElement");
385
386 // look for ';' in acceptLanguageElement, that means we have a
387 // quality value to capture. If not present, we only have a language tag.
388
389 kumpf 1.21 Uint32 semicolonIndex = acceptLanguageElement.find(";");
390 if (semicolonIndex != PEG_NOT_FOUND)
391 {
392 // Separate the language tag and quality value
393
394 String qualityString =
395 acceptLanguageElement.subString(semicolonIndex+1);
396 languageTag = acceptLanguageElement.subString(0, semicolonIndex);
397
398 // Parse the quality value
|
399 humberto 1.8
|
400 kumpf 1.21 char dummyChar;
401 int scanfConversions = sscanf(
402 qualityString.getCString(),
403 "q=%f%c", &quality, &dummyChar);
404
405 if ((scanfConversions != 1) ||
406 (qualityString.size() > 7))
407 {
408 MessageLoaderParms parms(
409 "Common.LanguageParser.INVALID_QUALITY_VALUE",
410 "AcceptLanguage contains an invalid quality value");
411 PEG_METHOD_EXIT();
412 throw Exception(MessageLoader::getMessage(parms));
413 }
414 }
415 else
416 {
417 languageTag = acceptLanguageElement;
418 quality = 1.0;
419 }
420
421 kumpf 1.21 PEG_METHOD_EXIT();
422 }
423
424 void LanguageParser::_parseLanguageSubtags(
425 Array<String>& subtags,
426 const String& languageTagString)
427 {
428 PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseLanguageSubtags");
429
430 // Parse the language tag into subtags
431
432 Uint32 subtagIndex = 0;
433 Uint32 separatorIndex;
434 while ((separatorIndex = languageTagString.find(
435 subtagIndex, LANGUAGE_TAG_SEPARATOR_CHAR)) != PEG_NOT_FOUND)
436 {
437 subtags.append(languageTagString.subString(
438 subtagIndex, separatorIndex - subtagIndex));
439 subtagIndex = separatorIndex + 1;
440 }
441 subtags.append(languageTagString.subString(subtagIndex));
442 kumpf 1.21
443 // Validate the syntax of each of the subtags
444
445 for (Uint32 i = 0, n = subtags.size(); i < n; i++)
446 {
447 if (((i == 0) && !_isValidPrimarySubtagSyntax(subtags[i])) ||
448 ((i > 0) && !_isValidSubtagSyntax(subtags[i])))
449 {
450 MessageLoaderParms parms(
451 "Common.LanguageParser.MALFORMED_LANGUAGE_TAG",
452 "Malformed language tag \"$0\".", languageTagString);
453 PEG_METHOD_EXIT();
|
454 marek 1.29 // throw Exception(MessageLoader::getMessage(parms));
|
455 marek 1.28 // do not localize message, requires a language tag for this
456 // localization can cause recursion here
457 throw Exception(parms.toString());
|
458 kumpf 1.21 }
459 }
460
461 PEG_METHOD_EXIT();
|
462 humberto 1.8 }
463
|
464 kumpf 1.21 Boolean LanguageParser::_isValidPrimarySubtagSyntax(const String& subtag)
465 {
466 if ((subtag.size() == 0) || (subtag.size() > 8))
467 {
468 return false;
469 }
470
471 for (Uint32 i = 0, n = subtag.size(); i < n; i++)
472 {
473 if (!(isascii(subtag[i]) && isalpha(subtag[i])))
474 {
475 return false;
476 }
477 }
478
479 return true;
480 }
481
482 Boolean LanguageParser::_isValidSubtagSyntax(const String& subtag)
483 {
484 if ((subtag.size() == 0) || (subtag.size() > 8))
485 kumpf 1.21 {
486 return false;
487 }
488
489 for (Uint32 i = 0, n = subtag.size(); i < n; i++)
490 {
491 if (!(isascii(subtag[i]) && isalnum(subtag[i])))
492 {
493 return false;
494 }
495 }
496
497 return true;
498 }
|
499 humberto 1.8
500 PEGASUS_NAMESPACE_END
|