1 martin 1.35 //%LICENSE////////////////////////////////////////////////////////////////
|
2 martin 1.36 //
|
3 martin 1.35 // Licensed to The Open Group (TOG) under one or more contributor license
4 // agreements. Refer to the OpenPegasusNOTICE.txt file distributed with
5 // this work for additional information regarding copyright ownership.
6 // Each contributor licenses this file to you under the OpenPegasus Open
7 // Source License; you may not use this file except in compliance with the
8 // License.
|
9 martin 1.36 //
|
10 martin 1.35 // Permission is hereby granted, free of charge, to any person obtaining a
11 // copy of this software and associated documentation files (the "Software"),
12 // to deal in the Software without restriction, including without limitation
13 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
14 // and/or sell copies of the Software, and to permit persons to whom the
15 // Software is furnished to do so, subject to the following conditions:
|
16 martin 1.36 //
|
17 martin 1.35 // The above copyright notice and this permission notice shall be included
18 // in all copies or substantial portions of the Software.
|
19 martin 1.36 //
|
20 martin 1.35 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
21 martin 1.36 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22 martin 1.35 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24 // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25 // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26 // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27 martin 1.36 //
|
28 martin 1.35 //////////////////////////////////////////////////////////////////////////
|
29 humberto 1.8 //
30 //%/////////////////////////////////////////////////////////////////////////////
31
32 #include <Pegasus/Common/LanguageParser.h>
33 #include <Pegasus/Common/InternalException.h>
34 #include <Pegasus/Common/Tracer.h>
|
35 kumpf 1.30 #include <Pegasus/Common/MessageLoader.h>
|
36 humberto 1.8 #include <cstring>
37
|
38 kumpf 1.21 #ifdef PEGASUS_HAS_ICU
39 # include <unicode/locid.h>
40 #endif
41
|
42 humberto 1.8 PEGASUS_NAMESPACE_BEGIN
43
|
// Character that separates the subtags of a language tag (as in "en-US-mn").
// It is only ever read in this file, so it is declared const.
static const char LANGUAGE_TAG_SEPARATOR_CHAR = '-';
45
|
46 kumpf 1.23 AcceptLanguageList LanguageParser::parseAcceptLanguageHeader(
|
47 kumpf 1.21 const String& acceptLanguageHeader)
48 {
49 PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseAcceptLanguageHeader");
50
|
51 kumpf 1.23 AcceptLanguageList acceptLanguages;
|
52 kumpf 1.21
53 try
54 {
55 Array<String> languageElements;
56 LanguageParser::_parseLanguageHeader(
57 acceptLanguageHeader,
58 languageElements);
59
60 for (Uint32 i = 0; i < languageElements.size(); i++)
61 {
62 String languageTagString;
63 Real32 qualityValue;
64 LanguageParser::_parseAcceptLanguageElement(
65 languageElements[i], languageTagString, qualityValue);
|
66 kumpf 1.30 acceptLanguages.insert(
67 LanguageTag(languageTagString), qualityValue);
|
68 kumpf 1.21 }
69 }
70 catch (Exception& e)
71 {
72 throw InvalidAcceptLanguageHeader(e.getMessage());
73 }
74
75 PEG_METHOD_EXIT();
76 return acceptLanguages;
77 }
78
|
79 kumpf 1.23 ContentLanguageList LanguageParser::parseContentLanguageHeader(
|
80 kumpf 1.21 const String& contentLanguageHeader)
81 {
82 PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseContentLanguageHeader");
83
|
84 kumpf 1.23 ContentLanguageList contentLanguages;
|
85 kumpf 1.21
86 try
87 {
88 Array<String> languageElements;
89 LanguageParser::_parseLanguageHeader(
90 contentLanguageHeader,
91 languageElements);
92
93 for (Uint32 i = 0; i < languageElements.size(); i++)
94 {
95 contentLanguages.append(LanguageTag(languageElements[i]));
96 }
97 }
98 catch (Exception& e)
99 {
100 throw InvalidContentLanguageHeader(e.getMessage());
101 }
102
103 PEG_METHOD_EXIT();
104 return contentLanguages;
105 }
106 kumpf 1.21
107 void LanguageParser::parseLanguageTag(
|
108 kumpf 1.24 const String& languageTagString,
|
109 kumpf 1.21 String& language,
110 String& country,
111 String& variant)
112 {
113 PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseLanguageTag");
114
115 language.clear();
116 country.clear();
117 variant.clear();
118
119 if (languageTagString == "*")
120 {
121 // Parsing and validation is complete
122 PEG_METHOD_EXIT();
123 return;
124 }
125
126 Boolean isStandardFormat = true; // RFC 3066 (ISO 639, ISO 3166)
127 Array<String> subtags;
128
129 _parseLanguageSubtags(subtags, languageTagString);
130 kumpf 1.21
131 // _parseLanguageSubtags() always returns at least one subtag.
132 PEGASUS_ASSERT(subtags.size() > 0);
133
134 // Validate the primary subtag.
135 // Given a languageTagString "en-US-mn" the language is "en".
136
137 language = subtags[0];
138
139 if ((language == "i") || (language == "x"))
140 {
141 // These primary tags are allowed, but are not ISO 639 compliant
142 isStandardFormat = false;
143 language.clear();
144 }
145 else if ((language.size() != 2) && (language.size() != 3))
146 {
147 // Except for "i" and "x", primary tags must be 2 or 3 characters,
148 // according to RFC 3066.
|
149 kumpf 1.33
150 // Do not localize this message; it could cause recursion.
|
151 kumpf 1.21 PEG_METHOD_EXIT();
|
152 kumpf 1.33 throw Exception(Formatter::format(
153 "Invalid language tag \"$0\".",
154 languageTagString));
|
155 kumpf 1.21 }
156
157 if (subtags.size() == 1)
158 {
159 // If only the primary subtag is present, we are done!
160 PEG_METHOD_EXIT();
161 return;
162 }
163
164 // Validate the second subtag.
165 // Given a languageTagString "en-US-mn" the country is "US".
166
167 if (subtags[1].size() == 1)
168 {
169 // The second subtag may not be a single character according to
170 // RFC 3066.
|
171 kumpf 1.33
172 // Do not localize this message; it could cause recursion.
|
173 kumpf 1.21 PEG_METHOD_EXIT();
|
174 kumpf 1.33 throw Exception(Formatter::format(
175 "Invalid language tag \"$0\".",
176 languageTagString));
|
177 kumpf 1.21 }
178
179 if (isStandardFormat)
180 {
181 Uint32 variantIndex = 1;
182
183 if (subtags[1].size() == 2)
184 {
185 country = subtags[1];
186 variantIndex = 2;
187 }
188
189 Uint32 numSubtags = subtags.size();
190
191 if (variantIndex < numSubtags)
192 {
193 variant = subtags[variantIndex++];
194
195 while (variantIndex < numSubtags)
196 {
197 variant.append(LANGUAGE_TAG_SEPARATOR_CHAR);
198 kumpf 1.21 variant.append(subtags[variantIndex++]);
199 }
200 }
201 }
202
203 PEG_METHOD_EXIT();
204 }
205
206 void LanguageParser::validateQualityValue(Real32 quality)
207 {
208 if ((quality > 1.0) || (quality < 0.0))
209 {
210 MessageLoaderParms parms(
211 "Common.LanguageParser.INVALID_QUALITY_VALUE",
212 "AcceptLanguage contains an invalid quality value");
213 throw InvalidAcceptLanguageHeader(MessageLoader::getMessage(parms));
214 }
215 }
216
217 String LanguageParser::buildAcceptLanguageHeader(
|
218 kumpf 1.23 const AcceptLanguageList& acceptLanguages)
|
219 kumpf 1.21 {
220 String alString;
221 Uint32 numAcceptLanguages = acceptLanguages.size();
222
223 for (Uint32 i = 0; i < numAcceptLanguages; i++)
224 {
225 alString.append(acceptLanguages.getLanguageTag(i).toString());
226
227 Real32 q = acceptLanguages.getQualityValue(i);
228 if (q != 1.0)
229 {
230 char qValueString[6];
231 sprintf(qValueString, "%4.3f", q);
232 alString.append(";q=");
233 alString.append(qValueString);
234 }
235
236 if (i < numAcceptLanguages - 1)
237 {
238 alString.append(",");
239 }
240 kumpf 1.21 }
241
242 return alString;
243 }
244
245 String LanguageParser::buildContentLanguageHeader(
|
246 kumpf 1.23 const ContentLanguageList& contentLanguages)
|
247 kumpf 1.21 {
248 String clString;
249 Uint32 numContentLanguages = contentLanguages.size();
250
251 for (Uint32 i = 0; i < numContentLanguages; i++)
252 {
253 clString.append(contentLanguages.getLanguageTag(i).toString());
254
255 if (i < numContentLanguages - 1)
256 {
257 clString.append(",");
258 }
259 }
260
261 return clString;
262 }
263
|
#ifdef PEGASUS_HAS_ICU
//
// Converts a locale identifier to language tag form in place by replacing
// every '_' with the language tag separator.  Returns a reference to the
// modified argument.
//
String& LanguageParser::convertLocaleIdToLanguageTag(String& localeId)
{
    const char localeIdSeparator = '_';

    // Each search resumes at the position just replaced; that character is
    // no longer a locale separator, so the scan always moves forward.
    for (Uint32 pos = localeId.find(0, localeIdSeparator);
         pos != PEG_NOT_FOUND;
         pos = localeId.find(pos, localeIdSeparator))
    {
        localeId[pos] = LANGUAGE_TAG_SEPARATOR_CHAR;
    }

    return localeId;
}
#endif
277
|
278 kumpf 1.23 AcceptLanguageList LanguageParser::getDefaultAcceptLanguages()
|
279 kumpf 1.21 {
280 #if defined(PEGASUS_HAS_MESSAGES) && defined(PEGASUS_HAS_ICU)
281 Locale default_loc = Locale::getDefault();
282
|
283 kumpf 1.26 String localeId = default_loc.getName();
284
|
285 kumpf 1.21 try
286 {
|
287 kumpf 1.26 return LanguageParser::parseAcceptLanguageHeader(
288 convertLocaleIdToLanguageTag(localeId));
|
289 kumpf 1.21 }
290 catch (const InvalidAcceptLanguageHeader& e)
291 {
292 Logger::put_l(Logger::ERROR_LOG, System::CIMSERVER, Logger::SEVERE,
|
293 kumpf 1.34 MessageLoaderParms(
294 "src.Server.cimserver.FAILED_TO_SET_PROCESS_LOCALE",
295 "Cannot convert the system process locale into a valid "
296 "Accept-Language format."));
|
297 kumpf 1.21 Logger::put(Logger::ERROR_LOG, System::CIMSERVER, Logger::SEVERE,
298 e.getMessage());
|
299 kumpf 1.23 AcceptLanguageList al;
|
300 kumpf 1.22 al.insert(LanguageTag("*"), 1);
301 return al;
|
302 kumpf 1.21 }
|
303 kumpf 1.22 #else
|
304 kumpf 1.23 return AcceptLanguageList();
|
305 kumpf 1.21 #endif
306 }
307
308 void LanguageParser::_parseLanguageHeader(
309 const String& languageHeaderValue,
310 Array<String>& languageElements)
311 {
312 PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseLanguageHeader");
313
314 languageElements.clear();
315 String element;
316
317 for (Uint32 i=0, len=languageHeaderValue.size(); i<len; i++)
318 {
319 Char16 nextChar = languageHeaderValue[i];
320
321 if (isascii(nextChar) && isspace(nextChar))
322 {
323 // Ignore whitespace
324 }
325 else if (nextChar == '(')
326 kumpf 1.21 {
327 // Ignore comments
328 while (i < len)
329 {
330 // Search for the closing parenthesis
331 if (languageHeaderValue[i] == ')')
332 {
333 break;
334 }
335
336 // Skip over escape characters
337 if (languageHeaderValue[i] == '\\')
338 {
339 i++;
340 }
341
342 i++;
343 }
344
345 // Check for a missing closing parenthesis
346 if (i >= len)
347 kumpf 1.21 {
348 MessageLoaderParms parms(
349 "Common.LanguageParser.DOES_NOT_CONTAIN_TERMINATING",
350 "Closing \")\" character is missing.");
351 throw Exception(MessageLoader::getMessage(parms));
352 }
353 }
354 else if (nextChar == ',')
355 {
356 // Check for the end of the element
357 languageElements.append(element);
358 element.clear();
359 }
360 else
361 {
362 // Unescape an escape character
363 if ((nextChar == '\\') && (i < len-1))
364 {
365 nextChar = languageHeaderValue[++i];
366 }
367
368 kumpf 1.21 // Include this character in the value
369 element.append(nextChar);
370 }
371 }
372
373 // Include the last element in the languageElements array
374 languageElements.append(element);
375
376 PEG_METHOD_EXIT();
377 }
378
379 void LanguageParser::_parseAcceptLanguageElement(
380 const String& acceptLanguageElement,
381 String& languageTag,
382 Real32& quality)
383 {
384 PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseAcceptLanguageElement");
385
386 // look for ';' in acceptLanguageElement, that means we have a
387 // quality value to capture. If not present, we only have a language tag.
388
389 kumpf 1.21 Uint32 semicolonIndex = acceptLanguageElement.find(";");
390 if (semicolonIndex != PEG_NOT_FOUND)
391 {
392 // Separate the language tag and quality value
393
394 String qualityString =
395 acceptLanguageElement.subString(semicolonIndex+1);
396 languageTag = acceptLanguageElement.subString(0, semicolonIndex);
397
398 // Parse the quality value
|
399 humberto 1.8
|
400 kumpf 1.21 char dummyChar;
401 int scanfConversions = sscanf(
402 qualityString.getCString(),
403 "q=%f%c", &quality, &dummyChar);
404
405 if ((scanfConversions != 1) ||
406 (qualityString.size() > 7))
407 {
408 MessageLoaderParms parms(
409 "Common.LanguageParser.INVALID_QUALITY_VALUE",
410 "AcceptLanguage contains an invalid quality value");
411 PEG_METHOD_EXIT();
412 throw Exception(MessageLoader::getMessage(parms));
413 }
414 }
415 else
416 {
417 languageTag = acceptLanguageElement;
418 quality = 1.0;
419 }
420
421 kumpf 1.21 PEG_METHOD_EXIT();
422 }
423
424 void LanguageParser::_parseLanguageSubtags(
425 Array<String>& subtags,
426 const String& languageTagString)
427 {
428 PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseLanguageSubtags");
429
430 // Parse the language tag into subtags
431
432 Uint32 subtagIndex = 0;
433 Uint32 separatorIndex;
434 while ((separatorIndex = languageTagString.find(
435 subtagIndex, LANGUAGE_TAG_SEPARATOR_CHAR)) != PEG_NOT_FOUND)
436 {
437 subtags.append(languageTagString.subString(
438 subtagIndex, separatorIndex - subtagIndex));
439 subtagIndex = separatorIndex + 1;
440 }
441 subtags.append(languageTagString.subString(subtagIndex));
442 kumpf 1.21
443 // Validate the syntax of each of the subtags
444
445 for (Uint32 i = 0, n = subtags.size(); i < n; i++)
446 {
447 if (((i == 0) && !_isValidPrimarySubtagSyntax(subtags[i])) ||
448 ((i > 0) && !_isValidSubtagSyntax(subtags[i])))
449 {
|
450 marek 1.38 // throw Exception(MessageLoader::getMessage(parms));
451 // do not localize message, requires a language tag for this
452 // localization can cause recursion here
453 // MessageLoaderParms::toString adds 5kb static code size, Do NOT
454 // include in non-debug builds
455 #ifdef PEGASUS_DEBUG
|
456 kumpf 1.21 MessageLoaderParms parms(
457 "Common.LanguageParser.MALFORMED_LANGUAGE_TAG",
458 "Malformed language tag \"$0\".", languageTagString);
459 PEG_METHOD_EXIT();
|
460 marek 1.28 throw Exception(parms.toString());
|
461 marek 1.38 #else
462 String malFormed("Malformed language tag:");
463 malFormed.append(languageTagString);
464 PEG_METHOD_EXIT();
465 throw Exception(malFormed);
466 #endif
|
467 kumpf 1.21 }
468 }
469
470 PEG_METHOD_EXIT();
|
471 humberto 1.8 }
472
|
473 kumpf 1.21 Boolean LanguageParser::_isValidPrimarySubtagSyntax(const String& subtag)
474 {
475 if ((subtag.size() == 0) || (subtag.size() > 8))
476 {
477 return false;
478 }
479
480 for (Uint32 i = 0, n = subtag.size(); i < n; i++)
481 {
482 if (!(isascii(subtag[i]) && isalpha(subtag[i])))
483 {
484 return false;
485 }
486 }
487
488 return true;
489 }
490
491 Boolean LanguageParser::_isValidSubtagSyntax(const String& subtag)
492 {
493 if ((subtag.size() == 0) || (subtag.size() > 8))
494 kumpf 1.21 {
495 return false;
496 }
497
498 for (Uint32 i = 0, n = subtag.size(); i < n; i++)
499 {
500 if (!(isascii(subtag[i]) && isalnum(subtag[i])))
501 {
502 return false;
503 }
504 }
505
506 return true;
507 }
|
508 humberto 1.8
509 PEGASUS_NAMESPACE_END
|