(file) Return to LanguageParser.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

  1 karl  1.25 //%2006////////////////////////////////////////////////////////////////////////
  2 humberto 1.8  //
  3 karl     1.10 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
  4               // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
  5               // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
  6 karl     1.9  // IBM Corp.; EMC Corporation, The Open Group.
  7 karl     1.10 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
  8               // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
  9 karl     1.11 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
 10               // EMC Corporation; VERITAS Software Corporation; The Open Group.
 11 karl     1.25 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
 12               // EMC Corporation; Symantec Corporation; The Open Group.
 13 humberto 1.8  //
 14               // Permission is hereby granted, free of charge, to any person obtaining a copy
 15               // of this software and associated documentation files (the "Software"), to
 16               // deal in the Software without restriction, including without limitation the
 17               // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 18               // sell copies of the Software, and to permit persons to whom the Software is
 19               // furnished to do so, subject to the following conditions:
 20               // 
 21               // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
 22               // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
 23               // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
 24               // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 25               // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 26               // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 27               // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 28               // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 29               //
 30               //==============================================================================
 31               //
 32               //%/////////////////////////////////////////////////////////////////////////////
 33               
 34 humberto 1.8  #include <Pegasus/Common/LanguageParser.h>
 35               #include <Pegasus/Common/InternalException.h>
 36               #include <Pegasus/Common/Tracer.h>
 37               #include <Pegasus/Common/MessageLoader.h> //l10n
 38               #include <cstring>
 39               
 40 kumpf    1.21 #ifdef PEGASUS_HAS_ICU
 41               # include <unicode/locid.h>
 42               #endif
 43               #if defined(PEGASUS_OS_OS400)
 44 mike     1.27 # include "EBCDIC_OS400.h"
 45 kumpf    1.21 #endif
 46               
 47 humberto 1.8  PEGASUS_NAMESPACE_BEGIN
 48               
 49 kumpf    1.21 static char LANGUAGE_TAG_SEPARATOR_CHAR = '-';
 50 kumpf    1.26 static char LOCALE_ID_SEPARATOR_CHAR = '_';
 51 kumpf    1.21 
 52 kumpf    1.23 AcceptLanguageList LanguageParser::parseAcceptLanguageHeader(
 53 kumpf    1.21     const String& acceptLanguageHeader)
 54               {
 55                   PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseAcceptLanguageHeader");
 56               
 57 kumpf    1.23     AcceptLanguageList acceptLanguages;
 58 kumpf    1.21 
 59                   try
 60                   {
 61                       Array<String> languageElements;
 62                       LanguageParser::_parseLanguageHeader(
 63                           acceptLanguageHeader,
 64                           languageElements);
 65               
 66                       for (Uint32 i = 0; i < languageElements.size(); i++)
 67                       {
 68                           String languageTagString;
 69                           Real32 qualityValue;
 70                           LanguageParser::_parseAcceptLanguageElement(
 71                               languageElements[i], languageTagString, qualityValue);
 72                           acceptLanguages.insert(LanguageTag(languageTagString), qualityValue);
 73                       }
 74                   }
 75                   catch (Exception& e)
 76                   {
 77                       throw InvalidAcceptLanguageHeader(e.getMessage());
 78                   }
 79 kumpf    1.21 
 80                   PEG_METHOD_EXIT();
 81                   return acceptLanguages;
 82               }
 83               
 84 kumpf    1.23 ContentLanguageList LanguageParser::parseContentLanguageHeader(
 85 kumpf    1.21     const String& contentLanguageHeader)
 86               {
 87                   PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseContentLanguageHeader");
 88               
 89 kumpf    1.23     ContentLanguageList contentLanguages;
 90 kumpf    1.21 
 91                   try
 92                   {
 93                       Array<String> languageElements;
 94                       LanguageParser::_parseLanguageHeader(
 95                           contentLanguageHeader,
 96                           languageElements);
 97               
 98                       for (Uint32 i = 0; i < languageElements.size(); i++)
 99                       {
100                           contentLanguages.append(LanguageTag(languageElements[i]));
101                       }
102                   }
103                   catch (Exception& e)
104                   {
105                       throw InvalidContentLanguageHeader(e.getMessage());
106                   }
107               
108                   PEG_METHOD_EXIT();
109                   return contentLanguages;
110               }
111 kumpf    1.21 
112               void LanguageParser::parseLanguageTag(
113 kumpf    1.24     const String& languageTagString,
114 kumpf    1.21     String& language,
115                   String& country,
116                   String& variant)
117               {
118                   PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseLanguageTag");
119               
120                   language.clear();
121                   country.clear();
122                   variant.clear();
123               
124                   if (languageTagString == "*")
125                   {
126                       // Parsing and validation is complete
127                       PEG_METHOD_EXIT();
128                       return;
129                   }
130               
131                   Boolean isStandardFormat = true;    // RFC 3066 (ISO 639, ISO 3166)
132                   Array<String> subtags;
133               
134                   _parseLanguageSubtags(subtags, languageTagString);
135 kumpf    1.21 
136                   // _parseLanguageSubtags() always returns at least one subtag.
137                   PEGASUS_ASSERT(subtags.size() > 0);
138               
139                   // Validate the primary subtag.
140                   // Given a languageTagString "en-US-mn" the language is "en".
141               
142                   language = subtags[0];
143               
144                   if ((language == "i") || (language == "x"))
145                   {
146                       // These primary tags are allowed, but are not ISO 639 compliant
147                       isStandardFormat = false;
148                       language.clear();
149                   }
150                   else if ((language.size() != 2) && (language.size() != 3))
151                   {
152                       // Except for "i" and "x", primary tags must be 2 or 3 characters,
153                       // according to RFC 3066.
154                       MessageLoaderParms parms(
155                           "Common.LanguageParser.INVALID_LANGUAGE_TAG",
156 kumpf    1.21             "Invalid language tag \"$0\".", languageTagString);
157                       PEG_METHOD_EXIT();
158 marek    1.29         // throw Exception(MessageLoader::getMessage(parms));
159 marek    1.28         // do not localize message, requires a language tag for this
160                       // localization can cause recursion here
161                       throw Exception(parms.toString());
162 kumpf    1.21     }
163               
164                   if (subtags.size() == 1)
165                   {
166                       // If only the primary subtag is present, we are done!
167                       PEG_METHOD_EXIT();
168                       return;
169                   }
170               
171                   // Validate the second subtag.
172                   // Given a languageTagString "en-US-mn" the country is "US".
173               
174                   if (subtags[1].size() == 1)
175                   {
176                       // The second subtag may not be a single character according to
177                       // RFC 3066.
178                       MessageLoaderParms parms(
179                           "Common.LanguageParser.INVALID_LANGUAGE_TAG",
180                           "Invalid language tag \"$0\".", languageTagString);
181                       PEG_METHOD_EXIT();
182 marek    1.29         // throw Exception(MessageLoader::getMessage(parms));
183 marek    1.28         // do not localize message, requires a language tag for this
184                       // localization can cause recursion here
185                       throw Exception(parms.toString());
186 kumpf    1.21     }
187               
188                   if (isStandardFormat)
189                   {
190                       Uint32 variantIndex = 1;
191               
192                       if (subtags[1].size() == 2)
193                       {
194                           country = subtags[1];
195                           variantIndex = 2;
196                       }
197               
198                       Uint32 numSubtags = subtags.size();
199               
200                       if (variantIndex < numSubtags)
201                       {
202                           variant = subtags[variantIndex++];
203               
204                           while (variantIndex < numSubtags)
205                           {
206                               variant.append(LANGUAGE_TAG_SEPARATOR_CHAR);
207 kumpf    1.21                 variant.append(subtags[variantIndex++]);
208                           }
209                       }
210                   }
211               
212                   PEG_METHOD_EXIT();
213               }
214               
215               void LanguageParser::validateQualityValue(Real32 quality)
216               {
217                   if ((quality > 1.0) || (quality < 0.0))
218                   {
219                       MessageLoaderParms parms(
220                           "Common.LanguageParser.INVALID_QUALITY_VALUE",
221                           "AcceptLanguage contains an invalid quality value");
222                       throw InvalidAcceptLanguageHeader(MessageLoader::getMessage(parms));
223                   }
224               }
225               
226               String LanguageParser::buildAcceptLanguageHeader(
227 kumpf    1.23     const AcceptLanguageList& acceptLanguages)
228 kumpf    1.21 {
229                   String alString;
230                   Uint32 numAcceptLanguages = acceptLanguages.size();
231               
232                   for (Uint32 i = 0; i < numAcceptLanguages; i++)
233                   {
234                       alString.append(acceptLanguages.getLanguageTag(i).toString());
235               
236                       Real32 q = acceptLanguages.getQualityValue(i);
237                       if (q != 1.0)
238                       {
239                           char qValueString[6];
240                           sprintf(qValueString, "%4.3f", q);
241                           alString.append(";q=");
242                           alString.append(qValueString);
243                       }
244               
245                       if (i < numAcceptLanguages - 1)
246                       {
247                           alString.append(",");
248                       }
249 kumpf    1.21     }
250               
251                   return alString;
252               }
253               
254               String LanguageParser::buildContentLanguageHeader(
255 kumpf    1.23     const ContentLanguageList& contentLanguages)
256 kumpf    1.21 {
257                   String clString;
258                   Uint32 numContentLanguages = contentLanguages.size();
259               
260                   for (Uint32 i = 0; i < numContentLanguages; i++)
261                   {
262                       clString.append(contentLanguages.getLanguageTag(i).toString());
263               
264                       if (i < numContentLanguages - 1)
265                       {
266                           clString.append(",");
267                       }
268                   }
269               
270                   return clString;
271               }
272               
#ifdef PEGASUS_HAS_ICU
// Rewrites a locale identifier in place into language tag form by replacing
// every LOCALE_ID_SEPARATOR_CHAR with LANGUAGE_TAG_SEPARATOR_CHAR.
//
// @param localeId The identifier to convert; modified in place.
// @return A reference to the modified localeId.
String& LanguageParser::convertLocaleIdToLanguageTag(String& localeId)
{
    Uint32 pos = 0;

    for (;;)
    {
        // Resume from the last hit; it has already been overwritten, so
        // the search moves on to the next separator.
        pos = localeId.find(pos, LOCALE_ID_SEPARATOR_CHAR);

        if (pos == PEG_NOT_FOUND)
        {
            break;
        }

        localeId[pos] = LANGUAGE_TAG_SEPARATOR_CHAR;
    }

    return localeId;
}
#endif
285               
286 kumpf    1.23 AcceptLanguageList LanguageParser::getDefaultAcceptLanguages()
287 kumpf    1.21 {
288               #if defined(PEGASUS_HAS_MESSAGES) && defined(PEGASUS_HAS_ICU)
289                   Locale default_loc = Locale::getDefault();
290               
291               # ifdef PEGASUS_OS_OS400
292                   char* tmp = (char*)default_loc.getName();
293                   char tmp_[100];
294                   EtoA(strcpy(tmp_,tmp));
295 kumpf    1.26     String localeId = tmp_;
296 kumpf    1.21 # else
297 kumpf    1.26     String localeId = default_loc.getName();
298               # endif
299               
300 kumpf    1.21     try
301                   {
302 kumpf    1.26         return LanguageParser::parseAcceptLanguageHeader(
303                           convertLocaleIdToLanguageTag(localeId));
304 kumpf    1.21     }
305                   catch (const InvalidAcceptLanguageHeader& e)
306                   {
307                       Logger::put_l(Logger::ERROR_LOG, System::CIMSERVER, Logger::SEVERE,
308                          "src.Server.cimserver.FAILED_TO_GET_PROCESS_LOCALE",
309                          "Could not convert the system locale to a valid accept-language "
310                              "format");
311                       Logger::put(Logger::ERROR_LOG, System::CIMSERVER, Logger::SEVERE,
312                           e.getMessage());
313 kumpf    1.23         AcceptLanguageList al;
314 kumpf    1.22         al.insert(LanguageTag("*"), 1);
315                       return al;
316 kumpf    1.21     }
317 kumpf    1.22 #else
318 kumpf    1.23     return AcceptLanguageList();
319 kumpf    1.21 #endif
320               }
321               
322               void LanguageParser::_parseLanguageHeader(
323                   const String& languageHeaderValue,
324                   Array<String>& languageElements)
325               {
326                   PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseLanguageHeader");
327               
328                   languageElements.clear();
329                   String element;
330               
331                   for (Uint32 i=0, len=languageHeaderValue.size(); i<len; i++)
332                   {
333                       Char16 nextChar = languageHeaderValue[i];
334               
335                       if (isascii(nextChar) && isspace(nextChar))
336                       {
337                           // Ignore whitespace
338                       }
339                       else if (nextChar == '(')
340 kumpf    1.21         {
341                           // Ignore comments
342                           while (i < len)
343                           {
344                               // Search for the closing parenthesis
345                               if (languageHeaderValue[i] == ')')
346                               {
347                                   break;
348                               }
349               
350                               // Skip over escape characters
351                               if (languageHeaderValue[i] == '\\')
352                               {
353                                   i++;
354                               }
355               
356                               i++;
357                           }
358               
359                           // Check for a missing closing parenthesis
360                           if (i >= len)
361 kumpf    1.21             {
362                               MessageLoaderParms parms(
363                                   "Common.LanguageParser.DOES_NOT_CONTAIN_TERMINATING",
364                                   "Closing \")\" character is missing.");
365                               throw Exception(MessageLoader::getMessage(parms));
366                           }
367                       }
368                       else if (nextChar == ',')
369                       {
370                           // Check for the end of the element
371                           languageElements.append(element);
372                           element.clear();
373                       }
374                       else
375                       {
376                           // Unescape an escape character
377                           if ((nextChar == '\\') && (i < len-1))
378                           {
379                               nextChar = languageHeaderValue[++i];
380                           }
381               
382 kumpf    1.21             // Include this character in the value
383                           element.append(nextChar);
384                       }
385                   }
386               
387                   // Include the last element in the languageElements array
388                   languageElements.append(element);
389               
390                   PEG_METHOD_EXIT();
391               }
392               
393               void LanguageParser::_parseAcceptLanguageElement(
394                   const String& acceptLanguageElement,
395                   String& languageTag,
396                   Real32& quality)
397               {
398                   PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseAcceptLanguageElement");
399               
400                   // look for ';' in acceptLanguageElement, that means we have a
401                   // quality value to capture.  If not present, we only have a language tag.
402               
403 kumpf    1.21     Uint32 semicolonIndex = acceptLanguageElement.find(";");
404                   if (semicolonIndex != PEG_NOT_FOUND)
405                   {
406                       // Separate the language tag and quality value
407               
408                       String qualityString =
409                           acceptLanguageElement.subString(semicolonIndex+1);
410                       languageTag = acceptLanguageElement.subString(0, semicolonIndex);
411               
412                       // Parse the quality value
413 humberto 1.8  
414 kumpf    1.21         char dummyChar;
415                       int scanfConversions = sscanf(
416                           qualityString.getCString(),
417                           "q=%f%c", &quality, &dummyChar);
418               
419                       if ((scanfConversions != 1) ||
420                           (qualityString.size() > 7))
421                       {
422                           MessageLoaderParms parms(
423                               "Common.LanguageParser.INVALID_QUALITY_VALUE",
424                               "AcceptLanguage contains an invalid quality value");
425                           PEG_METHOD_EXIT();
426                           throw Exception(MessageLoader::getMessage(parms));
427                       }
428                   }
429                   else
430                   {
431                       languageTag = acceptLanguageElement;
432                       quality = 1.0;
433                   }
434               
435 kumpf    1.21     PEG_METHOD_EXIT();
436               }
437               
438               void LanguageParser::_parseLanguageSubtags(
439                   Array<String>& subtags,
440                   const String& languageTagString)
441               {
442                   PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseLanguageSubtags");
443               
444                   // Parse the language tag into subtags
445               
446                   Uint32 subtagIndex = 0;
447                   Uint32 separatorIndex;
448                   while ((separatorIndex = languageTagString.find(
449                               subtagIndex, LANGUAGE_TAG_SEPARATOR_CHAR)) != PEG_NOT_FOUND)
450                   {
451                       subtags.append(languageTagString.subString(
452                           subtagIndex, separatorIndex - subtagIndex));
453                       subtagIndex = separatorIndex + 1;
454                   }
455                   subtags.append(languageTagString.subString(subtagIndex));
456 kumpf    1.21 
457                   // Validate the syntax of each of the subtags
458               
459                   for (Uint32 i = 0, n = subtags.size(); i < n; i++)
460                   {
461                       if (((i == 0) && !_isValidPrimarySubtagSyntax(subtags[i])) ||
462                           ((i > 0) && !_isValidSubtagSyntax(subtags[i])))
463                       {
464                           MessageLoaderParms parms(
465                               "Common.LanguageParser.MALFORMED_LANGUAGE_TAG",
466                               "Malformed language tag \"$0\".", languageTagString);
467                           PEG_METHOD_EXIT();
468 marek    1.29             // throw Exception(MessageLoader::getMessage(parms));
469 marek    1.28             // do not localize message, requires a language tag for this
470                           // localization can cause recursion here
471                           throw Exception(parms.toString());
472 kumpf    1.21         }
473                   }
474               
475                   PEG_METHOD_EXIT();
476 humberto 1.8  }
477               
478 kumpf    1.21 Boolean LanguageParser::_isValidPrimarySubtagSyntax(const String& subtag)
479               {
480                   if ((subtag.size() == 0) || (subtag.size() > 8))
481                   {
482                       return false;
483                   }
484               
485                   for (Uint32 i = 0, n = subtag.size(); i < n; i++)
486                   {
487                       if (!(isascii(subtag[i]) && isalpha(subtag[i])))
488                       {
489                           return false;
490                       }
491                   }
492               
493                   return true;
494               }
495               
496               Boolean LanguageParser::_isValidSubtagSyntax(const String& subtag)
497               {
498                   if ((subtag.size() == 0) || (subtag.size() > 8))
499 kumpf    1.21     {
500                       return false;
501                   }
502               
503                   for (Uint32 i = 0, n = subtag.size(); i < n; i++)
504                   {
505                       if (!(isascii(subtag[i]) && isalnum(subtag[i])))
506                       {
507                           return false;
508                       }
509                   }
510               
511                   return true;
512               }
513 humberto 1.8  
514               PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2