(file) Return to LanguageParser.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

  1 karl  1.25 //%2006////////////////////////////////////////////////////////////////////////
  2 humberto 1.8  //
  3 karl     1.10 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
  4               // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
  5               // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
  6 karl     1.9  // IBM Corp.; EMC Corporation, The Open Group.
  7 karl     1.10 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
  8               // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
  9 karl     1.11 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
 10               // EMC Corporation; VERITAS Software Corporation; The Open Group.
 11 karl     1.25 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
 12               // EMC Corporation; Symantec Corporation; The Open Group.
 13 humberto 1.8  //
 14               // Permission is hereby granted, free of charge, to any person obtaining a copy
 15               // of this software and associated documentation files (the "Software"), to
 16               // deal in the Software without restriction, including without limitation the
 17               // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 18               // sell copies of the Software, and to permit persons to whom the Software is
 19               // furnished to do so, subject to the following conditions:
 20               // 
 21               // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
 22               // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
 23               // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
 24               // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 25               // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 26               // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 27               // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 28               // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 29               //
 30               //==============================================================================
 31               //
 32               //%/////////////////////////////////////////////////////////////////////////////
 33               
 34 humberto 1.8  #include <Pegasus/Common/LanguageParser.h>
 35               #include <Pegasus/Common/InternalException.h>
 36               #include <Pegasus/Common/Tracer.h>
 37 kumpf    1.30 #include <Pegasus/Common/MessageLoader.h>
 38 humberto 1.8  #include <cstring>
 39               
 40 kumpf    1.21 #ifdef PEGASUS_HAS_ICU
 41               # include <unicode/locid.h>
 42               #endif
 43               #if defined(PEGASUS_OS_OS400)
 44 mike     1.27 # include "EBCDIC_OS400.h"
 45 kumpf    1.21 #endif
 46               
 47 humberto 1.8  PEGASUS_NAMESPACE_BEGIN
 48               
 49 kumpf    1.21 static char LANGUAGE_TAG_SEPARATOR_CHAR = '-';
 50 kumpf    1.26 static char LOCALE_ID_SEPARATOR_CHAR = '_';
 51 kumpf    1.21 
 52 kumpf    1.23 AcceptLanguageList LanguageParser::parseAcceptLanguageHeader(
 53 kumpf    1.21     const String& acceptLanguageHeader)
 54               {
 55                   PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseAcceptLanguageHeader");
 56               
 57 kumpf    1.23     AcceptLanguageList acceptLanguages;
 58 kumpf    1.21 
 59                   try
 60                   {
 61                       Array<String> languageElements;
 62                       LanguageParser::_parseLanguageHeader(
 63                           acceptLanguageHeader,
 64                           languageElements);
 65               
 66                       for (Uint32 i = 0; i < languageElements.size(); i++)
 67                       {
 68                           String languageTagString;
 69                           Real32 qualityValue;
 70                           LanguageParser::_parseAcceptLanguageElement(
 71                               languageElements[i], languageTagString, qualityValue);
 72 kumpf    1.30             acceptLanguages.insert(
 73                               LanguageTag(languageTagString), qualityValue);
 74 kumpf    1.21         }
 75                   }
 76                   catch (Exception& e)
 77                   {
 78                       throw InvalidAcceptLanguageHeader(e.getMessage());
 79                   }
 80               
 81                   PEG_METHOD_EXIT();
 82                   return acceptLanguages;
 83               }
 84               
 85 kumpf    1.23 ContentLanguageList LanguageParser::parseContentLanguageHeader(
 86 kumpf    1.21     const String& contentLanguageHeader)
 87               {
 88                   PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseContentLanguageHeader");
 89               
 90 kumpf    1.23     ContentLanguageList contentLanguages;
 91 kumpf    1.21 
 92                   try
 93                   {
 94                       Array<String> languageElements;
 95                       LanguageParser::_parseLanguageHeader(
 96                           contentLanguageHeader,
 97                           languageElements);
 98               
 99                       for (Uint32 i = 0; i < languageElements.size(); i++)
100                       {
101                           contentLanguages.append(LanguageTag(languageElements[i]));
102                       }
103                   }
104                   catch (Exception& e)
105                   {
106                       throw InvalidContentLanguageHeader(e.getMessage());
107                   }
108               
109                   PEG_METHOD_EXIT();
110                   return contentLanguages;
111               }
112 kumpf    1.21 
113               void LanguageParser::parseLanguageTag(
114 kumpf    1.24     const String& languageTagString,
115 kumpf    1.21     String& language,
116                   String& country,
117                   String& variant)
118               {
119                   PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseLanguageTag");
120               
121                   language.clear();
122                   country.clear();
123                   variant.clear();
124               
125                   if (languageTagString == "*")
126                   {
127                       // Parsing and validation is complete
128                       PEG_METHOD_EXIT();
129                       return;
130                   }
131               
132                   Boolean isStandardFormat = true;    // RFC 3066 (ISO 639, ISO 3166)
133                   Array<String> subtags;
134               
135                   _parseLanguageSubtags(subtags, languageTagString);
136 kumpf    1.21 
137                   // _parseLanguageSubtags() always returns at least one subtag.
138                   PEGASUS_ASSERT(subtags.size() > 0);
139               
140                   // Validate the primary subtag.
141                   // Given a languageTagString "en-US-mn" the language is "en".
142               
143                   language = subtags[0];
144               
145                   if ((language == "i") || (language == "x"))
146                   {
147                       // These primary tags are allowed, but are not ISO 639 compliant
148                       isStandardFormat = false;
149                       language.clear();
150                   }
151                   else if ((language.size() != 2) && (language.size() != 3))
152                   {
153                       // Except for "i" and "x", primary tags must be 2 or 3 characters,
154                       // according to RFC 3066.
155                       MessageLoaderParms parms(
156                           "Common.LanguageParser.INVALID_LANGUAGE_TAG",
157 kumpf    1.21             "Invalid language tag \"$0\".", languageTagString);
158                       PEG_METHOD_EXIT();
159 marek    1.29         // throw Exception(MessageLoader::getMessage(parms));
160 marek    1.28         // do not localize message, requires a language tag for this
161                       // localization can cause recursion here
162                       throw Exception(parms.toString());
163 kumpf    1.21     }
164               
165                   if (subtags.size() == 1)
166                   {
167                       // If only the primary subtag is present, we are done!
168                       PEG_METHOD_EXIT();
169                       return;
170                   }
171               
172                   // Validate the second subtag.
173                   // Given a languageTagString "en-US-mn" the country is "US".
174               
175                   if (subtags[1].size() == 1)
176                   {
177                       // The second subtag may not be a single character according to
178                       // RFC 3066.
179                       MessageLoaderParms parms(
180                           "Common.LanguageParser.INVALID_LANGUAGE_TAG",
181                           "Invalid language tag \"$0\".", languageTagString);
182                       PEG_METHOD_EXIT();
183 marek    1.29         // throw Exception(MessageLoader::getMessage(parms));
184 marek    1.28         // do not localize message, requires a language tag for this
185                       // localization can cause recursion here
186                       throw Exception(parms.toString());
187 kumpf    1.21     }
188               
189                   if (isStandardFormat)
190                   {
191                       Uint32 variantIndex = 1;
192               
193                       if (subtags[1].size() == 2)
194                       {
195                           country = subtags[1];
196                           variantIndex = 2;
197                       }
198               
199                       Uint32 numSubtags = subtags.size();
200               
201                       if (variantIndex < numSubtags)
202                       {
203                           variant = subtags[variantIndex++];
204               
205                           while (variantIndex < numSubtags)
206                           {
207                               variant.append(LANGUAGE_TAG_SEPARATOR_CHAR);
208 kumpf    1.21                 variant.append(subtags[variantIndex++]);
209                           }
210                       }
211                   }
212               
213                   PEG_METHOD_EXIT();
214               }
215               
216               void LanguageParser::validateQualityValue(Real32 quality)
217               {
218                   if ((quality > 1.0) || (quality < 0.0))
219                   {
220                       MessageLoaderParms parms(
221                           "Common.LanguageParser.INVALID_QUALITY_VALUE",
222                           "AcceptLanguage contains an invalid quality value");
223                       throw InvalidAcceptLanguageHeader(MessageLoader::getMessage(parms));
224                   }
225               }
226               
227               String LanguageParser::buildAcceptLanguageHeader(
228 kumpf    1.23     const AcceptLanguageList& acceptLanguages)
229 kumpf    1.21 {
230                   String alString;
231                   Uint32 numAcceptLanguages = acceptLanguages.size();
232               
233                   for (Uint32 i = 0; i < numAcceptLanguages; i++)
234                   {
235                       alString.append(acceptLanguages.getLanguageTag(i).toString());
236               
237                       Real32 q = acceptLanguages.getQualityValue(i);
238                       if (q != 1.0)
239                       {
240                           char qValueString[6];
241                           sprintf(qValueString, "%4.3f", q);
242                           alString.append(";q=");
243                           alString.append(qValueString);
244                       }
245               
246                       if (i < numAcceptLanguages - 1)
247                       {
248                           alString.append(",");
249                       }
250 kumpf    1.21     }
251               
252                   return alString;
253               }
254               
255               String LanguageParser::buildContentLanguageHeader(
256 kumpf    1.23     const ContentLanguageList& contentLanguages)
257 kumpf    1.21 {
258                   String clString;
259                   Uint32 numContentLanguages = contentLanguages.size();
260               
261                   for (Uint32 i = 0; i < numContentLanguages; i++)
262                   {
263                       clString.append(contentLanguages.getLanguageTag(i).toString());
264               
265                       if (i < numContentLanguages - 1)
266                       {
267                           clString.append(",");
268                       }
269                   }
270               
271                   return clString;
272               }
273               
274 kumpf    1.26 #ifdef PEGASUS_HAS_ICU
275               String& LanguageParser::convertLocaleIdToLanguageTag(String& localeId)
276               {
277                   Uint32 index = 0;
278                   while ((index = localeId.find(index, LOCALE_ID_SEPARATOR_CHAR)) !=
279                               PEG_NOT_FOUND)
280                   {
281                       localeId[index] = LANGUAGE_TAG_SEPARATOR_CHAR;
282                   }
283                   return localeId;
284               }
285               #endif
286               
287 kumpf    1.23 AcceptLanguageList LanguageParser::getDefaultAcceptLanguages()
288 kumpf    1.21 {
289               #if defined(PEGASUS_HAS_MESSAGES) && defined(PEGASUS_HAS_ICU)
290                   Locale default_loc = Locale::getDefault();
291               
292               # ifdef PEGASUS_OS_OS400
293                   char* tmp = (char*)default_loc.getName();
294                   char tmp_[100];
295                   EtoA(strcpy(tmp_,tmp));
296 kumpf    1.26     String localeId = tmp_;
297 kumpf    1.21 # else
298 kumpf    1.26     String localeId = default_loc.getName();
299               # endif
300               
301 kumpf    1.21     try
302                   {
303 kumpf    1.26         return LanguageParser::parseAcceptLanguageHeader(
304                           convertLocaleIdToLanguageTag(localeId));
305 kumpf    1.21     }
306                   catch (const InvalidAcceptLanguageHeader& e)
307                   {
308                       Logger::put_l(Logger::ERROR_LOG, System::CIMSERVER, Logger::SEVERE,
309                          "src.Server.cimserver.FAILED_TO_GET_PROCESS_LOCALE",
310                          "Could not convert the system locale to a valid accept-language "
311                              "format");
312                       Logger::put(Logger::ERROR_LOG, System::CIMSERVER, Logger::SEVERE,
313                           e.getMessage());
314 kumpf    1.23         AcceptLanguageList al;
315 kumpf    1.22         al.insert(LanguageTag("*"), 1);
316                       return al;
317 kumpf    1.21     }
318 kumpf    1.22 #else
319 kumpf    1.23     return AcceptLanguageList();
320 kumpf    1.21 #endif
321               }
322               
323               void LanguageParser::_parseLanguageHeader(
324                   const String& languageHeaderValue,
325                   Array<String>& languageElements)
326               {
327                   PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseLanguageHeader");
328               
329                   languageElements.clear();
330                   String element;
331               
332                   for (Uint32 i=0, len=languageHeaderValue.size(); i<len; i++)
333                   {
334                       Char16 nextChar = languageHeaderValue[i];
335               
336                       if (isascii(nextChar) && isspace(nextChar))
337                       {
338                           // Ignore whitespace
339                       }
340                       else if (nextChar == '(')
341 kumpf    1.21         {
342                           // Ignore comments
343                           while (i < len)
344                           {
345                               // Search for the closing parenthesis
346                               if (languageHeaderValue[i] == ')')
347                               {
348                                   break;
349                               }
350               
351                               // Skip over escape characters
352                               if (languageHeaderValue[i] == '\\')
353                               {
354                                   i++;
355                               }
356               
357                               i++;
358                           }
359               
360                           // Check for a missing closing parenthesis
361                           if (i >= len)
362 kumpf    1.21             {
363                               MessageLoaderParms parms(
364                                   "Common.LanguageParser.DOES_NOT_CONTAIN_TERMINATING",
365                                   "Closing \")\" character is missing.");
366                               throw Exception(MessageLoader::getMessage(parms));
367                           }
368                       }
369                       else if (nextChar == ',')
370                       {
371                           // Check for the end of the element
372                           languageElements.append(element);
373                           element.clear();
374                       }
375                       else
376                       {
377                           // Unescape an escape character
378                           if ((nextChar == '\\') && (i < len-1))
379                           {
380                               nextChar = languageHeaderValue[++i];
381                           }
382               
383 kumpf    1.21             // Include this character in the value
384                           element.append(nextChar);
385                       }
386                   }
387               
388                   // Include the last element in the languageElements array
389                   languageElements.append(element);
390               
391                   PEG_METHOD_EXIT();
392               }
393               
394               void LanguageParser::_parseAcceptLanguageElement(
395                   const String& acceptLanguageElement,
396                   String& languageTag,
397                   Real32& quality)
398               {
399                   PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseAcceptLanguageElement");
400               
401                   // look for ';' in acceptLanguageElement, that means we have a
402                   // quality value to capture.  If not present, we only have a language tag.
403               
404 kumpf    1.21     Uint32 semicolonIndex = acceptLanguageElement.find(";");
405                   if (semicolonIndex != PEG_NOT_FOUND)
406                   {
407                       // Separate the language tag and quality value
408               
409                       String qualityString =
410                           acceptLanguageElement.subString(semicolonIndex+1);
411                       languageTag = acceptLanguageElement.subString(0, semicolonIndex);
412               
413                       // Parse the quality value
414 humberto 1.8  
415 kumpf    1.21         char dummyChar;
416                       int scanfConversions = sscanf(
417                           qualityString.getCString(),
418                           "q=%f%c", &quality, &dummyChar);
419               
420                       if ((scanfConversions != 1) ||
421                           (qualityString.size() > 7))
422                       {
423                           MessageLoaderParms parms(
424                               "Common.LanguageParser.INVALID_QUALITY_VALUE",
425                               "AcceptLanguage contains an invalid quality value");
426                           PEG_METHOD_EXIT();
427                           throw Exception(MessageLoader::getMessage(parms));
428                       }
429                   }
430                   else
431                   {
432                       languageTag = acceptLanguageElement;
433                       quality = 1.0;
434                   }
435               
436 kumpf    1.21     PEG_METHOD_EXIT();
437               }
438               
439               void LanguageParser::_parseLanguageSubtags(
440                   Array<String>& subtags,
441                   const String& languageTagString)
442               {
443                   PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseLanguageSubtags");
444               
445                   // Parse the language tag into subtags
446               
447                   Uint32 subtagIndex = 0;
448                   Uint32 separatorIndex;
449                   while ((separatorIndex = languageTagString.find(
450                               subtagIndex, LANGUAGE_TAG_SEPARATOR_CHAR)) != PEG_NOT_FOUND)
451                   {
452                       subtags.append(languageTagString.subString(
453                           subtagIndex, separatorIndex - subtagIndex));
454                       subtagIndex = separatorIndex + 1;
455                   }
456                   subtags.append(languageTagString.subString(subtagIndex));
457 kumpf    1.21 
458                   // Validate the syntax of each of the subtags
459               
460                   for (Uint32 i = 0, n = subtags.size(); i < n; i++)
461                   {
462                       if (((i == 0) && !_isValidPrimarySubtagSyntax(subtags[i])) ||
463                           ((i > 0) && !_isValidSubtagSyntax(subtags[i])))
464                       {
465                           MessageLoaderParms parms(
466                               "Common.LanguageParser.MALFORMED_LANGUAGE_TAG",
467                               "Malformed language tag \"$0\".", languageTagString);
468                           PEG_METHOD_EXIT();
469 marek    1.29             // throw Exception(MessageLoader::getMessage(parms));
470 marek    1.28             // do not localize message, requires a language tag for this
471                           // localization can cause recursion here
472                           throw Exception(parms.toString());
473 kumpf    1.21         }
474                   }
475               
476                   PEG_METHOD_EXIT();
477 humberto 1.8  }
478               
479 kumpf    1.21 Boolean LanguageParser::_isValidPrimarySubtagSyntax(const String& subtag)
480               {
481                   if ((subtag.size() == 0) || (subtag.size() > 8))
482                   {
483                       return false;
484                   }
485               
486                   for (Uint32 i = 0, n = subtag.size(); i < n; i++)
487                   {
488                       if (!(isascii(subtag[i]) && isalpha(subtag[i])))
489                       {
490                           return false;
491                       }
492                   }
493               
494                   return true;
495               }
496               
497               Boolean LanguageParser::_isValidSubtagSyntax(const String& subtag)
498               {
499                   if ((subtag.size() == 0) || (subtag.size() > 8))
500 kumpf    1.21     {
501                       return false;
502                   }
503               
504                   for (Uint32 i = 0, n = subtag.size(); i < n; i++)
505                   {
506                       if (!(isascii(subtag[i]) && isalnum(subtag[i])))
507                       {
508                           return false;
509                       }
510                   }
511               
512                   return true;
513               }
514 humberto 1.8  
515               PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2