(file) Return to LanguageParser.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

  1 karl  1.25 //%2006////////////////////////////////////////////////////////////////////////
  2 humberto 1.8  //
  3 karl     1.10 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
  4               // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
  5               // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
  6 karl     1.9  // IBM Corp.; EMC Corporation, The Open Group.
  7 karl     1.10 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
  8               // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
  9 karl     1.11 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
 10               // EMC Corporation; VERITAS Software Corporation; The Open Group.
 11 karl     1.25 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
 12               // EMC Corporation; Symantec Corporation; The Open Group.
 13 humberto 1.8  //
 14               // Permission is hereby granted, free of charge, to any person obtaining a copy
 15               // of this software and associated documentation files (the "Software"), to
 16               // deal in the Software without restriction, including without limitation the
 17               // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 18               // sell copies of the Software, and to permit persons to whom the Software is
 19               // furnished to do so, subject to the following conditions:
 20               // 
 21               // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
 22               // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
 23               // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
 24               // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 25               // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 26               // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 27               // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 28               // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 29               //
 30               //==============================================================================
 31               //
 32               // Author: Humberto Rivero (hurivero@us.ibm.com)
 33               //
 34 aruran.ms 1.15 // Modified By: Aruran, IBM (ashanmug@in.ibm.com) for Bug# 3697, 3698, 3699, 3700
 35 aruran.ms 1.19 //              Aruran, IBM (ashanmug@in.ibm.com) for Bug# 3701, 3702, 3703, 3704
 36 kumpf     1.21 //              Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
 37 humberto  1.8  //
 38                //%/////////////////////////////////////////////////////////////////////////////
 39                
 40                #include <Pegasus/Common/LanguageParser.h>
 41                #include <Pegasus/Common/InternalException.h>
 42                #include <Pegasus/Common/Tracer.h>
 43                #include <Pegasus/Common/MessageLoader.h> //l10n
 44                #include <cstring>
 45                
 46 kumpf     1.21 #ifdef PEGASUS_HAS_ICU
 47                # include <unicode/locid.h>
 48                #endif
 49                #if defined(PEGASUS_OS_OS400)
 50                # include "OS400ConvertChar.h"
 51                #endif
 52                
 53 humberto  1.8  PEGASUS_NAMESPACE_BEGIN
 54                
 55 kumpf     1.21 static char LANGUAGE_TAG_SEPARATOR_CHAR = '-';
 56 kumpf     1.25.2.1 static char LOCALE_ID_SEPARATOR_CHAR = '_';
 57 kumpf     1.21     
 58 kumpf     1.23     AcceptLanguageList LanguageParser::parseAcceptLanguageHeader(
 59 kumpf     1.21         const String& acceptLanguageHeader)
 60                    {
 61                        PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseAcceptLanguageHeader");
 62                    
 63 kumpf     1.23         AcceptLanguageList acceptLanguages;
 64 kumpf     1.21     
 65                        try
 66                        {
 67                            Array<String> languageElements;
 68                            LanguageParser::_parseLanguageHeader(
 69                                acceptLanguageHeader,
 70                                languageElements);
 71                    
 72                            for (Uint32 i = 0; i < languageElements.size(); i++)
 73                            {
 74                                String languageTagString;
 75                                Real32 qualityValue;
 76                                LanguageParser::_parseAcceptLanguageElement(
 77                                    languageElements[i], languageTagString, qualityValue);
 78                                acceptLanguages.insert(LanguageTag(languageTagString), qualityValue);
 79                            }
 80                        }
 81                        catch (Exception& e)
 82                        {
 83                            throw InvalidAcceptLanguageHeader(e.getMessage());
 84                        }
 85 kumpf     1.21     
 86                        PEG_METHOD_EXIT();
 87                        return acceptLanguages;
 88                    }
 89                    
 90 kumpf     1.23     ContentLanguageList LanguageParser::parseContentLanguageHeader(
 91 kumpf     1.21         const String& contentLanguageHeader)
 92                    {
 93                        PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseContentLanguageHeader");
 94                    
 95 kumpf     1.23         ContentLanguageList contentLanguages;
 96 kumpf     1.21     
 97                        try
 98                        {
 99                            Array<String> languageElements;
100                            LanguageParser::_parseLanguageHeader(
101                                contentLanguageHeader,
102                                languageElements);
103                    
104                            for (Uint32 i = 0; i < languageElements.size(); i++)
105                            {
106                                contentLanguages.append(LanguageTag(languageElements[i]));
107                            }
108                        }
109                        catch (Exception& e)
110                        {
111                            throw InvalidContentLanguageHeader(e.getMessage());
112                        }
113                    
114                        PEG_METHOD_EXIT();
115                        return contentLanguages;
116                    }
117 kumpf     1.21     
118                    void LanguageParser::parseLanguageTag(
119 kumpf     1.24         const String& languageTagString,
120 kumpf     1.21         String& language,
121                        String& country,
122                        String& variant)
123                    {
124                        PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseLanguageTag");
125                    
126                        language.clear();
127                        country.clear();
128                        variant.clear();
129                    
130                        if (languageTagString == "*")
131                        {
132                            // Parsing and validation is complete
133                            PEG_METHOD_EXIT();
134                            return;
135                        }
136                    
137                        Boolean isStandardFormat = true;    // RFC 3066 (ISO 639, ISO 3166)
138                        Array<String> subtags;
139                    
140                        _parseLanguageSubtags(subtags, languageTagString);
141 kumpf     1.21     
142                        // _parseLanguageSubtags() always returns at least one subtag.
143                        PEGASUS_ASSERT(subtags.size() > 0);
144                    
145                        // Validate the primary subtag.
146                        // Given a languageTagString "en-US-mn" the language is "en".
147                    
148                        language = subtags[0];
149                    
150                        if ((language == "i") || (language == "x"))
151                        {
152                            // These primary tags are allowed, but are not ISO 639 compliant
153                            isStandardFormat = false;
154                            language.clear();
155                        }
156                        else if ((language.size() != 2) && (language.size() != 3))
157                        {
158                            // Except for "i" and "x", primary tags must be 2 or 3 characters,
159                            // according to RFC 3066.
160                            MessageLoaderParms parms(
161                                "Common.LanguageParser.INVALID_LANGUAGE_TAG",
162 kumpf     1.21                 "Invalid language tag \"$0\".", languageTagString);
163                            PEG_METHOD_EXIT();
164 kumpf     1.25.2.2         // Do not localize message.  Requiring a language tag for this
165                            // localization can cause recursion with the MessageLoader.
166                            throw Exception(parms.toString());
167 kumpf     1.21         }
168                    
169                        if (subtags.size() == 1)
170                        {
171                            // If only the primary subtag is present, we are done!
172                            PEG_METHOD_EXIT();
173                            return;
174                        }
175                    
176                        // Validate the second subtag.
177                        // Given a languageTagString "en-US-mn" the country is "US".
178                    
179                        if (subtags[1].size() == 1)
180                        {
181                            // The second subtag may not be a single character according to
182                            // RFC 3066.
183                            MessageLoaderParms parms(
184                                "Common.LanguageParser.INVALID_LANGUAGE_TAG",
185                                "Invalid language tag \"$0\".", languageTagString);
186                            PEG_METHOD_EXIT();
187 kumpf     1.25.2.2         // Do not localize message.  Requiring a language tag for this
188                            // localization can cause recursion with the MessageLoader.
189                            throw Exception(parms.toString());
190 kumpf     1.21         }
191                    
192                        if (isStandardFormat)
193                        {
194                            Uint32 variantIndex = 1;
195                    
196                            if (subtags[1].size() == 2)
197                            {
198                                country = subtags[1];
199                                variantIndex = 2;
200                            }
201                    
202                            Uint32 numSubtags = subtags.size();
203                    
204                            if (variantIndex < numSubtags)
205                            {
206                                variant = subtags[variantIndex++];
207                    
208                                while (variantIndex < numSubtags)
209                                {
210                                    variant.append(LANGUAGE_TAG_SEPARATOR_CHAR);
211 kumpf     1.21                     variant.append(subtags[variantIndex++]);
212                                }
213                            }
214                        }
215                    
216                        PEG_METHOD_EXIT();
217                    }
218                    
219                    void LanguageParser::validateQualityValue(Real32 quality)
220                    {
221                        if ((quality > 1.0) || (quality < 0.0))
222                        {
223                            MessageLoaderParms parms(
224                                "Common.LanguageParser.INVALID_QUALITY_VALUE",
225                                "AcceptLanguage contains an invalid quality value");
226                            throw InvalidAcceptLanguageHeader(MessageLoader::getMessage(parms));
227                        }
228                    }
229                    
230                    String LanguageParser::buildAcceptLanguageHeader(
231 kumpf     1.23         const AcceptLanguageList& acceptLanguages)
232 kumpf     1.21     {
233                        String alString;
234                        Uint32 numAcceptLanguages = acceptLanguages.size();
235                    
236                        for (Uint32 i = 0; i < numAcceptLanguages; i++)
237                        {
238                            alString.append(acceptLanguages.getLanguageTag(i).toString());
239                    
240                            Real32 q = acceptLanguages.getQualityValue(i);
241                            if (q != 1.0)
242                            {
243                                char qValueString[6];
244                                sprintf(qValueString, "%4.3f", q);
245                                alString.append(";q=");
246                                alString.append(qValueString);
247                            }
248                    
249                            if (i < numAcceptLanguages - 1)
250                            {
251                                alString.append(",");
252                            }
253 kumpf     1.21         }
254                    
255                        return alString;
256                    }
257                    
258                    String LanguageParser::buildContentLanguageHeader(
259 kumpf     1.23         const ContentLanguageList& contentLanguages)
260 kumpf     1.21     {
261                        String clString;
262                        Uint32 numContentLanguages = contentLanguages.size();
263                    
264                        for (Uint32 i = 0; i < numContentLanguages; i++)
265                        {
266                            clString.append(contentLanguages.getLanguageTag(i).toString());
267                    
268                            if (i < numContentLanguages - 1)
269                            {
270                                clString.append(",");
271                            }
272                        }
273                    
274                        return clString;
275                    }
276                    
#ifdef PEGASUS_HAS_ICU
// Rewrites a locale identifier in place so that it uses language tag
// separators: every LOCALE_ID_SEPARATOR_CHAR ('_') is replaced with
// LANGUAGE_TAG_SEPARATOR_CHAR ('-').  Returns a reference to the modified
// argument.
String& LanguageParser::convertLocaleIdToLanguageTag(String& localeId)
{
    Uint32 pos = 0;

    for (;;)
    {
        // Resume the search where the previous separator was found; that
        // position now holds '-', so it will not match again.
        pos = localeId.find(pos, LOCALE_ID_SEPARATOR_CHAR);

        if (pos == PEG_NOT_FOUND)
        {
            break;
        }

        localeId[pos] = LANGUAGE_TAG_SEPARATOR_CHAR;
    }

    return localeId;
}
#endif
289                    
290 kumpf     1.23     AcceptLanguageList LanguageParser::getDefaultAcceptLanguages()
291 kumpf     1.21     {
292                    #if defined(PEGASUS_HAS_MESSAGES) && defined(PEGASUS_HAS_ICU)
293                        Locale default_loc = Locale::getDefault();
294                    
295                    # ifdef PEGASUS_OS_OS400
296                        char* tmp = (char*)default_loc.getName();
297                        char tmp_[100];
298                        EtoA(strcpy(tmp_,tmp));
299 kumpf     1.25.2.1     String localeId = tmp_;
300 kumpf     1.21     # else
301 kumpf     1.25.2.1     String localeId = default_loc.getName();
302                    # endif
303                    
304 kumpf     1.21         try
305                        {
306 kumpf     1.25.2.1         return LanguageParser::parseAcceptLanguageHeader(
307                                convertLocaleIdToLanguageTag(localeId));
308 kumpf     1.21         }
309                        catch (const InvalidAcceptLanguageHeader& e)
310                        {
311                            Logger::put_l(Logger::ERROR_LOG, System::CIMSERVER, Logger::SEVERE,
312                               "src.Server.cimserver.FAILED_TO_GET_PROCESS_LOCALE",
313                               "Could not convert the system locale to a valid accept-language "
314                                   "format");
315                            Logger::put(Logger::ERROR_LOG, System::CIMSERVER, Logger::SEVERE,
316                                e.getMessage());
317 kumpf     1.23             AcceptLanguageList al;
318 kumpf     1.22             al.insert(LanguageTag("*"), 1);
319                            return al;
320 kumpf     1.21         }
321 kumpf     1.22     #else
322 kumpf     1.23         return AcceptLanguageList();
323 kumpf     1.21     #endif
324                    }
325                    
326                    void LanguageParser::_parseLanguageHeader(
327                        const String& languageHeaderValue,
328                        Array<String>& languageElements)
329                    {
330                        PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseLanguageHeader");
331                    
332                        languageElements.clear();
333                        String element;
334                    
335                        for (Uint32 i=0, len=languageHeaderValue.size(); i<len; i++)
336                        {
337                            Char16 nextChar = languageHeaderValue[i];
338                    
339                            if (isascii(nextChar) && isspace(nextChar))
340                            {
341                                // Ignore whitespace
342                            }
343                            else if (nextChar == '(')
344 kumpf     1.21             {
345                                // Ignore comments
346                                while (i < len)
347                                {
348                                    // Search for the closing parenthesis
349                                    if (languageHeaderValue[i] == ')')
350                                    {
351                                        break;
352                                    }
353                    
354                                    // Skip over escape characters
355                                    if (languageHeaderValue[i] == '\\')
356                                    {
357                                        i++;
358                                    }
359                    
360                                    i++;
361                                }
362                    
363                                // Check for a missing closing parenthesis
364                                if (i >= len)
365 kumpf     1.21                 {
366                                    MessageLoaderParms parms(
367                                        "Common.LanguageParser.DOES_NOT_CONTAIN_TERMINATING",
368                                        "Closing \")\" character is missing.");
369                                    throw Exception(MessageLoader::getMessage(parms));
370                                }
371                            }
372                            else if (nextChar == ',')
373                            {
374                                // Check for the end of the element
375                                languageElements.append(element);
376                                element.clear();
377                            }
378                            else
379                            {
380                                // Unescape an escape character
381                                if ((nextChar == '\\') && (i < len-1))
382                                {
383                                    nextChar = languageHeaderValue[++i];
384                                }
385                    
386 kumpf     1.21                 // Include this character in the value
387                                element.append(nextChar);
388                            }
389                        }
390                    
391                        // Include the last element in the languageElements array
392                        languageElements.append(element);
393                    
394                        PEG_METHOD_EXIT();
395                    }
396                    
397                    void LanguageParser::_parseAcceptLanguageElement(
398                        const String& acceptLanguageElement,
399                        String& languageTag,
400                        Real32& quality)
401                    {
402                        PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseAcceptLanguageElement");
403                    
404                        // look for ';' in acceptLanguageElement, that means we have a
405                        // quality value to capture.  If not present, we only have a language tag.
406                    
407 kumpf     1.21         Uint32 semicolonIndex = acceptLanguageElement.find(";");
408                        if (semicolonIndex != PEG_NOT_FOUND)
409                        {
410                            // Separate the language tag and quality value
411                    
412                            String qualityString =
413                                acceptLanguageElement.subString(semicolonIndex+1);
414                            languageTag = acceptLanguageElement.subString(0, semicolonIndex);
415                    
416                            // Parse the quality value
417 humberto  1.8      
418 kumpf     1.21             char dummyChar;
419                            int scanfConversions = sscanf(
420                                qualityString.getCString(),
421                                "q=%f%c", &quality, &dummyChar);
422                    
423                            if ((scanfConversions != 1) ||
424                                (qualityString.size() > 7))
425                            {
426                                MessageLoaderParms parms(
427                                    "Common.LanguageParser.INVALID_QUALITY_VALUE",
428                                    "AcceptLanguage contains an invalid quality value");
429                                PEG_METHOD_EXIT();
430                                throw Exception(MessageLoader::getMessage(parms));
431                            }
432                        }
433                        else
434                        {
435                            languageTag = acceptLanguageElement;
436                            quality = 1.0;
437                        }
438                    
439 kumpf     1.21         PEG_METHOD_EXIT();
440                    }
441                    
442                    void LanguageParser::_parseLanguageSubtags(
443                        Array<String>& subtags,
444                        const String& languageTagString)
445                    {
446                        PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseLanguageSubtags");
447                    
448                        // Parse the language tag into subtags
449                    
450                        Uint32 subtagIndex = 0;
451                        Uint32 separatorIndex;
452                        while ((separatorIndex = languageTagString.find(
453                                    subtagIndex, LANGUAGE_TAG_SEPARATOR_CHAR)) != PEG_NOT_FOUND)
454                        {
455                            subtags.append(languageTagString.subString(
456                                subtagIndex, separatorIndex - subtagIndex));
457                            subtagIndex = separatorIndex + 1;
458                        }
459                        subtags.append(languageTagString.subString(subtagIndex));
460 kumpf     1.21     
461                        // Validate the syntax of each of the subtags
462                    
463                        for (Uint32 i = 0, n = subtags.size(); i < n; i++)
464                        {
465                            if (((i == 0) && !_isValidPrimarySubtagSyntax(subtags[i])) ||
466                                ((i > 0) && !_isValidSubtagSyntax(subtags[i])))
467                            {
468                                MessageLoaderParms parms(
469                                    "Common.LanguageParser.MALFORMED_LANGUAGE_TAG",
470                                    "Malformed language tag \"$0\".", languageTagString);
471                                PEG_METHOD_EXIT();
472 kumpf     1.25.2.2             // Do not localize message.  Requiring a language tag for this
473                                // localization can cause recursion with the MessageLoader.
474                                throw Exception(parms.toString());
475 kumpf     1.21             }
476                        }
477                    
478                        PEG_METHOD_EXIT();
479 humberto  1.8      }
480                    
481 kumpf     1.21     Boolean LanguageParser::_isValidPrimarySubtagSyntax(const String& subtag)
482                    {
483                        if ((subtag.size() == 0) || (subtag.size() > 8))
484                        {
485                            return false;
486                        }
487                    
488                        for (Uint32 i = 0, n = subtag.size(); i < n; i++)
489                        {
490                            if (!(isascii(subtag[i]) && isalpha(subtag[i])))
491                            {
492                                return false;
493                            }
494                        }
495                    
496                        return true;
497                    }
498                    
499                    Boolean LanguageParser::_isValidSubtagSyntax(const String& subtag)
500                    {
501                        if ((subtag.size() == 0) || (subtag.size() > 8))
502 kumpf     1.21         {
503                            return false;
504                        }
505                    
506                        for (Uint32 i = 0, n = subtag.size(); i < n; i++)
507                        {
508                            if (!(isascii(subtag[i]) && isalnum(subtag[i])))
509                            {
510                                return false;
511                            }
512                        }
513                    
514                        return true;
515                    }
516 humberto  1.8      
517                    PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2