(file) Return to LanguageParser.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

  1 karl  1.11 //%2005////////////////////////////////////////////////////////////////////////
  2 humberto 1.8  //
  3 karl     1.10 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
  4               // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
  5               // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
  6 karl     1.9  // IBM Corp.; EMC Corporation, The Open Group.
  7 karl     1.10 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
  8               // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
  9 karl     1.11 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
 10               // EMC Corporation; VERITAS Software Corporation; The Open Group.
 11 humberto 1.8  //
 12               // Permission is hereby granted, free of charge, to any person obtaining a copy
 13               // of this software and associated documentation files (the "Software"), to
 14               // deal in the Software without restriction, including without limitation the
 15               // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 16               // sell copies of the Software, and to permit persons to whom the Software is
 17               // furnished to do so, subject to the following conditions:
 18               // 
 19               // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
 20               // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
 21               // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
 22               // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 23               // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 24               // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 25               // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 26               // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 27               //
 28               //==============================================================================
 29               //
 30               // Author: Humberto Rivero (hurivero@us.ibm.com)
 31               //
 32 aruran.ms 1.15 // Modified By: Aruran, IBM (ashanmug@in.ibm.com) for Bug# 3697, 3698, 3699, 3700
 33 aruran.ms 1.19 //              Aruran, IBM (ashanmug@in.ibm.com) for Bug# 3701, 3702, 3703, 3704
 34 kumpf     1.21 //              Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
 35 humberto  1.8  //
 36                //%/////////////////////////////////////////////////////////////////////////////
 37                
 38                #include <Pegasus/Common/LanguageParser.h>
 39                #include <Pegasus/Common/InternalException.h>
 40                #include <Pegasus/Common/Tracer.h>
 41                #include <Pegasus/Common/MessageLoader.h> //l10n
 42                #include <cstring>
 43                
 44 kumpf     1.21 #ifdef PEGASUS_HAS_ICU
 45                # include <unicode/locid.h>
 46                #endif
 47                #if defined(PEGASUS_OS_OS400)
 48                # include "OS400ConvertChar.h"
 49                #endif
 50                
 51 humberto  1.8  PEGASUS_NAMESPACE_BEGIN
 52                
 53 kumpf     1.21 static char LANGUAGE_TAG_SEPARATOR_CHAR = '-';
 54                
 55 a.dunfey  1.22.4.1 AcceptLanguageList LanguageParser::parseAcceptLanguageHeader(
 56 kumpf     1.21         const String& acceptLanguageHeader)
 57                    {
 58                        PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseAcceptLanguageHeader");
 59                    
 60 a.dunfey  1.22.4.1     AcceptLanguageList acceptLanguages;
 61 kumpf     1.21     
 62                        try
 63                        {
 64                            Array<String> languageElements;
 65                            LanguageParser::_parseLanguageHeader(
 66                                acceptLanguageHeader,
 67                                languageElements);
 68                    
 69                            for (Uint32 i = 0; i < languageElements.size(); i++)
 70                            {
 71                                String languageTagString;
 72                                Real32 qualityValue;
 73                                LanguageParser::_parseAcceptLanguageElement(
 74                                    languageElements[i], languageTagString, qualityValue);
 75                                acceptLanguages.insert(LanguageTag(languageTagString), qualityValue);
 76                            }
 77                        }
 78                        catch (Exception& e)
 79                        {
 80                            throw InvalidAcceptLanguageHeader(e.getMessage());
 81                        }
 82 kumpf     1.21     
 83                        PEG_METHOD_EXIT();
 84                        return acceptLanguages;
 85                    }
 86                    
 87 a.dunfey  1.22.4.1 ContentLanguageList LanguageParser::parseContentLanguageHeader(
 88 kumpf     1.21         const String& contentLanguageHeader)
 89                    {
 90                        PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseContentLanguageHeader");
 91                    
 92 a.dunfey  1.22.4.1     ContentLanguageList contentLanguages;
 93 kumpf     1.21     
 94                        try
 95                        {
 96                            Array<String> languageElements;
 97                            LanguageParser::_parseLanguageHeader(
 98                                contentLanguageHeader,
 99                                languageElements);
100                    
101                            for (Uint32 i = 0; i < languageElements.size(); i++)
102                            {
103                                contentLanguages.append(LanguageTag(languageElements[i]));
104                            }
105                        }
106                        catch (Exception& e)
107                        {
108                            throw InvalidContentLanguageHeader(e.getMessage());
109                        }
110                    
111                        PEG_METHOD_EXIT();
112                        return contentLanguages;
113                    }
114 kumpf     1.21     
115                    void LanguageParser::parseLanguageTag(
116 a.dunfey  1.22.4.1     const String& languageTagString,
117 kumpf     1.21         String& language,
118                        String& country,
119                        String& variant)
120                    {
121                        PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseLanguageTag");
122                    
123                        language.clear();
124                        country.clear();
125                        variant.clear();
126                    
127                        if (languageTagString == "*")
128                        {
129                            // Parsing and validation is complete
130                            PEG_METHOD_EXIT();
131                            return;
132                        }
133                    
134                        Boolean isStandardFormat = true;    // RFC 3066 (ISO 639, ISO 3166)
135                        Array<String> subtags;
136                    
137                        _parseLanguageSubtags(subtags, languageTagString);
138 kumpf     1.21     
139                        // _parseLanguageSubtags() always returns at least one subtag.
140                        PEGASUS_ASSERT(subtags.size() > 0);
141                    
142                        // Validate the primary subtag.
143                        // Given a languageTagString "en-US-mn" the language is "en".
144                    
145                        language = subtags[0];
146                    
147                        if ((language == "i") || (language == "x"))
148                        {
149                            // These primary tags are allowed, but are not ISO 639 compliant
150                            isStandardFormat = false;
151                            language.clear();
152                        }
153                        else if ((language.size() != 2) && (language.size() != 3))
154                        {
155                            // Except for "i" and "x", primary tags must be 2 or 3 characters,
156                            // according to RFC 3066.
157                            MessageLoaderParms parms(
158                                "Common.LanguageParser.INVALID_LANGUAGE_TAG",
159 kumpf     1.21                 "Invalid language tag \"$0\".", languageTagString);
160                            PEG_METHOD_EXIT();
161                            throw Exception(MessageLoader::getMessage(parms));
162                        }
163                    
164                        if (subtags.size() == 1)
165                        {
166                            // If only the primary subtag is present, we are done!
167                            PEG_METHOD_EXIT();
168                            return;
169                        }
170                    
171                        // Validate the second subtag.
172                        // Given a languageTagString "en-US-mn" the country is "US".
173                    
174                        if (subtags[1].size() == 1)
175                        {
176                            // The second subtag may not be a single character according to
177                            // RFC 3066.
178                            MessageLoaderParms parms(
179                                "Common.LanguageParser.INVALID_LANGUAGE_TAG",
180 kumpf     1.21                 "Invalid language tag \"$0\".", languageTagString);
181                            PEG_METHOD_EXIT();
182                            throw Exception(MessageLoader::getMessage(parms));
183                        }
184                    
185                        if (isStandardFormat)
186                        {
187                            Uint32 variantIndex = 1;
188                    
189                            if (subtags[1].size() == 2)
190                            {
191                                country = subtags[1];
192                                variantIndex = 2;
193                            }
194                    
195                            Uint32 numSubtags = subtags.size();
196                    
197                            if (variantIndex < numSubtags)
198                            {
199                                variant = subtags[variantIndex++];
200                    
201 kumpf     1.21                 while (variantIndex < numSubtags)
202                                {
203                                    variant.append(LANGUAGE_TAG_SEPARATOR_CHAR);
204                                    variant.append(subtags[variantIndex++]);
205                                }
206                            }
207                        }
208                    
209                        PEG_METHOD_EXIT();
210                    }
211                    
212                    void LanguageParser::validateQualityValue(Real32 quality)
213                    {
214                        if ((quality > 1.0) || (quality < 0.0))
215                        {
216                            MessageLoaderParms parms(
217                                "Common.LanguageParser.INVALID_QUALITY_VALUE",
218                                "AcceptLanguage contains an invalid quality value");
219                            throw InvalidAcceptLanguageHeader(MessageLoader::getMessage(parms));
220                        }
221                    }
222 kumpf     1.21     
223                    String LanguageParser::buildAcceptLanguageHeader(
224 a.dunfey  1.22.4.1     const AcceptLanguageList& acceptLanguages)
225 kumpf     1.21     {
226                        String alString;
227                        Uint32 numAcceptLanguages = acceptLanguages.size();
228                    
229                        for (Uint32 i = 0; i < numAcceptLanguages; i++)
230                        {
231                            alString.append(acceptLanguages.getLanguageTag(i).toString());
232                    
233                            Real32 q = acceptLanguages.getQualityValue(i);
234                            if (q != 1.0)
235                            {
236                                char qValueString[6];
237                                sprintf(qValueString, "%4.3f", q);
238                                alString.append(";q=");
239                                alString.append(qValueString);
240                            }
241                    
242                            if (i < numAcceptLanguages - 1)
243                            {
244                                alString.append(",");
245                            }
246 kumpf     1.21         }
247                    
248                        return alString;
249                    }
250                    
251                    String LanguageParser::buildContentLanguageHeader(
252 a.dunfey  1.22.4.1     const ContentLanguageList& contentLanguages)
253 kumpf     1.21     {
254                        String clString;
255                        Uint32 numContentLanguages = contentLanguages.size();
256                    
257                        for (Uint32 i = 0; i < numContentLanguages; i++)
258                        {
259                            clString.append(contentLanguages.getLanguageTag(i).toString());
260                    
261                            if (i < numContentLanguages - 1)
262                            {
263                                clString.append(",");
264                            }
265                        }
266                    
267                        return clString;
268                    }
269                    
270 a.dunfey  1.22.4.1 AcceptLanguageList LanguageParser::getDefaultAcceptLanguages()
271 kumpf     1.21     {
272                    #if defined(PEGASUS_HAS_MESSAGES) && defined(PEGASUS_HAS_ICU)
273                        Locale default_loc = Locale::getDefault();
274                    
275                    # ifdef PEGASUS_OS_OS400
276                        char* tmp = (char*)default_loc.getName();
277                        char tmp_[100];
278                        EtoA(strcpy(tmp_,tmp));
279                        try
280                        {
281 kumpf     1.22             return LanguageParser::parseAcceptLanguageHeader(tmp_);
282 kumpf     1.21         }
283                    # else
284                        try
285                        {
286 kumpf     1.22             return LanguageParser::parseAcceptLanguageHeader(default_loc.getName());
287 kumpf     1.21         }
288                    # endif
289                        catch (const InvalidAcceptLanguageHeader& e)
290                        {
291                            Logger::put_l(Logger::ERROR_LOG, System::CIMSERVER, Logger::SEVERE,
292                               "src.Server.cimserver.FAILED_TO_GET_PROCESS_LOCALE",
293                               "Could not convert the system locale to a valid accept-language "
294                                   "format");
295                            Logger::put(Logger::ERROR_LOG, System::CIMSERVER, Logger::SEVERE,
296                                e.getMessage());
297 a.dunfey  1.22.4.1         AcceptLanguageList al;
298 kumpf     1.22             al.insert(LanguageTag("*"), 1);
299                            return al;
300 kumpf     1.21         }
301 kumpf     1.22     #else
302 a.dunfey  1.22.4.1     return AcceptLanguageList();
303 kumpf     1.21     #endif
304                    }
305                    
306                    void LanguageParser::_parseLanguageHeader(
307                        const String& languageHeaderValue,
308                        Array<String>& languageElements)
309                    {
310                        PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseLanguageHeader");
311                    
312                        languageElements.clear();
313                        String element;
314                    
315                        for (Uint32 i=0, len=languageHeaderValue.size(); i<len; i++)
316                        {
317                            Char16 nextChar = languageHeaderValue[i];
318                    
319                            if (isascii(nextChar) && isspace(nextChar))
320                            {
321                                // Ignore whitespace
322                            }
323                            else if (nextChar == '(')
324 kumpf     1.21             {
325                                // Ignore comments
326                                while (i < len)
327                                {
328                                    // Search for the closing parenthesis
329                                    if (languageHeaderValue[i] == ')')
330                                    {
331                                        break;
332                                    }
333                    
334                                    // Skip over escape characters
335                                    if (languageHeaderValue[i] == '\\')
336                                    {
337                                        i++;
338                                    }
339                    
340                                    i++;
341                                }
342                    
343                                // Check for a missing closing parenthesis
344                                if (i >= len)
345 kumpf     1.21                 {
346                                    MessageLoaderParms parms(
347                                        "Common.LanguageParser.DOES_NOT_CONTAIN_TERMINATING",
348                                        "Closing \")\" character is missing.");
349                                    throw Exception(MessageLoader::getMessage(parms));
350                                }
351                            }
352                            else if (nextChar == ',')
353                            {
354                                // Check for the end of the element
355                                languageElements.append(element);
356                                element.clear();
357                            }
358                            else
359                            {
360                                // Unescape an escape character
361                                if ((nextChar == '\\') && (i < len-1))
362                                {
363                                    nextChar = languageHeaderValue[++i];
364                                }
365                    
366 kumpf     1.21                 // Include this character in the value
367                                element.append(nextChar);
368                            }
369                        }
370                    
371                        // Include the last element in the languageElements array
372                        languageElements.append(element);
373                    
374                        PEG_METHOD_EXIT();
375                    }
376                    
377                    void LanguageParser::_parseAcceptLanguageElement(
378                        const String& acceptLanguageElement,
379                        String& languageTag,
380                        Real32& quality)
381                    {
382                        PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseAcceptLanguageElement");
383                    
384                        // look for ';' in acceptLanguageElement, that means we have a
385                        // quality value to capture.  If not present, we only have a language tag.
386                    
387 kumpf     1.21         Uint32 semicolonIndex = acceptLanguageElement.find(";");
388                        if (semicolonIndex != PEG_NOT_FOUND)
389                        {
390                            // Separate the language tag and quality value
391                    
392                            String qualityString =
393                                acceptLanguageElement.subString(semicolonIndex+1);
394                            languageTag = acceptLanguageElement.subString(0, semicolonIndex);
395                    
396                            // Parse the quality value
397 humberto  1.8      
398 kumpf     1.21             char dummyChar;
399                            int scanfConversions = sscanf(
400                                qualityString.getCString(),
401                                "q=%f%c", &quality, &dummyChar);
402                    
403                            if ((scanfConversions != 1) ||
404                                (qualityString.size() > 7))
405                            {
406                                MessageLoaderParms parms(
407                                    "Common.LanguageParser.INVALID_QUALITY_VALUE",
408                                    "AcceptLanguage contains an invalid quality value");
409                                PEG_METHOD_EXIT();
410                                throw Exception(MessageLoader::getMessage(parms));
411                            }
412                        }
413                        else
414                        {
415                            languageTag = acceptLanguageElement;
416                            quality = 1.0;
417                        }
418                    
419 kumpf     1.21         PEG_METHOD_EXIT();
420                    }
421                    
422                    void LanguageParser::_parseLanguageSubtags(
423                        Array<String>& subtags,
424                        const String& languageTagString)
425                    {
426                        PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseLanguageSubtags");
427                    
428                        // Parse the language tag into subtags
429                    
430                        Uint32 subtagIndex = 0;
431                        Uint32 separatorIndex;
432                        while ((separatorIndex = languageTagString.find(
433                                    subtagIndex, LANGUAGE_TAG_SEPARATOR_CHAR)) != PEG_NOT_FOUND)
434                        {
435                            subtags.append(languageTagString.subString(
436                                subtagIndex, separatorIndex - subtagIndex));
437                            subtagIndex = separatorIndex + 1;
438                        }
439                        subtags.append(languageTagString.subString(subtagIndex));
440 kumpf     1.21     
441                        // Validate the syntax of each of the subtags
442                    
443                        for (Uint32 i = 0, n = subtags.size(); i < n; i++)
444                        {
445                            if (((i == 0) && !_isValidPrimarySubtagSyntax(subtags[i])) ||
446                                ((i > 0) && !_isValidSubtagSyntax(subtags[i])))
447                            {
448                                MessageLoaderParms parms(
449                                    "Common.LanguageParser.MALFORMED_LANGUAGE_TAG",
450                                    "Malformed language tag \"$0\".", languageTagString);
451                                PEG_METHOD_EXIT();
452                                throw Exception(MessageLoader::getMessage(parms));
453                            }
454                        }
455                    
456                        PEG_METHOD_EXIT();
457 humberto  1.8      }
458                    
459 kumpf     1.21     Boolean LanguageParser::_isValidPrimarySubtagSyntax(const String& subtag)
460                    {
461                        if ((subtag.size() == 0) || (subtag.size() > 8))
462                        {
463                            return false;
464                        }
465                    
466                        for (Uint32 i = 0, n = subtag.size(); i < n; i++)
467                        {
468                            if (!(isascii(subtag[i]) && isalpha(subtag[i])))
469                            {
470                                return false;
471                            }
472                        }
473                    
474                        return true;
475                    }
476                    
477                    Boolean LanguageParser::_isValidSubtagSyntax(const String& subtag)
478                    {
479                        if ((subtag.size() == 0) || (subtag.size() > 8))
480 kumpf     1.21         {
481                            return false;
482                        }
483                    
484                        for (Uint32 i = 0, n = subtag.size(); i < n; i++)
485                        {
486                            if (!(isascii(subtag[i]) && isalnum(subtag[i])))
487                            {
488                                return false;
489                            }
490                        }
491                    
492                        return true;
493                    }
494 humberto  1.8      
495                    PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2