(file) Return to LanguageParser.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

  1 karl  1.11 //%2005////////////////////////////////////////////////////////////////////////
  2 humberto 1.8  //
  3 karl     1.10 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
  4               // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
  5               // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
  6 karl     1.9  // IBM Corp.; EMC Corporation, The Open Group.
  7 karl     1.10 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
  8               // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
  9 karl     1.11 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
 10               // EMC Corporation; VERITAS Software Corporation; The Open Group.
 11 humberto 1.8  //
 12               // Permission is hereby granted, free of charge, to any person obtaining a copy
 13               // of this software and associated documentation files (the "Software"), to
 14               // deal in the Software without restriction, including without limitation the
 15               // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 16               // sell copies of the Software, and to permit persons to whom the Software is
 17               // furnished to do so, subject to the following conditions:
 18               // 
 19               // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
 20               // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
 21               // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
 22               // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 23               // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 24               // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 25               // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 26               // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 27               //
 28               //==============================================================================
 29               //
 30               // Author: Humberto Rivero (hurivero@us.ibm.com)
 31               //
 32 aruran.ms 1.15 // Modified By: Aruran, IBM (ashanmug@in.ibm.com) for Bug# 3697, 3698, 3699, 3700
 33 aruran.ms 1.19 //              Aruran, IBM (ashanmug@in.ibm.com) for Bug# 3701, 3702, 3703, 3704
 34 humberto  1.8  //
 35                //%/////////////////////////////////////////////////////////////////////////////
 36                
 37                #include <Pegasus/Common/LanguageParser.h>
 38                #include <Pegasus/Common/InternalException.h>
 39                #include <Pegasus/Common/Tracer.h>
 40                #include <Pegasus/Common/MessageLoader.h> //l10n
 41                #include <cstring>
 42                
 43                //PEGASUS_USING_STD;
 44                PEGASUS_NAMESPACE_BEGIN
 45                
  46                const LanguageParser LanguageParser::EMPTY = LanguageParser(); // definition of the static EMPTY member: a default-constructed parser
 47                
 48 aruran.ms 1.12 void LanguageParser::parseHdr(Array<String> &values, String hdr){
 49 humberto  1.8  	// look for ',' which designates distict (Accept/Content)-Language fields
 50                	// the form: [languagetag, languagetag, languagetag] so whitespace removal
 51                	// may be necessary.
 52                	// then store them in the array
 53                	PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseHdr");
 54                	Uint32 i = 0;
 55                	while( i != PEG_NOT_FOUND ){
 56                		i = hdr.find(",");
 57                		if( i != PEG_NOT_FOUND ){
 58                			values.append(hdr.subString(0,i));
 59                			while(hdr[i+1] == ' ') i++;  // get rid of whitespace after ","
 60                			hdr = hdr.subString(i+1);
 61                		}
 62                		else{  // only one field, build an object with it
 63                			values.append(hdr);
 64                		}
 65                	}
 66                	PEG_METHOD_EXIT();
 67                }
 68                
 69 aruran.ms 1.13 Real32 LanguageParser::parseAcceptLanguageValue(String &language_tag, String hdr){
 70 humberto  1.8  	// look for ';' in hdr, that means we have a quality value to capture
 71                    // if not, we only have a language
 72                    
 73                    // if hdr begins with "x-" then we have a non-IANA (private) language tag
 74                	 PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseAcceptLanguageValue");
 75                    Uint32 i;
 76                    Boolean validate_length = true;
 77                	if((( i = hdr.find("x-")) != PEG_NOT_FOUND ) && (i == 0)){
 78                		hdr = convertPrivateLanguageTag(hdr);
 79                		validate_length = false;	
 80                	}
 81                	
 82                	// get rid of any beginning or trailing whitespaces
 83                	Uint32 j;
 84                	while( (j = hdr.find(" ")) != PEG_NOT_FOUND ){
 85                		hdr.remove(j,1);
 86                	}
 87                	
 88                    Real32 quality = 1;
 89                	i = hdr.find(";");
 90                	if(i != PEG_NOT_FOUND){ // extract and store language and quality
 91 humberto  1.8  		if(isValid(hdr.subString(0,i), validate_length)){
 92                			language_tag = hdr.subString(0,i);
 93                			if(hdr.size() > i + 3)
 94                				hdr.remove(0,i+3);  // remove everything but the quality value
 95                			else{
 96                				MessageLoaderParms parms("Common.LanguageParser.INVALID_QUALITY_VALUE",
 97                									 "AcceptLanguage contains an invalid quality value");
 98                				throw InvalidAcceptLanguageHeader(MessageLoader::getMessage(parms));	
 99                			}
100                		}
101                		else{
102                			//l10n
103                			//throw InvalidAcceptLanguageHeader(
104                				//"AcceptLanguage contains too many characters or non-alpha characters");
105                			MessageLoaderParms parms("Common.LanguageParser.TOO_MANY_OR_NON_ALPHA_CHARACTERS_AL",
106                									 "AcceptLanguage contains too many characters or non-alpha characters");
107                			throw InvalidAcceptLanguageHeader(MessageLoader::getMessage(parms));
108                		}
109                		//validate quality 	
110                		quality = atof(hdr.getCString());
111                		if(quality > 1.0 || quality < 0.0){
112 humberto  1.8  			//l10n
113                			//throw InvalidAcceptLanguageHeader(
114                				//"AcceptLanguage contains an invalid quality value");
115                			MessageLoaderParms parms("Common.LanguageParser.INVALID_QUALITY_VALUE",
116                									 "AcceptLanguage contains an invalid quality value");
117                			throw InvalidAcceptLanguageHeader(MessageLoader::getMessage(parms));
118                		}
119                	}
120                	else{	// extract and store language, quality defaults to 1.0
121                		if(isValid(hdr, validate_length)) language_tag = hdr;
122                		else{ 
123                			//l10n
124                			//throw InvalidAcceptLanguageHeader(
125                				//"AcceptLanguage contains too many characters or non-alpha characters");
126                			MessageLoaderParms parms("Common.LanguageParser.TOO_MANY_OR_NON_ALPHA_CHARACTERS_AL",
127                									 "AcceptLanguage contains too many characters or non-alpha characters");
128                			throw InvalidAcceptLanguageHeader(MessageLoader::getMessage(parms));
129                		}
130                	}
131                	
132                	PEG_METHOD_EXIT();
133 humberto  1.8  	return quality;
134                }
135                
136 aruran.ms 1.14 String LanguageParser::parseContentLanguageValue(const String& hdr){
137 humberto  1.8  	// we are looking for the language part of the hdr only,
138                	// according to the RFC, there may be parenthesized strings
139                	// that describe the purpose of the language, we need to ignore those
140                	PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseContentLanguageValue");
141                	String value = hdr;
142                	Uint32 i,j;
143                	while((i = value.find("(")) != PEG_NOT_FOUND){ // get rid of anything in parenthesis in hdr if found
144                		if((j = value.find(")")) != PEG_NOT_FOUND)
145                			value.remove(i, (j-i)+1);
146                		else{
147                			//l10n
148                			  //throw InvalidContentLanguageHeader(
149                							//"ContentLanguage does not contain terminating ) character");
150                			MessageLoaderParms parms("Common.LanguageParser.DOES_NOT_CONTAIN_TERMINATING",
151                									 "ContentLanguage does not contain terminating ) character");
152                			throw InvalidContentLanguageHeader(MessageLoader::getMessage(parms));
153                		}
154                	}
155                	// get rid of any beginning or trailing whitespaces
156                	while( (i = value.find(" ")) != PEG_NOT_FOUND ){
157                		value.remove(i,1);
158 humberto  1.8  	}
159                	if(!isValid(value)){
160                		//l10n
161                		 //throw InvalidContentLanguageHeader(
162                							//"ContentLanguage contains too many characters or non-alpha characters");
163                		MessageLoaderParms parms("Common.LanguageParser.TOO_MANY_OR_NON_ALPHA_CHARACTERS_CL",
164                								 "ContentLanguage contains too many characters or non-alpha characters");
165                		throw InvalidContentLanguageHeader(MessageLoader::getMessage(parms));
166                	
167                	}
168                	PEG_METHOD_EXIT();
169                	return value;
170                }
171                
172 aruran.ms 1.15 String LanguageParser::getLanguage(const String & language_tag){
173 humberto  1.8  	// given a language_tag: en-US-mn we want to return "en"
174                	Uint32 i;
175                	if((i = language_tag.find(findSeparator(language_tag.getCString()))) != PEG_NOT_FOUND)
176                		return language_tag.subString(0,i);
177                	return String(language_tag);
178                } 
179                
180 aruran.ms 1.15 String LanguageParser::getCountry(const String & language_tag){
181 humberto  1.8  	// given a language_tag: en-US-mn we want to return "US"
182                	Uint32 i,j;
183                	if( (i = language_tag.find(findSeparator(language_tag.getCString()))) != PEG_NOT_FOUND )
184                		if( (j = language_tag.find(i+1, findSeparator(language_tag.getCString()))) != PEG_NOT_FOUND )
185                			return language_tag.subString(i+1, j-(i+1));
186                		else 
187                			return language_tag.subString(i+1);
188                	return String::EMPTY;
189                }
190                
191 aruran.ms 1.15 String LanguageParser::getVariant(const String & language_tag){
192 humberto  1.8  	// given a language_tag: en-US-mn we want to return "mn"
193                	Uint32 i;
194                	if( (i = language_tag.find(findSeparator(language_tag.getCString()))) != PEG_NOT_FOUND )
195                		if( (i = language_tag.find(i+1, findSeparator(language_tag.getCString()))) != PEG_NOT_FOUND )
196                			return language_tag.subString(i+1);
197                	return String::EMPTY;
198                }
199                
200                void LanguageParser::parseLanguageSubtags(Array<String> &subtags, String language_tag){
201                    PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseLanguageSubtags");
202                	Uint32 i;
203                	char separator = findSeparator(language_tag.getCString());
204                	while( (i = language_tag.find(Char16(separator))) != PEG_NOT_FOUND ){
205                			subtags.append(language_tag.subString(0,i));
206                			language_tag.remove(0,i + 1);		
207                	}
208                	if(language_tag.size() > 0)
209                		subtags.append(language_tag);
210                	PEG_METHOD_EXIT();
211                }
212                
213 aruran.ms 1.16 Boolean LanguageParser::isValid(const String& language_tag, Boolean validate_length){
214 humberto  1.8  	//break the String down into parts(subtags), then validate each part
215                	
216                	if(language_tag == "*") return true;
217                	
218                	Array<String> subtags;
219                	parseLanguageSubtags(subtags, language_tag);
220                	if(subtags.size() > 0){ 
221                		for(Uint32 i = 0; i < subtags.size(); i++){
222                			//length should be 8 or less AND all characters should be A-Z or a-z
223                			if((validate_length && subtags[i].size() > 8) || !checkAlpha(subtags[i].getCString()))
224                				return false;
225                		}
226                	}
227                	else{ //nothing back from parseLanguageSubtags
228                		return false;
229                	}
230                	return true;
231                }
232                
233 aruran.ms 1.17 String LanguageParser::convertPrivateLanguageTag(const String & language_tag){
234 humberto  1.8  	// figure out if its a unix style locale or windows locale
235                	Uint32 i;
236                	if(( i = language_tag.find("pegasus-")) != PEG_NOT_FOUND ){
237 aruran.ms 1.17 		String str;
238                		str = language_tag.subString(i+5);
239                		//language_tag = language_tag.subString(i+5);  // capture the remainder of the string
240                		return String(replaceSeparator(str.getCString(), '-'));
241 humberto  1.8  	}
242                	//else if( (i = language_tag.find("win-")) != PEG_NOT_FOUND ){
243                	  // return language_tag.subString(i+4);  // capture the remainder of the string
244                		// call windows ID to ICU convert routine or locmap.c function here
245                	//}
246                	else{
247                		return language_tag;
248                	}		
249                }
250                
251 aruran.ms 1.18 Boolean LanguageParser::checkAlpha(const CString & _str){
252 humberto  1.7  	Uint32 length = (Uint32) strlen(_str);
253 humberto  1.8  	for(Uint32 i = 0; i < length; i++)
254                		if( !isalpha(_str[i]) )
255                			return false;
256                	return true;
257                }
258                
259 aruran.ms 1.18 char LanguageParser::findSeparator(const CString & _str){
260 kumpf     1.4  	Uint32 length = (Uint32) strlen(_str);
261 humberto  1.8  	for(Uint32 i = 0; i < length; i++)
262                		if(!isalnum(_str[i]))
263                			return _str[i];
264                	return '\0';
265                }
266                
267 aruran.ms 1.18 CString LanguageParser::replaceSeparator(const CString & _s, char new_sep){
268 aruran.ms 1.20     const Uint32 length = (Uint32) strlen(_s);
269                    AutoArrayPtr<char> _str(new char[length + 1]);
270                    strcpy(_str.get(),_s);
271                    for(Uint32 i = 0; i < length; i++)
272                    {
273                        if (!isalnum(_str.get()[i]))
274                        {
275                            _str.get()[i] = new_sep;
276                        }
277                    }
278                	const String retval(_str.get());
279                    _str.release();
280                    return(retval.getCString());
281 humberto  1.8  }
282                
283                
284                PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2