Package translate :: Package lang :: Module data
[hide private]
[frames | no frames]

Source Code for Module translate.lang.data

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  #  
  4  # Copyright 2007-2009 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """This module stores information and functionality that relates to plurals.""" 
 23   
 24  import unicodedata 
 25   
 26  from translate.storage.placeables import StringElem 
 27   
 28   
 29  languages = { 
 30  'af': ('Afrikaans', 2, '(n != 1)'), 
 31  'ak': ('Akan', 2, 'n > 1'), 
 32  'am': ('Amharic', 2, 'n > 1'), 
 33  'ar': ('Arabic', 6, 'n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 && n%100<=99 ? 4 : 5'), 
 34  'arn': ('Mapudungun; Mapuche', 2, 'n > 1'), 
 35  'az': ('Azerbaijani', 2, '(n != 1)'), 
 36  'be': ('Belarusian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'), 
 37  'bg': ('Bulgarian', 2, '(n != 1)'), 
 38  'bn': ('Bengali', 2, '(n != 1)'), 
 39  'bn_IN': ('Bengali (India)', 2, '(n != 1)'), 
 40  'bo': ('Tibetan', 1, '0'), 
 41  'bs': ('Bosnian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'), 
 42  'ca': ('Catalan; Valencian', 2, '(n != 1)'), 
 43  'cs': ('Czech', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'), 
 44  'csb': ('Kashubian', 3, 'n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'), 
 45  'cy': ('Welsh', 2, '(n==2) ? 1 : 0'), 
 46  'da': ('Danish', 2, '(n != 1)'), 
 47  'de': ('German', 2, '(n != 1)'), 
 48  'dz': ('Dzongkha', 1, '0'), 
 49  'el': ('Greek', 2, '(n != 1)'), 
 50  'en': ('English', 2, '(n != 1)'), 
 51  'en_GB': ('English (United Kingdom)', 2, '(n != 1)'), 
 52  'en_ZA': ('English (South Africa)', 2, '(n != 1)'), 
 53  'eo': ('Esperanto', 2, '(n != 1)'), 
 54  'es': ('Spanish; Castilian', 2, '(n != 1)'), 
 55  'et': ('Estonian', 2, '(n != 1)'), 
 56  'eu': ('Basque', 2, '(n != 1)'), 
 57  'fa': ('Persian', 1, '0'), 
 58  'fi': ('Finnish', 2, '(n != 1)'), 
 59  'fil': ('Filipino; Pilipino', 2, '(n > 1)'), 
 60  'fo': ('Faroese', 2, '(n != 1)'), 
 61  'fr': ('French', 2, '(n > 1)'), 
 62  'fur': ('Friulian', 2, '(n != 1)'), 
 63  'fy': ('Frisian', 2, '(n != 1)'), 
 64  'ga': ('Irish', 3, 'n==1 ? 0 : n==2 ? 1 : 2'), 
 65  'gl': ('Galician', 2, '(n != 1)'), 
 66  'gu': ('Gujarati', 2, '(n != 1)'), 
 67  'gun': ('Gun', 2, '(n > 1)'), 
 68  'ha': ('Hausa', 2, '(n != 1)'), 
 69  'he': ('Hebrew', 2, '(n != 1)'), 
 70  'hi': ('Hindi', 2, '(n != 1)'), 
 71  'hy': ('Armenian', 1, '0'), 
 72  'hr': ('Croatian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), 
 73  'hu': ('Hungarian', 2, '(n != 1)'), 
 74  'id': ('Indonesian', 1, '0'), 
 75  'is': ('Icelandic', 2, '(n != 1)'), 
 76  'it': ('Italian', 2, '(n != 1)'), 
 77  'ja': ('Japanese', 1, '0'), 
 78  'jv': ('Javanese', 2, '(n != 1)'), 
 79  'ka': ('Georgian', 1, '0'), 
 80  'km': ('Khmer', 1, '0'), 
 81  'kn': ('Kannada', 2, '(n != 1)'), 
 82  'ko': ('Korean', 1, '0'), 
 83  'ku': ('Kurdish', 2, '(n != 1)'), 
 84  'kw': ('Cornish', 4, '(n==1) ? 0 : (n==2) ? 1 : (n == 3) ? 2 : 3'), 
 85  'ky': ('Kirghiz; Kyrgyz', 1, '0'), 
 86  'lb': ('Luxembourgish; Letzeburgesch', 2, '(n != 1)'), 
 87  'ln': ('Lingala', 2, '(n > 1)'), 
 88  'lo': ('Lao', 1, '0'), 
 89  'lt': ('Lithuanian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2)'), 
 90  'lv': ('Latvian', 3, '(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2)'), 
 91  'mg': ('Malagasy', 2, '(n > 1)'), 
 92  'mi': ('Maori', 2, '(n > 1)'), 
 93  'mk': ('Macedonian', 2, 'n==1 || n%10==1 ? 0 : 1'), 
 94  'ml': ('Malayalam', 2, '(n != 1)'), 
 95  'mn': ('Mongolian', 2, '(n != 1)'), 
 96  'mr': ('Marathi', 2, '(n != 1)'), 
 97  'ms': ('Malay', 1, '0'), 
 98  'mt': ('Maltese', 4, '(n==1 ? 0 : n==0 || ( n%100>1 && n%100<11) ? 1 : (n%100>10 && n%100<20 ) ? 2 : 3)'), 
 99  'nah': ('Nahuatl languages', 2, '(n != 1)'), 
100  'nap': ('Neapolitan', 2, '(n != 1)'), 
101  'nb': ('Norwegian Bokmal', 2, '(n != 1)'), 
102  'ne': ('Nepali', 2, '(n != 1)'), 
103  'nl': ('Dutch; Flemish', 2, '(n != 1)'), 
104  'nn': ('Norwegian Nynorsk', 2, '(n != 1)'), 
105  'nso': ('Pedi; Sepedi; Northern Sotho', 2, '(n > 1)'), 
106  'or': ('Oriya', 2, '(n != 1)'), 
107  'pa': ('Panjabi; Punjabi', 2, '(n != 1)'), 
108  'pap': ('Papiamento', 2, '(n != 1)'), 
109  'pl': ('Polish', 3, '(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), 
110  'pms': ('Piemontese', 2, '(n != 1)'), 
111  'ps': ('Pushto; Pashto', 2, '(n != 1)'), 
112  'pt': ('Portuguese', 2, '(n != 1)'), 
113  'pt_BR': ('Portuguese (Brazil)', 2, '(n > 1)'), 
114  'ro': ('Romanian', 3, '(n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2);'), 
115  'ru': ('Russian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), 
116  'sco': ('Scots', 2, '(n != 1)'), 
117  'sk': ('Slovak', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'), 
118  'sl': ('Slovenian', 4, '(n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3)'), 
119  'so': ('Somali', 2, '(n != 1)'), 
120  'sq': ('Albanian', 2, '(n != 1)'), 
121  'sr': ('Serbian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), 
122  'su': ('Sundanese', 1, '0'), 
123  'sv': ('Swedish', 2, '(n != 1)'), 
124  'ta': ('Tamil', 2, '(n != 1)'), 
125  'te': ('Telugu', 2, '(n != 1)'), 
126  'tg': ('Tajik', 2, '(n != 1)'), 
127  'ti': ('Tigrinya', 2, '(n > 1)'), 
128  'th': ('Thai', 1, '0'), 
129  'tk': ('Turkmen', 2, '(n != 1)'), 
130  'tr': ('Turkish', 1, '0'), 
131  'uk': ('Ukrainian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), 
132  'vi': ('Vietnamese', 1, '0'), 
133  'wa': ('Walloon', 2, '(n > 1)'), 
134  # Chinese is difficult because the main divide is on script, not really  
135  # country. Simplified Chinese is used mostly in China, Singapore and Malaysia. 
136  # Traditional Chinese is used mostly in Hong Kong, Taiwan and Macau. 
137  'zh_CN': ('Chinese (China)', 1, '0'), 
138  'zh_HK': ('Chinese (Hong Kong)', 1, '0'), 
139  'zh_TW': ('Chinese (Taiwan)', 1, '0'), 
140  } 
141  """Dictionary of language data. 
142  The language code is the dictionary key (which may contain country codes and modifiers). 
143  The value is a tuple: (Full name in English, nplurals, plural equation)""" 
144   
def simplercode(code):
    """Return a less specific version of the given language code, for
    example one without its country code.

    The part after the last separator is dropped. An empty code (or
    C{None}) is handed back unchanged, and a code without any separator
    simplifies to the empty string.

    @see:
      - U{http://www.rfc-editor.org/rfc/bcp/bcp47.txt}
      - U{http://www.rfc-editor.org/rfc/rfc4646.txt}
      - U{http://www.rfc-editor.org/rfc/rfc4647.txt}
      - U{http://www.w3.org/International/articles/language-tags/}
    """
    if not code:
        return code

    # The separator is located on the normalized form of the code, but the
    # cut is applied to the code exactly as the caller wrote it.
    cut = normalize_code(code).rfind('-')
    if cut < 0:
        return ""
    return code[:cut]
expansion_factors = {
    'af': 0.1,
    'ar': -0.09,
    'es': 0.21,
    'fr': 0.28,
    'it': 0.2,
}
"""Source to target string length expansion factors."""

import gettext
import locale
import os
import re

# Both dictionaries start out empty and are filled at runtime by
# gettext_lang() / gettext_country(), which store one translation function
# per language code in them.
iso639 = {}
"""ISO 639 language codes"""
iso3166 = {}
"""ISO 3166 country codes"""

# A complete code: 2-3 lowercase letters, then an optional region introduced
# by "_" or "-" (2-3 uppercase letters) and an optional "@modifier".
langcode_re = re.compile("^[a-z]{2,3}([_-][A-Z]{2,3}|)(@[a-zA-Z0-9]+|)$")
# Only the region part of a code (with its optional "@modifier").
variant_re = re.compile("^[_-][A-Z]{2,3}(@[a-zA-Z0-9]+|)$")
def languagematch(languagecode, otherlanguagecode):
    """Tell whether C{otherlanguagecode} is C{languagecode} itself or that
    same language followed by a region.

    When C{languagecode} is C{None}, C{otherlanguagecode} is only checked
    for looking like a language code at all.

    @return: A true value (C{True} or a regular expression match object)
        when the codes match, otherwise a false value (C{False} or C{None}).
    """
    if languagecode is None:
        return langcode_re.match(otherlanguagecode)
    if languagecode == otherlanguagecode:
        return True
    if not otherlanguagecode.startswith(languagecode):
        return False
    # Whatever follows the shared prefix has to be a region suffix.
    remainder = otherlanguagecode[len(languagecode):]
    return variant_re.match(remainder)
# Splits a name written as "language (country)" into its two parts.
dialect_name_re = re.compile(r"(.+)\s\(([^)]+)\)$")


def tr_lang(langcode=None):
    """Build a function that translates language names into the language
    with ISO code C{langcode} (the system language if none is given).

    The returned function also accepts names of the form
    C{"language (country)"}; the two parts are then translated separately
    and put back together in the same form.
    """
    translate_language = gettext_lang(langcode)
    translate_country = gettext_country(langcode)

    def handlelanguage(name):
        dialect = dialect_name_re.match(name)
        if not dialect:
            return translate_language(name)
        language, country = dialect.groups()
        return u"%s (%s)" % (translate_language(language), translate_country(country))

    return handlelanguage
def _cached_translator(domain, cache, langcode):
    """Return the gettext function of C{domain} for C{langcode}, creating it
    and storing it in C{cache} on first use.

    @param domain: gettext domain to load (C{'iso_639'} or C{'iso_3166'})
    @param cache: dictionary mapping language codes to gettext functions
    @param langcode: target language code; any false value means the system
        language
    @return: a callable translating a message of C{domain}
    """
    # Normalise BEFORE the lookup: entries for "no language" are stored under
    # "", so probing the cache with None would miss and rebuild every time.
    if not langcode:
        langcode = ""
    if langcode not in cache:
        languages = None
        if langcode:
            languages = [langcode]
        elif os.name == "nt":
            # On Windows the default locale is not used for some reason
            default_locale = locale.getdefaultlocale()[0]
            # The default locale may be unknown (None); [None] is not a
            # usable language list, so let gettext choose in that case.
            if default_locale:
                languages = [default_locale]
        t = gettext.translation(domain, languages=languages, fallback=True)
        # Python 2 offers the unicode API as ugettext; where that does not
        # exist (Python 3) gettext itself already returns text.
        cache[langcode] = getattr(t, "ugettext", t.gettext)
    return cache[langcode]


def gettext_lang(langcode=None):
    """Returns a gettext function to translate language names into the given
    language, or the system language if no language is specified.

    The function is created once per language code and kept in C{iso639}.
    """
    return _cached_translator('iso_639', iso639, langcode)


def gettext_country(langcode=None):
    """Returns a gettext function to translate country names into the given
    language, or the system language if no language is specified.

    The function is created once per language code and kept in C{iso3166}.
    """
    return _cached_translator('iso_3166', iso3166, langcode)
244
def normalize(string, normal_form="NFC"):
    """Return the given unicode string in the requested normalization form.

    @param string: The string to be normalized; C{None} is passed through
    @param normal_form: NFC (default), NFD, NFKC or NFKD
    @return: Normalized string, or C{None} if C{string} was C{None}
    """
    if string is not None:
        return unicodedata.normalize(normal_form, string)
    return None
256
def forceunicode(string):
    """Convert the given value to a unicode string where needed.

    A C{str} is decoded using its own C{encoding} attribute if it has one
    and UTF-8 otherwise; a C{StringElem} is converted with C{unicode()}.
    Anything else, including C{None}, is returned as it is.

    @param string: A text string
    @type string: Unicode, String
    @return: String converted to Unicode.
    @rtype: Unicode
    """
    if string is None:
        return None
    if isinstance(string, str):
        return string.decode(getattr(string, "encoding", "utf-8"))
    if isinstance(string, StringElem):
        return unicode(string)
    return string
273
def normalized_unicode(string):
    """Convert the string with forceunicode() and then normalize() it using
    the default normalization form."""
    as_unicode = forceunicode(string)
    return normalize(as_unicode)
277
def normalize_code(code):
    """Return the language code in lowercase with every C{_} and C{@}
    replaced by C{-}, so that differently written codes can be compared."""
    dashed = code
    for separator in ("_", "@"):
        dashed = dashed.replace(separator, "-")
    return dashed.lower()
280
def simplify_to_common(language_code, languages=languages):
    """Simplify language code to the most commonly used form for the
    language, stripping country information for languages that tend
    not to be localized differently for different countries.

    The code is simplified with simplercode() one step at a time until it
    matches a key of C{languages} (compared in normalized form) or cannot be
    simplified any further.

    @param language_code: The language code to simplify; an empty code or
        C{None} is returned unchanged
    @param languages: Mapping whose keys are the known language codes. It is
        consulted at every simplification step.
    @return: The first form that is a known code, or the last form reached
        before the code would have become empty
    """
    if not language_code:
        return language_code

    # Normalize the known codes once instead of once per simplification step.
    known_codes = set(normalize_code(key) for key in languages)
    while normalize_code(language_code) not in known_codes:
        simpler = simplercode(language_code)
        if simpler == "":
            # Nothing left to strip: keep the code we have.
            break
        language_code = simpler
    return language_code
290