Package translate :: Package storage :: Module catkeys
[hide private]
[frames | no frames]

Source Code for Module translate.storage.catkeys

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2010 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """Manage the Haiku catkeys translation format 
 22   
 23     The Haiku catkeys format is the translation format used for localisation of 
 24     the U{Haiku<http://www.haiku-os.org/>} operating system. 
 25   
 26     It is a bilingual base class derived format with L{CatkeysFile} and 
 27     L{CatkeysUnit} providing file and unit level access.  The file format is 
 28     described here: 
 29     http://www.haiku-os.org/blog/pulkomandy/2009-09-24_haiku_locale_kit_translator_handbook 
 30   
 31     Implementation 
 32     ============== 
 33     The implementation covers the full requirements of a catkeys file. The 
 34     files are simple Tab Separated Value (TSV) files that can be read 
 35     by Microsoft Excel and other spreadsheet programs. They use the .txt 
 36     extension which does make it more difficult to automatically identify 
 37     such files. 
 38   
 39     The dialect of the TSV files is specified by L{CatkeysDialect}. 
 40   
 41     Encoding 
 42     -------- 
 43     The files are UTF-8 encoded. 
 44   
 45     Header 
 46     ------ 
 47     L{CatkeysHeader} provides header management support. 
 48   
 49     Escaping 
 50     -------- 
 51     catkeys seem to escape things like in C++ (strings are just extracted from 
 52     the source code unchanged, it seems). 
 53   
 54     Functions allow for L{escaping<_escape>} and L{unescaping<_unescape>}. 
 55  """ 
 56   
 57  import csv 
 58  import sys 
 59   
 60  from translate.lang import data 
 61  from translate.storage import base 
 62   
 63  FIELDNAMES_HEADER = ["version", "language", "mimetype", "checksum"] 
 64  """Field names for the catkeys header""" 
 65   
 66  FIELDNAMES = ["source", "context", "comment", "target"] 
 67  """Field names for a catkeys TU""" 
 68   
 69  FIELDNAMES_HEADER_DEFAULTS = { 
 70  "version": "1", 
 71  "language": "", 
 72  "mimetype": "", 
 73  "checksum": "", 
 74  } 
 75  """Default or minimum header entries for a catkeys file""" 
 76   
 77  _unescape_map = {"\\r": "\r", "\\t": "\t", '\\n': '\n', '\\\\': '\\'} 
 78  _escape_map = dict([(value, key) for (key, value) in _unescape_map.items()]) 
 79  # We don't yet do escaping correctly, just for lack of time to do it.  The 
 80  # current implementation is just based on something simple that will work with 
 81  # investigated files.  The only escapes found were "\n", "\t", "\\n" 
 82   
 83   
def _escape(string):
    r"""Escape newlines and tabs so that a value fits on one catkeys line.

    A literal backslash followed by ``n`` gains an extra leading backslash,
    a real newline becomes the two characters ``\n`` and a real tab becomes
    the two characters ``\t``.  Empty strings and C{None} are handed back
    untouched.

    @param string: the text to escape
    @return: the escaped text
    """
    if not string:
        return string
    # The literal two-character sequence has to be protected before real
    # newlines are turned into that very same sequence.
    protected = string.replace(r"\n", r"\\n")
    single_line = protected.replace("\n", "\\n")
    return single_line.replace("\t", "\\t")
88 89
def _unescape(string):
    r"""Turn the escape sequences of a catkeys value back into text.

    This reverses L{_escape}: the three characters ``\\n`` become a literal
    backslash followed by ``n``, the two characters ``\n`` become a newline
    and the two characters ``\t`` become a tab.  Empty strings and C{None}
    are handed back untouched.

    @param string: the escaped text as stored in the file
    @return: the unescaped text
    """
    if not string:
        return string
    # Cut the text at every doubled-backslash sequence first.  Replacing
    # "\n" directly would also hit the tail of "\\n" and decode it as a
    # backslash plus a newline instead of a literal backslash-n.
    chunks = string.split(r"\\n")
    decoded = [
        chunk.replace("\\n", "\n").replace("\\t", "\t") for chunk in chunks
    ]
    return r"\n".join(decoded)
94 95
class CatkeysDialect(csv.Dialect):
    """Describe the properties of a catkeys generated TAB-delimited file."""
    # One record per line, fields separated by a single TAB.
    lineterminator = "\n"
    delimiter = "\t"
    # catkeys does not quote anything; tabs inside values are already
    # taken care of by _escape.
    quoting = csv.QUOTE_NONE
    if sys.version_info < (2, 5, 0):
        # The csv module of Python < 2.5 wants every one of these defined.
        # Minimal quoting should not be a problem since \t is escaped anyway.
        quoting = csv.QUOTE_MINIMAL
        quotechar = '"'
        escapechar = None
        skipinitialspace = False
        doublequote = False


# Make the dialect available by name to csv readers and writers.
csv.register_dialect("catkeys", CatkeysDialect)
class CatkeysHeader(object):
    """A catkeys translation memory header"""

    def __init__(self, header=None):
        """Set up the header values.

        @param header: header values keyed by field name.  An empty or
        omitted value selects the default header; a non-empty dictionary is
        stored as given (not copied); anything else leaves the header empty.
        """
        self._header_dict = {}
        if header and isinstance(header, dict):
            self._header_dict = header
        elif not header:
            self._header_dict = self._create_default_header()

    def _create_default_header(self):
        """Create a default catkeys header"""
        return FIELDNAMES_HEADER_DEFAULTS.copy()

    def settargetlanguage(self, newlang):
        """Set a human readable target language

        Nothing happens when newlang is empty or is not a key of
        data.languages.
        """
        if newlang and newlang in data.languages:
            #XXX assumption about the current structure of the languages dict in data
            language_name = data.languages[newlang][0]
            self._header_dict['language'] = language_name.lower()
    targetlanguage = property(None, settargetlanguage)
134 135
class CatkeysUnit(base.TranslationUnit):
    """A catkeys translation memory unit

    The values of one catkeys line are kept in a dictionary keyed by field
    name.  Source and target are stored escaped and UTF-8 encoded and are
    handed out unescaped and decoded.
    """

    def __init__(self, source=None):
        """Create a unit, optionally with an initial source text.

        @param source: the source text of the new unit
        """
        self._dict = {}
        if source:
            self.source = source
        super(CatkeysUnit, self).__init__(source)

    def getdict(self):
        """Get the dictionary of values for a catkeys line"""
        return self._dict

    def setdict(self, newdict):
        """Set the dictionary of values for a catkeys line

        @param newdict: a new dictionary with catkeys line elements
        @type newdict: Dict
        """
        # TODO First check that the values are OK
        self._dict = newdict
    dict = property(getdict, setdict)

    def _get_source_or_target(self, key):
        """Return the unescaped, decoded value stored under key.

        @param key: the field to read
        @return: C{None} when the field is missing or C{None}, an empty
        string when it is empty, otherwise the decoded text
        """
        stored = self._dict.get(key, None)
        if stored is None:
            return None
        if not stored:
            return ""
        return _unescape(stored).decode('utf-8')

    def _set_source_or_target(self, key, newvalue):
        """Store newvalue under key, UTF-8 encoded and escaped.

        @param key: the field to write
        @param newvalue: the new text, or C{None} to blank the field
        """
        if newvalue is None:
            self._dict[key] = None
            # Nothing left to encode or escape.
            return
        if isinstance(newvalue, unicode):
            newvalue = newvalue.encode('utf-8')
        newvalue = _escape(newvalue)
        if key not in self._dict or newvalue != self._dict[key]:
            self._dict[key] = newvalue

    def _get_plain_field(self, key):
        """Return the value stored under key decoded from UTF-8.

        A field that is missing or C{None} (e.g. a unit created from a bare
        source string, or a line with fewer columns than expected) is
        treated as an empty string instead of raising.
        """
        return (self._dict.get(key) or "").decode('utf-8')

    def getsource(self):
        """Return the source text of the unit."""
        return self._get_source_or_target('source')

    def setsource(self, newsource):
        """Set the source text and reset the cached rich source."""
        self._rich_source = None
        return self._set_source_or_target('source', newsource)
    source = property(getsource, setsource)

    def gettarget(self):
        """Return the target text of the unit."""
        return self._get_source_or_target('target')

    def settarget(self, newtarget):
        """Set the target text and reset the cached rich target."""
        self._rich_target = None
        return self._set_source_or_target('target', newtarget)
    target = property(gettarget, settarget)

    def getnotes(self, origin=None):
        """Return the comment of the unit.

        @param origin: the kind of note wanted; the comment is returned when
        no origin is given or when it is "programmer", "developer" or
        "source code"
        @return: the comment, or an empty string for any other origin or
        when the unit has no comment
        """
        if not origin or origin in ["programmer", "developer", "source code"]:
            return self._get_plain_field("comment")
        return u""

    def getcontext(self):
        """Return the context of the unit, empty when it has none."""
        return self._get_plain_field("context")

    def getid(self):
        """Return an identifier built from context, comment and source.

        Non-empty parts are joined with the character \\04, context first,
        then the comment, then the source.
        """
        context = self.getcontext()
        notes = self.getnotes()
        unit_id = self.source
        if notes:
            unit_id = u"%s\04%s" % (notes, unit_id)
        if context:
            unit_id = u"%s\04%s" % (context, unit_id)
        return unit_id

    def markfuzzy(self, present=True):
        """Blank the target when the unit is marked fuzzy.

        @param present: when true the target is emptied; when false nothing
        happens
        """
        if present:
            self.target = u""

    def settargetlang(self, newlang):
        """Remember the target language under the 'target-lang' key."""
        self._dict['target-lang'] = newlang
    targetlang = property(None, settargetlang)

    def __str__(self):
        """Return the string form of the underlying dictionary."""
        return str(self._dict)

    def istranslated(self):
        """Return True when both source and target are non-empty."""
        if not self._dict.get('source', None):
            return False
        return bool(self._dict.get('target', None))

    def merge(self, otherunit, overwrite=False, comments=True,
              authoritative=False):
        """Do basic format agnostic merging."""
        # We can't go fuzzy, so just do nothing
        if (self.source != otherunit.source
                or self.getcontext() != otherunit.getcontext()
                or otherunit.isfuzzy()):
            return
        if not self.istranslated() or overwrite:
            self.rich_target = otherunit.rich_target
234 235
class CatkeysFile(base.TranslationStore):
    """A catkeys translation memory file"""
    Name = _("Haiku catkeys file")
    Mimetypes = ["application/x-catkeys"]
    Extensions = ["catkeys"]

    def __init__(self, inputfile=None, unitclass=CatkeysUnit):
        """Construct a catkeys store, optionally reading in from inputfile.

        @param inputfile: a file object or string to parse straight away
        @param unitclass: the class used for the units of this store
        """
        self.UnitClass = unitclass
        base.TranslationStore.__init__(self, unitclass=unitclass)
        self.filename = ''
        self.header = CatkeysHeader()
        self._encoding = 'utf-8'
        if inputfile is not None:
            self.parse(inputfile)

    def settargetlanguage(self, newlang):
        """Pass the target language on to the header."""
        self.header.settargetlanguage(newlang)

    def parse(self, input):
        """Parse the given file or file source string

        The first line is read as the header, every following line as one
        unit.  A file object is read completely and then closed.

        @param input: a file-like object (anything with a read method) or a
        string holding the file contents
        """
        if hasattr(input, 'name'):
            self.filename = input.name
        elif not getattr(self, 'filename', ''):
            self.filename = ''
        if hasattr(input, "read"):
            tmsrc = input.read()
            input.close()
            input = tmsrc
        # Split once; the header and the units read different slices.
        rows = input.split("\n")
        for header in csv.DictReader(rows[:1], fieldnames=FIELDNAMES_HEADER,
                                     dialect="catkeys"):
            self.header = CatkeysHeader(header)
        lines = csv.DictReader(rows[1:], fieldnames=FIELDNAMES,
                               dialect="catkeys")
        for line in lines:
            # Honour the unit class the store was constructed with.
            newunit = self.UnitClass()
            newunit.dict = line
            self.addunit(newunit)

    def __str__(self):
        """Return the store serialised as catkeys text.

        The header line is written first, followed by one line per unit.
        Keys of a unit dictionary that are not catkeys fields (such as the
        'target-lang' entry stored by the unit's targetlang setter) are
        left out rather than making the writer raise ValueError.
        """
        output = csv.StringIO()
        writer = csv.DictWriter(output, fieldnames=FIELDNAMES_HEADER,
                                dialect="catkeys")
        writer.writerow(self.header._header_dict)
        writer = csv.DictWriter(output, fieldnames=FIELDNAMES,
                                dialect="catkeys", extrasaction="ignore")
        for unit in self.units:
            writer.writerow(unit.dict)
        return output.getvalue()
281