Package translate :: Package storage :: Module factory
[hide private]
[frames] | no frames]

Source Code for Module translate.storage.factory

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2006-2010 Zuza Software Foundation 
  5  # 
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """factory methods to build real storage objects that conform to base.py""" 
 23   
 24  import os 
 25   
 26   
 27  #TODO: Monolingual formats (with template?) 
 28   
 29  decompressclass = { 
 30      'gz': ("gzip", "GzipFile"), 
 31      'bz2': ("bz2", "BZ2File"), 
 32  } 
 33   
 34   
 35  classes_str = { 
 36             "csv": ("csvl10n", "csvfile"), 
 37             "tab": ("omegat", "OmegaTFileTab"), "utf8": ("omegat", "OmegaTFile"), 
 38             "po": ("po", "pofile"), "pot": ("po", "pofile"), 
 39             "mo": ("mo", "mofile"), "gmo": ("mo", "mofile"), 
 40             "qm": ("qm", "qmfile"), 
 41             "utx": ("utx", "UtxFile"), 
 42             "_wftm": ("wordfast", "WordfastTMFile"), 
 43             "_trados_txt_tm": ("trados", "TradosTxtTmFile"), 
 44             "catkeys": ("catkeys", "CatkeysFile"), 
 45   
 46             "qph": ("qph", "QphFile"), 
 47             "tbx": ("tbx", "tbxfile"), 
 48             "tmx": ("tmx", "tmxfile"), 
 49             "ts":  ("ts2", "tsfile"), 
 50             "xliff": ("xliff", "xlifffile"), "xlf": ("xliff", "xlifffile"), 
 51             "sdlxliff": ("xliff", "xlifffile"), 
 52  } 
 53  ###  XXX:  if you add anything here, you must also add it to translate.storage. 
 54   
 55  """Dictionary of file extensions and the names of their associated class. 
 56   
 57  Used for dynamic lazy loading of modules. 
 58  _ext is a pseudo extension, that is, there is no real extension by that name. 
 59  """ 
 60   
61 -def _examine_txt(storefile):
62 """Determine the true filetype for a .txt file""" 63 if isinstance(storefile, basestring) and os.path.exists(storefile): 64 storefile = open(storefile) 65 try: 66 start = storefile.read(600).strip() 67 except AttributeError: 68 raise ValueError("Need to read object to determine type") 69 # Some encoding magic for Wordfast 70 from translate.storage import wordfast 71 if wordfast.TAB_UTF16 in start.split("\n")[0]: 72 encoding = 'utf-16' 73 else: 74 encoding = 'iso-8859-1' 75 start = start.decode(encoding).encode('utf-8') 76 if '%Wordfast TM' in start: 77 pseudo_extension = '_wftm' 78 elif '<RTF Preamble>' in start: 79 pseudo_extension = '_trados_txt_tm' 80 else: 81 raise ValueError("Failed to guess file type.") 82 storefile.seek(0) 83 return pseudo_extension
# Extensions whose real format can only be determined by inspecting the
# content: maps the extension to a function that returns a pseudo extension.
hiddenclasses = dict(txt=_examine_txt)


88 -def _guessextention(storefile):
89 """Guesses the type of a file object by looking at the first few characters. 90 The return value is a file extention .""" 91 start = storefile.read(300).strip() 92 if '<xliff ' in start: 93 extention = 'xlf' 94 elif 'msgid "' in start: 95 extention = 'po' 96 elif '%Wordfast TM' in start: 97 extention = 'txt' 98 elif '<!DOCTYPE TS>' in start: 99 extention = 'ts' 100 elif '<tmx ' in start: 101 extention = 'tmx' 102 elif '#UTX' in start: 103 extention = 'utx' 104 else: 105 raise ValueError("Failed to guess file type.") 106 storefile.seek(0) 107 return extention
108 109
def _getdummyname(storefile):
    """Make up a file name for a file object that has no name attribute.

    The extension is obtained by guessing the file type from the content.

    @param storefile: A file object without a usable name.
    @return: A name of the form C{dummy.<extension>}.
    """
    guessed = _guessextention(storefile)
    return 'dummy.' + guessed
113 114
115 -def _getname(storefile):
116 """returns the filename""" 117 if storefile is None: 118 raise ValueError("This method cannot magically produce a filename when given None as input.") 119 if not isinstance(storefile, basestring): 120 if not hasattr(storefile, "name"): 121 storefilename = _getdummyname(storefile) 122 else: 123 storefilename = storefile.name 124 else: 125 storefilename = storefile 126 return storefilename
127 128
def getclass(storefile, ignore=None, classes=None, classes_str=classes_str, hiddenclasses=hiddenclasses):
    """Factory that returns the applicable class for the type of file presented.

    The decision is based on the (lower-cased) extension of the file name. A
    trailing compression extension listed in decompressclass is skipped and
    the extension before it is used. Extensions listed in hiddenclasses are
    resolved to a pseudo extension by inspecting the content.

    @type storefile: file or str
    @param storefile: File object or file name.
    @param ignore: Some part at the back of the name to ignore (like .gz).
    @param classes: Optional mapping of extension to class. When given (and
        not empty) it is used instead of classes_str.
    @param classes_str: Mapping of extension to (module name, class name);
        the module is imported from translate.storage on demand.
    @param hiddenclasses: Mapping of extension to a function that examines
        the content and returns the extension to look up instead.
    @return: The storage class for the file.
    @raise ValueError: If the extension is not present in the mapping used.
    """
    storefilename = _getname(storefile)
    if ignore and storefilename.endswith(ignore):
        storefilename = storefilename[:-len(ignore)]
    root, ext = os.path.splitext(storefilename)
    ext = ext[len(os.path.extsep):].lower()
    decomp = None
    if ext in decompressclass:
        decomp = ext
        root, ext = os.path.splitext(root)
        ext = ext[len(os.path.extsep):].lower()
    if ext in hiddenclasses:
        guesserfn = hiddenclasses[ext]
        if decomp:
            _module, _class = decompressclass[decomp]
            module = __import__(_module, globals(), {}, [])
            _file = getattr(module, _class)
            # This reader exists only so the guesser can look at the
            # decompressed content; close it again instead of leaking it.
            decompressed = _file(storefile)
            try:
                ext = guesserfn(decompressed)
            finally:
                decompressed.close()
        else:
            ext = guesserfn(storefile)
    try:
        # we prefer classes (if given) since that is the older API that Pootle uses
        if classes:
            storeclass = classes[ext]
        else:
            _module, _class = classes_str[ext]
            # A non-empty fromlist makes __import__ return the submodule
            # itself rather than the top-level "translate" package.
            module = __import__("translate.storage.%s" % _module, globals(), {}, [_module])
            storeclass = getattr(module, _class)
    except KeyError:
        raise ValueError("Unknown filetype (%s)" % storefilename)
    return storeclass
162 163
def getobject(storefile, ignore=None, classes=None, classes_str=classes_str, hiddenclasses=hiddenclasses):
    """Factory that returns a usable object for the type of file presented.

    A string naming a directory (or ending in the path separator) yields a
    Directory object. Otherwise the storage class is looked up with
    getclass(); the file is parsed if it exists on disk or is an open file
    object, and an empty store carrying the file name is returned if not.

    @type storefile: file or str
    @param storefile: File object or file name.

    Specify ignore to ignore some part at the back of the name (like .gz).
    """
    if isinstance(storefile, basestring):
        is_directory = os.path.isdir(storefile) or storefile.endswith(os.path.sep)
        if is_directory:
            from translate.storage import directory
            return directory.Directory(storefile)

    storefilename = _getname(storefile)
    storeclass = getclass(storefile, ignore, classes=classes,
                          classes_str=classes_str, hiddenclasses=hiddenclasses)

    # Nothing on disk and no open file object: hand back an empty store that
    # only remembers the name it was asked for.
    if not os.path.exists(storefilename) and getattr(storefile, "closed", True):
        empty_store = storeclass()
        empty_store.filename = storefilename
        return empty_store

    suffix = os.path.splitext(storefilename)[1]
    suffix = suffix[len(os.path.extsep):].lower()
    if suffix in decompressclass:
        # Compressed file: parse through the matching decompressing reader.
        module_name, class_name = decompressclass[suffix]
        opener = getattr(__import__(module_name, globals(), {}, []), class_name)
        storefile = opener(storefilename)
    return storeclass.parsefile(storefile)
# Each entry is (description, file extensions, MIME types).
supported = [
    (
        'Gettext PO file',
        ['po', 'pot'],
        ["text/x-gettext-catalog", "text/x-gettext-translation", "text/x-po", "text/x-pot"],
    ),
    (
        'XLIFF Translation File',
        ['xlf', 'xliff', 'sdlxliff'],
        ["application/x-xliff", "application/x-xliff+xml"],
    ),
    (
        'Gettext MO file',
        ['mo', 'gmo'],
        ["application/x-gettext-catalog", "application/x-mo"],
    ),
    ('Qt .qm file', ['qm'], ["application/x-qm"]),
    ('TBX Glossary', ['tbx'], ['application/x-tbx']),
    ('TMX Translation Memory', ['tmx'], ["application/x-tmx"]),
    ('Qt Linguist Translation File', ['ts'], ["application/x-linguist"]),
    ('Qt Phrase Book', ['qph'], ["application/x-qph"]),
    ('OmegaT Glossary', ['utf8', 'tab'], ["application/x-omegat-glossary"]),
    ('UTX Simple Dictionary', ['utx'], ["text/x-utx"]),
    ('Haiku catkeys file', ['catkeys'], ["application/x-catkeys"]),
]

def supported_files():
    """Returns data about all supported files

    The result is a new list, so adding to or removing from it does not
    change the module-level list. The entries themselves are not copied.

    @return: list of type that include (name, extensions, mimetypes)
    @rtype: list
    """
    return list(supported)
215