Package translate :: Package convert :: Module dtd2po
[hide private]
[frames] | no frames]

Source Code for Module translate.convert.dtd2po

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2002-2006 Zuza Software Foundation 
  5  # 
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """script to convert a mozilla .dtd UTF-8 localization format to a 
 23  gettext .po localization file using the po and dtd modules, and the 
 24  dtd2po convertor class which is in this module 
 25  You can convert back to .dtd using po2dtd.py""" 
 26   
 27  from translate.storage import po 
 28  from translate.storage import dtd 
 29  from translate.misc import quote 
 30  from translate.convert import accesskey as accesskeyfn 
 31   
def is_css_entity(entity):
    """Says if the given entity is likely to contain CSS that should not be
    translated.

    An entity qualifies when it contains a dot and the text after the last
    dot is one of the known size properties, or starts with "style".
    Returns True or False.
    """
    if '.' not in entity:
        return False
    # only the part after the last dot decides
    suffix = entity.rsplit('.', 1)[1]
    return (suffix in ("height", "width", "unixWidth", "macWidth", "size")
            or suffix.startswith("style"))
40
class dtd2po:
    """Converts dtd units and dtd files into po units and po files.

    Label entities and their matching accesskey entities are combined into
    a single po unit where possible (see convertdtdunit).
    """

    def __init__(self, blankmsgstr=False, duplicatestyle="msgctxt"):
        """Sets up the convertor.

        blankmsgstr: when true, mergestore leaves the targets empty.
        duplicatestyle: passed to removeduplicates() on the output po file.
        """
        # text of the localization group we are currently inside, or None
        self.currentgroup = None
        self.blankmsgstr = blankmsgstr
        self.duplicatestyle = duplicatestyle
        # rebuilt by findmixedentities; starting empty lets convertdtdunit
        # treat every entity as unmixed if it is called before that
        self.mixedentities = {}

    def convertcomments(self, thedtd, thepo):
        """Copies the location and the comments of thedtd onto thepo.

        Also tracks localization groups in self.currentgroup and adds a
        warning note for entities that look like CSS values.
        """
        entity = quote.rstripeol(thedtd.entity)
        if len(entity) > 0:
            thepo.addlocation(thedtd.entity)
        for commenttype, comment in thedtd.comments:
            # handle groups
            if commenttype == "locgroupstart":
                self.currentgroup = comment.replace('BEGIN', 'GROUP')
            elif commenttype == "locgroupend":
                self.currentgroup = None
            # handle automatic comment (already stripped by convertunit)
            if commenttype == "automaticcomment":
                thepo.addnote(comment, origin="developer")
            # handle normal comments (this includes the group comments)
            else:
                thepo.addnote(quote.stripcomment(comment), origin="developer")
        # handle group stuff
        if self.currentgroup is not None:
            thepo.addnote(quote.stripcomment(self.currentgroup), origin="translator")
        if is_css_entity(entity):
            thepo.addnote("Do not translate this.  Only change the numeric values if you need this dialogue box to appear bigger", origin="developer")

    def _foldlines(self, text):
        """Returns text with blank leading/trailing lines dropped and the
        whitespace at the internal line breaks normalised.

        In a multi-line string every line but the last ends in exactly one
        space, and every line but the first loses its leading whitespace.
        A single line is returned untouched; no content gives "".
        """
        lines = text.split('\n')
        while lines and not lines[0].strip():
            del lines[0]
        while lines and not lines[-1].strip():
            del lines[-1]
        if not lines:
            return ""
        if len(lines) == 1:
            return lines[0]
        folded = [lines[0].rstrip() + ' ']
        folded.extend([line.strip() + ' ' for line in lines[1:-1]])
        folded.append(lines[-1].lstrip())
        return "\n".join(folded)

    def convertstrings(self, thedtd, thepo):
        """Sets thepo.source from the definition of thedtd and blanks
        thepo.target."""
        # extract the string, get rid of quoting
        unquoted = dtd.unquotefromdtd(thedtd.definition).replace("\r", "")
        thepo.source = self._foldlines(unquoted)
        thepo.target = ""

    def convertunit(self, thedtd):
        """converts a dtd unit to a po unit, returns None if empty or not for translation

        Note that thedtd is modified: a localization note addressed to this
        entity is turned into an "automaticcomment", and a DONT_TRANSLATE
        note blanks the entity and definition and is removed.
        """
        if thedtd is None:
            return None
        if getattr(thedtd, "entityparameter", None) == "SYSTEM":
            return None
        thepo = po.pounit(encoding="UTF-8")
        # remove unwanted stuff
        for commentnum, (commenttype, locnote) in enumerate(thedtd.comments):
            # only localization notes need special treatment
            if commenttype != 'locnote':
                continue
            # parse the locnote into the entity and the actual note
            typeend = quote.findend(locnote, 'LOCALIZATION NOTE')
            # parse the id
            idstart = locnote.find('(', typeend)
            if idstart == -1:
                continue
            idend = locnote.find(')', idstart+1)
            if idend == -1:
                # unterminated id: not a note we can attribute to an entity
                continue
            entity = locnote[idstart+1:idend].strip()
            # parse the actual note
            actualnotestart = locnote.find(':', idend+1)
            actualnoteend = locnote.find('-->', idend)
            actualnote = locnote[actualnotestart+1:actualnoteend].strip()
            # if it's for this entity, process it
            if thedtd.entity == entity:
                # if it says don't translate (and nothing more),
                if actualnote.startswith("DONT_TRANSLATE"):
                    # take out the entity,definition and the DONT_TRANSLATE comment
                    thedtd.entity = ""
                    thedtd.definition = ""
                    del thedtd.comments[commentnum]
                    # the list changed size, so we must stop iterating here
                    break
                else:
                    # convert it into an automatic comment, to be processed by convertcomments
                    thedtd.comments[commentnum] = ("automaticcomment", actualnote)
        # do a standard translation
        self.convertcomments(thedtd, thepo)
        self.convertstrings(thedtd, thepo)
        if thepo.isblank() and not thepo.getlocations():
            return None
        return thepo

    def convertmixedunit(self, labeldtd, accesskeydtd):
        """Combines a label dtd unit and an accesskey dtd unit into one po unit.

        If only one of the two converts to a po unit, that one is returned
        on its own (None if neither does).  Returns None as well when
        accesskeyfn.combine cannot combine the label and the accesskey.
        """
        labelpo = self.convertunit(labeldtd)
        accesskeypo = self.convertunit(accesskeydtd)
        if labelpo is None:
            return accesskeypo
        if accesskeypo is None:
            return labelpo
        thepo = po.pounit(encoding="UTF-8")
        thepo.addlocations(labelpo.getlocations())
        thepo.addlocations(accesskeypo.getlocations())
        thepo.msgidcomment = thepo._extract_msgidcomments() + labelpo._extract_msgidcomments()
        thepo.msgidcomment = thepo._extract_msgidcomments() + accesskeypo._extract_msgidcomments()
        thepo.addnote(labelpo.getnotes("developer"), "developer")
        thepo.addnote(accesskeypo.getnotes("developer"), "developer")
        thepo.addnote(labelpo.getnotes("translator"), "translator")
        thepo.addnote(accesskeypo.getnotes("translator"), "translator")
        # redo the strings from original dtd...
        label = dtd.unquotefromdtd(labeldtd.definition).decode('UTF-8')
        accesskey = dtd.unquotefromdtd(accesskeydtd.definition).decode('UTF-8')
        label = accesskeyfn.combine(label, accesskey)
        if label is None:
            return None
        thepo.source = label
        thepo.target = ""
        return thepo

    def findmixedentities(self, thedtdfile):
        """creates self.mixedentities from the dtd file...

        Every label entity (one ending in a dtd.labelsuffixes suffix) that
        has a sibling ending in one of dtd.accesskeysuffixes is recorded,
        together with that sibling, as a key mapping to an empty dict.
        thedtdfile.index must already have been built.
        """
        self.mixedentities = {}  # those entities which have a .label/.title and .accesskey combined
        for entity in thedtdfile.index:
            for labelsuffix in dtd.labelsuffixes:
                if entity.endswith(labelsuffix):
                    entitybase = entity[:entity.rfind(labelsuffix)]
                    # see if there is a matching accesskey in this line, making this a
                    # mixed entity
                    for akeytype in dtd.accesskeysuffixes:
                        if (entitybase + akeytype) in thedtdfile.index:
                            # add both versions to the list of mixed entities
                            self.mixedentities[entity] = {}
                            self.mixedentities[entitybase+akeytype] = {}

    def convertdtdunit(self, thedtdfile, thedtd, mixbucket="dtd"):
        """converts a dtd unit from thedtdfile to a po unit, handling mixed entities along the way...

        mixbucket is the key under which the outcome of the mix is recorded
        in self.mixedentities[entity]: True once label and accesskey have
        been combined, False if combining failed.  Returns None for an
        entity whose partner already produced the combined unit.
        """
        # keep track of whether accesskey and label were combined
        if thedtd.entity in self.mixedentities:
            # use special convertmixed unit which produces one pounit with
            # both combined for the label and None for the accesskey
            alreadymixed = self.mixedentities[thedtd.entity].get(mixbucket, None)
            if alreadymixed:
                # we are successfully throwing this away...
                return None
            elif alreadymixed is None:
                # depending on what we come across first, work out the label and the accesskey
                labeldtd, accesskeydtd = None, None
                labelentity, accesskeyentity = None, None
                # NOTE(review): the nesting of the break/else pairs below was
                # reconstructed from a flattened listing - confirm against
                # the upstream file
                for labelsuffix in dtd.labelsuffixes:
                    if thedtd.entity.endswith(labelsuffix):
                        entitybase = thedtd.entity[:thedtd.entity.rfind(labelsuffix)]
                        for akeytype in dtd.accesskeysuffixes:
                            if (entitybase + akeytype) in thedtdfile.index:
                                labelentity, labeldtd = thedtd.entity, thedtd
                                accesskeyentity = labelentity[:labelentity.rfind(labelsuffix)]+akeytype
                                accesskeydtd = thedtdfile.index[accesskeyentity]
                                break
                else:
                    for akeytype in dtd.accesskeysuffixes:
                        if thedtd.entity.endswith(akeytype):
                            accesskeyentity, accesskeydtd = thedtd.entity, thedtd
                            for labelsuffix in dtd.labelsuffixes:
                                labelentity = accesskeyentity[:accesskeyentity.rfind(akeytype)]+labelsuffix
                                if labelentity in thedtdfile.index:
                                    labeldtd = thedtdfile.index[labelentity]
                                    break
                            else:
                                # no label for this accesskey: nothing to record
                                labelentity = None
                                accesskeyentity = None
                thepo = self.convertmixedunit(labeldtd, accesskeydtd)
                if thepo is not None:
                    if accesskeyentity is not None:
                        self.mixedentities[accesskeyentity][mixbucket] = True
                    if labelentity is not None:
                        self.mixedentities[labelentity][mixbucket] = True
                    return thepo
                else:
                    # otherwise the mix failed. add each one separately and remember they weren't mixed
                    if accesskeyentity is not None:
                        self.mixedentities[accesskeyentity][mixbucket] = False
                    if labelentity is not None:
                        self.mixedentities[labelentity][mixbucket] = False
        return self.convertunit(thedtd)

    def convertstore(self, thedtdfile):
        """Converts a whole dtd file to a po file with empty targets and
        returns that po file."""
        thetargetfile = po.pofile()
        targetheader = thetargetfile.init_headers(charset="UTF-8", encoding="8bit", x_accelerator_marker="&")
        targetheader.addnote("extracted from %s" % thedtdfile.filename, "developer")

        thedtdfile.makeindex()
        self.findmixedentities(thedtdfile)
        # go through the dtd and convert each unit
        for thedtd in thedtdfile.units:
            if thedtd.isnull():
                continue
            thepo = self.convertdtdunit(thedtdfile, thedtd)
            if thepo is not None:
                thetargetfile.addunit(thepo)
        thetargetfile.removeduplicates(self.duplicatestyle)
        return thetargetfile

    def mergestore(self, origdtdfile, translateddtdfile):
        """Converts the original dtd file to a po file, using the entities
        of the translated dtd file as targets, and returns that po file.

        Targets are left empty when self.blankmsgstr is set or when the
        translated file has no usable unit for the entity.
        """
        thetargetfile = po.pofile()
        # same accelerator marker as convertstore: merged units hold
        # combined label+accesskey strings too
        targetheader = thetargetfile.init_headers(charset="UTF-8", encoding="8bit", x_accelerator_marker="&")
        targetheader.addnote("extracted from %s, %s" % (origdtdfile.filename, translateddtdfile.filename), "developer")

        origdtdfile.makeindex()
        self.findmixedentities(origdtdfile)
        translateddtdfile.makeindex()
        # this replaces self.mixedentities with those of the translation
        self.findmixedentities(translateddtdfile)
        # go through the dtd files and convert each unit
        for origdtd in origdtdfile.units:
            if origdtd.isnull():
                continue
            origpo = self.convertdtdunit(origdtdfile, origdtd, mixbucket="orig")
            if origdtd.entity in self.mixedentities:
                mixedentitydict = self.mixedentities[origdtd.entity]
                if "orig" not in mixedentitydict:
                    # this means that the entity is mixed in the translation, but not the original - treat as unmixed
                    mixbucket = "orig"
                    del self.mixedentities[origdtd.entity]
                elif mixedentitydict["orig"]:
                    # the original entity is already mixed successfully
                    mixbucket = "translate"
                else:
                    # mixing the original failed; reusing the "orig" bucket
                    # makes the translation fall back to unmixed as well
                    mixbucket = "orig"
            else:
                mixbucket = "translate"
            if origpo is None:
                # this means its a mixed entity (with accesskey) that's already been dealt with)
                continue
            if origdtd.entity in translateddtdfile.index:
                translateddtd = translateddtdfile.index[origdtd.entity]
                translatedpo = self.convertdtdunit(translateddtdfile, translateddtd, mixbucket=mixbucket)
            else:
                translatedpo = None
            if translatedpo is not None and not self.blankmsgstr:
                origpo.target = translatedpo.source
            thetargetfile.addunit(origpo)
        thetargetfile.removeduplicates(self.duplicatestyle)
        return thetargetfile
289
def convertdtd(inputfile, outputfile, templatefile, pot=False, duplicatestyle="msgctxt"):
    """reads in inputfile and templatefile using dtd, converts using dtd2po, writes to outputfile

    Without a template the input is converted on its own; with a template
    the template supplies the original strings and the input supplies the
    translations.  pot is passed on as blankmsgstr and duplicatestyle is
    passed on unchanged.  Returns 0 (writing nothing) if the resulting
    store is empty, 1 otherwise.
    """
    inputstore = dtd.dtdfile(inputfile)
    convertor = dtd2po(blankmsgstr=pot, duplicatestyle=duplicatestyle)
    if templatefile is None:
        outputstore = convertor.convertstore(inputstore)
    else:
        templatestore = dtd.dtdfile(templatefile)
        outputstore = convertor.mergestore(templatestore, inputstore)
    if outputstore.isempty():
        return 0
    outputfile.write(str(outputstore))
    return 1
303
def main(argv=None):
    """Runs the dtd to po conversion from the command line arguments in argv."""
    # imported here rather than at module level, as in the original
    from translate.convert import convert
    # plain dtd input, and dtd input together with a dtd template
    formats = {"dtd": ("po", convertdtd), ("dtd", "dtd"): ("po", convertdtd)}
    parser = convert.ConvertOptionParser(formats, usetemplates=True, usepots=True, description=__doc__)
    parser.add_duplicates_option()
    parser.passthrough.append("pot")
    parser.run(argv)

# run the convertor when this file is executed as a script
if __name__ == '__main__':
    main()