1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """
23 Classes that hold units of .oo files (oounit) or entire files (oofile).
24
25 These are specific .oo files for localisation exported by OpenOffice.org - SDF
26 format (previously known as GSI files). For an overview of the format, see
27 U{http://l10n.openoffice.org/L10N_Framework/Intermediate_file_format.html}
28
29 The behaviour in terms of escaping is explained in detail in the programming
30 comments.
31 """
32
33
34 import os
35 import re
36 import warnings
37
38 from translate.misc import quote
39 from translate.misc import wStringIO
40
41
42
# Characters that are kept unchanged; every other character maps to "_".
normalfilenamechars = "/#.0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
# 256-character string indexed by character code: position ord(c) holds c
# itself when c is in normalfilenamechars and "_" otherwise.  Built with a
# single join rather than repeated string concatenation.
normalizetable = "".join(
    (char if char in normalfilenamechars else "_")
    for char in map(chr, range(256))
)
50
51
53
55 self.normalchars = {}
56 for char in normalchars:
57 self.normalchars[ord(char)] = char
58
60 return self.normalchars.get(key, u"_")
61
62 unormalizetable = unormalizechar(normalfilenamechars.decode("ascii"))
63
64
71
72
74 """converts an oo key tuple into a unique identifier
75
76 @param ookey: an oo key
77 @type ookey: tuple
78 @param long_keys: Use long keys
79 @type long_keys: Boolean
80 @rtype: str
81 @return: unique ascii identifier
82 """
83 project, sourcefile, resourcetype, groupid, localid, platform = ookey
84 sourcefile = sourcefile.replace('\\', '/')
85 if long_keys:
86 sourcebase = os.path.join(project, sourcefile)
87 else:
88 sourceparts = sourcefile.split('/')
89 sourcebase = "".join(sourceparts[-1:])
90 if len(groupid) == 0 or len(localid) == 0:
91 fullid = groupid + localid
92 else:
93 fullid = groupid + "." + localid
94 if resourcetype:
95 fullid = fullid + "." + resourcetype
96 key = "%s#%s" % (sourcebase, fullid)
97 return normalizefilename(key)
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
def escape_text(text):
    """Replaces newline, tab and carriage return characters with their
    two-character backslash escapes (backslash + n, t or r).

    Existing backslashes in the text are left untouched.
    """
    for raw, escaped in (("\n", "\\n"), ("\t", "\\t"), ("\r", "\\r")):
        text = text.replace(raw, escaped)
    return text
123
124
def unescape_text(text):
    """Converts the backslash escapes for newline, tab and carriage return
    back into the real characters.

    A doubled backslash is kept exactly as it is (still two backslashes) and
    shields the character after it, so backslash-backslash-n is NOT turned
    into a newline.

    The text is scanned once from left to right.  No placeholder character is
    used, so a literal BEL character in the input is passed through unchanged
    instead of being rewritten as two backslashes.

    @param text: the escaped text
    @return: the text with the n, t and r escapes expanded
    """
    escapes = {"\\n": "\n", "\\t": "\t", "\\r": "\r"}

    def expand(match):
        token = match.group(0)
        # a matched backslash pair is not in the table and is returned as is
        return escapes.get(token, token)

    return re.sub(r"\\[\\ntr]", expand, text)
129
# Matches a tag whose name is made of lowercase letters, "_" or "-": an
# optional leading "/", the name, optionally spaces plus attribute text of the
# form name="...", then optional spaces and an optional "/" before ">".
# Written as a raw string so that "\-" reaches the regex engine verbatim
# instead of being an invalid string escape.
helptagre = re.compile(r'''<[/]??[a-z_\-]+?(?:| +[a-z]+?=".*?") *[/]??>''')
131
132
134 """Escapes the help text as it would be in an SDF file.
135
136 <, >, " are only escaped in <[[:lower:]]> tags. Some HTML tags make it in in
137 lowercase so those are dealt with. Some OpenOffice.org help tags are not
138 escaped.
139 """
140 text = text.replace("\\", "\\\\")
141 for tag in helptagre.findall(text):
142 escapethistag = False
143 for escape_tag in ["ahelp", "link", "item", "emph", "defaultinline", "switchinline", "caseinline", "variable", "bookmark_value", "image", "embedvar", "alt"]:
144 if tag.startswith("<%s" % escape_tag) or tag == "</%s>" % escape_tag:
145 escapethistag = True
146 if tag in ["<br/>", "<help-id-missing/>"]:
147 escapethistag = True
148 if escapethistag:
149 escaped_tag = ("\\<" + tag[1:-1] + "\\>").replace('"', '\\"')
150 text = text.replace(tag, escaped_tag)
151 return text
152
153
155 """Unescapes normal text to be suitable for writing to the SDF file."""
156 return text.replace(r"\<", "<").replace(r"\>", ">").replace(r'\"', '"').replace(r"\\", "\\")
157
158
160 """Encode a Unicode string the the specified encoding"""
161 if isinstance(text, unicode):
162 return text.encode('UTF-8')
163 return text
164
165
167 """this represents one line, one translation in an .oo file"""
168
170 """construct an ooline from its parts"""
171 if parts is None:
172 self.project, self.sourcefile, self.dummy, self.resourcetype, \
173 self.groupid, self.localid, self.helpid, self.platform, \
174 self.width, self.languageid, self.text, self.helptext, \
175 self.quickhelptext, self.title, self.timestamp = [""] * 15
176 else:
177 self.setparts(parts)
178
180 """create a line from its tab-delimited parts"""
181 if len(parts) != 15:
182 warnings.warn("oo line contains %d parts, it should contain 15: %r" % \
183 (len(parts), parts))
184 newparts = list(parts)
185 if len(newparts) < 15:
186 newparts = newparts + [""] * (15-len(newparts))
187 else:
188 newparts = newparts[:15]
189 parts = tuple(newparts)
190 self.project, self.sourcefile, self.dummy, self.resourcetype, \
191 self.groupid, self.localid, self.helpid, self.platform, \
192 self.width, self.languageid, self._text, self.helptext, \
193 self.quickhelptext, self.title, self.timestamp = parts
194
196 """return a list of parts in this line"""
197 return (self.project, self.sourcefile, self.dummy, self.resourcetype,
198 self.groupid, self.localid, self.helpid, self.platform,
199 self.width, self.languageid, self._text, self.helptext,
200 self.quickhelptext, self.title, self.timestamp)
201
203 """Obtains the text column and handle escaping."""
204 if self.sourcefile.endswith(".xhp"):
205 return unescape_help_text(self._text)
206 else:
207 return unescape_text(self._text)
208
209 - def settext(self, text):
210 """Sets the text column and handle escaping."""
211 if self.sourcefile.endswith(".xhp"):
212 self._text = escape_help_text(text)
213 else:
214 self._text = escape_text(text)
215 text = property(gettext, settext)
216
220
222 """return a line in tab-delimited form"""
223 parts = self.getparts()
224 return "\t".join(parts)
225
227 """get the key that identifies the resource"""
228 return (self.project, self.sourcefile, self.resourcetype, self.groupid,
229 self.localid, self.platform)
230
231
233 """this represents a number of translations of a resource"""
234
236 """construct the oounit"""
237 self.languages = {}
238 self.lines = []
239
241 """add a line to the oounit"""
242 self.languages[line.languageid] = line
243 self.lines.append(line)
244
248
249 - def getoutput(self, skip_source=False, fallback_lang=None):
250 """return the lines in tab-delimited form"""
251 if skip_source:
252 lines = self.lines[1:]
253 if not lines:
254
255 new_line = ooline(self.lines[0].getparts())
256 new_line.languageid = fallback_lang
257 lines = [new_line]
258 else:
259 lines = self.lines
260 return "\r\n".join([str(line) for line in lines])
261
262
264 """this represents an entire .oo file"""
265 UnitClass = oounit
266
268 """constructs the oofile"""
269 self.oolines = []
270 self.units = []
271 self.ookeys = {}
272 self.filename = ""
273 self.languages = []
274 if input is not None:
275 self.parse(input)
276
278 """adds a parsed line to the file"""
279 key = thisline.getkey()
280 element = self.ookeys.get(key, None)
281 if element is None:
282 element = self.UnitClass()
283 self.units.append(element)
284 self.ookeys[key] = element
285 element.addline(thisline)
286 self.oolines.append(thisline)
287 if thisline.languageid not in self.languages:
288 self.languages.append(thisline.languageid)
289
291 """parses lines and adds them to the file"""
292 if not self.filename:
293 self.filename = getattr(input, 'name', '')
294 if hasattr(input, "read"):
295 src = input.read()
296 input.close()
297 else:
298 src = input
299 for line in src.split("\n"):
300 line = quote.rstripeol(line)
301 if not line:
302 continue
303 parts = line.split("\t")
304 thisline = ooline(parts)
305 self.addline(thisline)
306
307 - def __str__(self, skip_source=False, fallback_lang=None):
310
311 - def getoutput(self, skip_source=False, fallback_lang=None):
312 """converts all the lines back to tab-delimited form"""
313 lines = []
314 for oe in self.units:
315 if len(oe.lines) > 2:
316 warnings.warn("contains %d lines (should be 2 at most): languages %r" % (len(oe.lines), oe.languages))
317 oekeys = [line.getkey() for line in oe.lines]
318 warnings.warn("contains %d lines (should be 2 at most): keys %r" % (len(oe.lines), oekeys))
319 oeline = oe.getoutput(skip_source, fallback_lang) + "\r\n"
320 lines.append(oeline)
321 return "".join(lines)
322
323
325 """this takes a huge GSI file and represents it as multiple smaller files..."""
326
327 - def __init__(self, filename, mode=None, multifilestyle="single"):
328 """initialises oomultifile from a seekable inputfile or writable outputfile"""
329 self.filename = filename
330 if mode is None:
331 if os.path.exists(filename):
332 mode = 'r'
333 else:
334 mode = 'w'
335 self.mode = mode
336 self.multifilestyle = multifilestyle
337 self.multifilename = os.path.splitext(filename)[0]
338 self.multifile = open(filename, mode)
339 self.subfilelines = {}
340 if mode == "r":
341 self.createsubfileindex()
342
344 """reads in all the lines and works out the subfiles"""
345 linenum = 0
346 for line in self.multifile:
347 subfile = self.getsubfilename(line)
348 if not subfile in self.subfilelines:
349 self.subfilelines[subfile] = []
350 self.subfilelines[subfile].append(linenum)
351 linenum += 1
352
354 """looks up the subfile name for the line"""
355 if line.count("\t") < 2:
356 raise ValueError("invalid tab-delimited line: %r" % line)
357 lineparts = line.split("\t", 2)
358 module, filename = lineparts[0], lineparts[1]
359 if self.multifilestyle == "onefile":
360 ooname = self.multifilename
361 elif self.multifilestyle == "toplevel":
362 ooname = module
363 else:
364 filename = filename.replace("\\", "/")
365 fileparts = [module] + filename.split("/")
366 ooname = os.path.join(*fileparts[:-1])
367 return ooname + os.extsep + "oo"
368
370 """returns a list of subfiles in the file"""
371 return self.subfilelines.keys()
372
374 """iterates through the subfile names"""
375 for subfile in self.listsubfiles():
376 yield subfile
377
379 """checks if this pathname is a valid subfile"""
380 return pathname in self.subfilelines
381
383 """returns the list of lines matching the subfile"""
384 lines = []
385 requiredlines = dict.fromkeys(self.subfilelines[subfile])
386 linenum = 0
387 self.multifile.seek(0)
388 for line in self.multifile:
389 if linenum in requiredlines:
390 lines.append(line)
391 linenum += 1
392 return "".join(lines)
393
400
402 """returns a pseudo-file object for the given subfile"""
403
404 def onclose(contents):
405 self.multifile.write(contents)
406 self.multifile.flush()
407 outputfile = wStringIO.CatchStringOutput(onclose)
408 outputfile.filename = subfile
409 return outputfile
410
412 """returns an oofile built up from the given subfile's lines"""
413 subfilesrc = self.getsubfilesrc(subfile)
414 oosubfile = oofile()
415 oosubfile.filename = subfile
416 oosubfile.parse(subfilesrc)
417 return oosubfile
418