1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """Parent class for LISA standards (TMX, TBX, XLIFF)"""
22
23 import re
24
25 from translate.storage import base
26 from translate.lang import data
27 try:
28 from lxml import etree
29 from translate.misc.xml_helpers import *
30 except ImportError, e:
31 raise ImportError("lxml is not installed. It might be possible to continue without support for XML formats.")
32
33
35 """generate match objects for all L{re_obj} matches in L{text}."""
36 start = 0
37 max = len(text)
38 while start < max:
39 m = re_obj.search(text, start)
40 if not m: break
41 yield m
42 start = m.end()
43
44
# Regular-expression sources for the inline placeholders recognised in unit
# text.  Each pattern has exactly one capturing group spanning the whole
# placeholder.  All of them are raw strings so that regex escapes such as
# ``\$`` are not interpreted as (invalid) Python string escapes.
placeholders = [
    # printf-style conversion: "%" followed by a single conversion character
    r'(%[diouxXeEfFgGcrs])',
    # one or more backslashes, optionally followed by one more character
    r'(\\+.?)',
    # NOTE(review): the "$" below is unescaped, so it acts as an
    # end-of-string anchor and this pattern cannot match text like "%1$lx".
    # Kept exactly as it was; confirm the intent before escaping it, since
    # the sibling pattern below already matches the "%1$l" prefix.
    r'(%[0-9]$lx)',
    # positional printf-style conversion: "%", a digit, "$", a lowercase letter
    r'(%[0-9]\$[a-z])',
    # shortest "<...>" run, i.e. something shaped like a markup tag
    r'(<.+?>)',
]
# Compiled counterparts of ``placeholders``, in the same order.
re_placeholders = [re.compile(ph) for ph in placeholders]
56
57
59 """
60 A single unit in the file. Provisional work is done to make several
61 languages possible.
62 """
63
64
65 rootNode = ""
66
67 languageNode = ""
68
69 textNode = ""
70
71 namespace = None
72 _default_xml_space = "preserve"
73 """The default handling of spacing in the absence of an xml:space attribute.
74
75 This is mostly for correcting XLIFF behaviour."""
76
def __init__(self, source, empty=False, **kwargs):
    """Construct a unit containing the given source string.

    When ``empty`` is true only the rich source/target attributes are reset
    to None and nothing else is initialised: no XML element is created and
    the parent constructor is not called.

    Otherwise a new root element named after ``self.rootNode`` is created
    and the parent constructor is invoked with ``source``.  Additional
    keyword arguments are accepted but not used by this method.
    """
    if not empty:
        # The element has to exist before the parent constructor runs.
        self.xmlelement = etree.Element(self.rootNode)
        super(LISAunit, self).__init__(source)
        return

    self._rich_source = None
    self._rich_target = None
86
102
104 """Returns name in Clark notation.
105
106 For example namespaced("source") in an XLIFF document might return::
107 {urn:oasis:names:tc:xliff:document:1.1}source
108 This is needed throughout lxml.
109 """
110 return namespaced(self.namespace, name)
111
113 languageNodes = self.getlanguageNodes()
114 if len(languageNodes) > 0:
115 self.xmlelement[0] = dom_node
116 else:
117 self.xmlelement.append(dom_node)
118
121 source_dom = property(get_source_dom, set_source_dom)
122
126
129 source = property(getsource, setsource)
130
132 languageNodes = self.getlanguageNodes()
133 assert len(languageNodes) > 0
134 if dom_node is not None:
135 if append or len(languageNodes) == 1:
136 self.xmlelement.append(dom_node)
137 else:
138 self.xmlelement.insert(1, dom_node)
139 if not append and len(languageNodes) > 1:
140 self.xmlelement.remove(languageNodes[1])
141
147 target_dom = property(get_target_dom)
148
149 - def settarget(self, text, lang='xx', append=False):
172
177 target = property(gettarget, settarget)
178
180 """Returns a xml Element setup with given parameters to represent a
181 single language entry. Has to be overridden."""
182 return None
183
185 """Create the text node in parent containing all the ph tags"""
186 matches = _getPhMatches(text)
187 if not matches:
188 parent.text = text
189 return
190
191
192 start = matches[0].start()
193 pretext = text[:start]
194 if pretext:
195 parent.text = pretext
196 lasttag = parent
197 for i, m in enumerate(matches):
198
199 pretext = text[start:m.start()]
200
201 if pretext:
202 lasttag.tail = pretext
203
204 phnode = etree.SubElement(parent, "ph")
205 phnode.set("id", str(i+1))
206 phnode.text = m.group()
207 lasttag = phnode
208 start = m.end()
209
210 if text[start:]:
211 lasttag.tail = text[start:]
212
214 """Returns a list of all nodes that contain per language information."""
215 return list(self.xmlelement.iterchildren(self.namespaced(self.languageNode)))
216
218 """Retrieves a languageNode either by language or by index"""
219 if lang is None and index is None:
220 raise KeyError("No criterea for languageNode given")
221 languageNodes = self.getlanguageNodes()
222 if lang:
223 for set in languageNodes:
224 if getXMLlang(set) == lang:
225 return set
226 else:
227 if index >= len(languageNodes):
228 return None
229 else:
230 return languageNodes[index]
231 return None
232
def getNodeText(self, languageNode, xml_space="preserve"):
    """Retrieve the text held by the given languageNode.

    :param languageNode: the per-language element to read, or None.
    :param xml_space: spacing mode handed through to ``getText``.
    :return: None when ``languageNode`` is None, or when ``self.textNode``
        is set but the language node has no descendant with that
        (namespaced) tag.  Otherwise the result of ``getText`` for the first
        such descendant, or for ``languageNode`` itself when
        ``self.textNode`` is empty.
    """
    if languageNode is None:
        return None
    if not self.textNode:
        return getText(languageNode, xml_space)

    # Only the first matching descendant is used.  Looping (instead of
    # calling .next() on the iterator) means a node without the expected
    # structure yields None rather than an escaping StopIteration.
    for term in languageNode.iterdescendants(self.namespaced(self.textNode)):
        return getText(term, xml_space)
    return None
245
247 return etree.tostring(self.xmlelement, pretty_print=True, encoding='utf-8')
248
251
252 xid = property(lambda self: self.xmlelement.attrib[self.namespaced('xid')],
253 lambda self, value: self._set_property(self.namespaced('xid'), value))
254
255 rid = property(lambda self: self.xmlelement.attrib[self.namespaced('rid')],
256 lambda self, value: self._set_property(self.namespaced('rid'), value))
257
259 term = cls(None, empty=True)
260 term.xmlelement = element
261 return term
262 createfromxmlElement = classmethod(createfromxmlElement)
263
265 """A class representing a file store for one of the LISA file formats."""
266 UnitClass = LISAunit
267
268 rootNode = ""
269
270 bodyNode = ""
271
272 XMLskeleton = ""
273
274 namespace = None
275
276 - def __init__(self, inputfile=None, sourcelanguage='en', targetlanguage=None, unitclass=None):
289
291 """Method to be overridden to initialise headers, etc."""
292 pass
293
295 """Returns name in Clark notation.
296
297 For example namespaced("source") in an XLIFF document might return::
298 {urn:oasis:names:tc:xliff:document:1.1}source
299 This is needed throughout lxml.
300 """
301 return namespaced(self.namespace, name)
302
def initbody(self):
    """Initialise self.body so it never needs to be retrieved from the XML again.

    Sets ``self.namespace`` to the default (un-prefixed) namespace declared
    on the document root, or None if there is none, and then stores in
    ``self.body`` the first element below the root whose tag is the
    namespaced ``self.bodyNode`` (None if no such element is found).
    """
    self.namespace = self.document.getroot().nsmap.get(None, None)
    # Root-relative './/' form: lxml rewrites an absolute '//' path on a tree
    # to exactly this, so the result is the same without relying on that
    # rewrite (which newer releases flag with a FutureWarning).
    self.body = self.document.find('.//%s' % self.namespaced(self.bodyNode))
307
309
310 """Adds and returns a new unit with the given string as first entry."""
311 newunit = self.UnitClass(source)
312 self.addunit(newunit)
313 return newunit
314
315 - def addunit(self, unit, new=True):
320
322 """Converts to a string containing the file's XML"""
323 return etree.tostring(self.document, pretty_print=True, xml_declaration=True, encoding='utf-8')
324
326 """Populates this object from the given xml string"""
327 if not hasattr(self, 'filename'):
328 self.filename = getattr(xml, 'name', '')
329 if hasattr(xml, "read"):
330 xml.seek(0)
331 posrc = xml.read()
332 xml = posrc
333 if etree.LXML_VERSION > (2, 1, 0):
334
335
336 parser = etree.XMLParser(strip_cdata=False)
337 else:
338 parser = etree.XMLParser()
339 self.document = etree.fromstring(xml, parser).getroottree()
340 self._encoding = self.document.docinfo.encoding
341 self.initbody()
342 assert self.document.getroot().tag == self.namespaced(self.rootNode)
343 for entry in self.body.iterdescendants(self.namespaced(self.UnitClass.rootNode)):
344 term = self.UnitClass.createfromxmlElement(entry)
345 self.addunit(term, new=False)
346