Package translate :: Package storage :: Package xml_extract :: Module generate
[hide private]
[frames | no frames]

Source Code for Module translate.storage.xml_extract.generate

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2002-2006 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21  # 
 22   
 23  import lxml.etree as etree 
 24   
 25  from translate.storage import base 
 26   
 27  from translate.misc.typecheck import accepts, IsCallable 
 28  from translate.misc.typecheck.typeclasses import Number 
 29  from translate.storage.xml_extract import misc 
 30  from translate.storage.xml_extract import extract 
 31  from translate.storage.xml_extract import unit_tree 
 32  from translate.storage.xml_name import XmlNamer 
@accepts(etree._Element)
def _get_tag_arrays(dom_node):
    """Group the direct children of dom_node by their tag.

    The result is a dictionary keyed by child tag. Each value is the list of
    children of dom_node carrying that tag, in the order in which they
    appear under dom_node.

    Illustration (element addresses elided)::

        >>> xml = '<a><b></b><c></c><b></b><d/></a>'
        >>> element = etree.fromstring(xml)
        >>> _get_tag_arrays(element)
        {'b': [<Element b at ...>, <Element b at ...>], 'c': [<Element c at ...>], 'd': [<Element d at ...>]}
    """
    children_by_tag = {}
    for node in dom_node:
        # setdefault creates the list the first time a tag is seen.
        children_by_tag.setdefault(node.tag, []).append(node)
    return children_by_tag
51
@accepts(etree._Element, unit_tree.XPathTree, IsCallable())
def apply_translations(dom_node, unit_node, do_translate):
    """Walk dom_node and unit_node in parallel, translating matched nodes.

    The keys of unit_node.children are (tag, index) pairs. For each pair the
    index-th child of dom_node whose (namespace-qualified) tag matches is
    looked up and the function recurses into it with the corresponding unit
    child. Unit children which have no matching DOM child are skipped
    silently.

    After the children have been processed, do_translate(dom_node, unit) is
    invoked if unit_node has a translation unit attached; do_translate is
    expected to replace the text in dom_node with the text from the unit.

    @param dom_node: DOM element to be updated in place.
    @param unit_node: tree node whose children are keyed by (tag, index).
    @param do_translate: callable taking (dom_node, unit).
    """
    tag_array = _get_tag_arrays(dom_node)
    for (tag, index), unit_child in unit_node.children.items():
        try:
            dom_child = tag_array[XmlNamer(dom_node).name(tag)][index]
            apply_translations(dom_child, unit_child, do_translate)
        except (KeyError, IndexError):
            # KeyError: the tag is not in tag_array.
            # IndexError: the index is not in tag_array[tag].
            # We might want to complain to the user in the future.
            # NOTE(review): the try block also covers the recursive call, so
            # a KeyError/IndexError raised deeper in the recursion (including
            # inside do_translate) is swallowed here too. That scope is kept
            # as it was -- confirm whether it is intended.
            pass
    # If there is a translation unit associated with this unit_node, let
    # do_translate update dom_node from it. Identity comparison is used so
    # that a unit class overriding its comparison operators cannot affect
    # (or break) the None check.
    if unit_node.unit is not None:
        do_translate(dom_node, unit_node.unit)
73
@accepts(IsCallable(), etree._Element, state=[Number])
def reduce_dom_tree(f, dom_node, *state):
    """Fold f over the DOM tree rooted at dom_node using misc.reduce_tree.

    dom_node is passed to misc.reduce_tree both as the parent and as the
    starting node. Within this module the callbacks handed in as f take
    (parent_node, node, accumulator) and return the accumulator.
    """
    def child_nodes(node):
        # The node itself is handed back as its "children" collection;
        # presumably misc.reduce_tree iterates over it, which for an
        # element yields its child elements -- verify against misc.
        return node

    return misc.reduce_tree(f, dom_node, dom_node, child_nodes, *state)
77
@accepts(etree._Element, etree._Element)
def find_dom_root(parent_dom_node, dom_node):
    """Return the ancestor-or-self of dom_node that is a direct child of
    parent_dom_node, or None if there is no such node.

    @see: L{find_placeable_dom_tree_roots}
    """
    if parent_dom_node is None:
        return None
    current = dom_node
    # Climb towards the document root until we hit a node whose parent is
    # parent_dom_node, or run out of ancestors.
    while current is not None:
        parent = current.getparent()
        if parent == parent_dom_node:
            return current
        current = parent
    return None
@accepts(extract.Translatable)
def find_placeable_dom_tree_roots(unit_node):
    """For every inline placeable, find the root DOM node of the placeable
    within its parent.

    In the pseudo-ODF example below there is an inline span element, but the
    span is wrapped in other tags (which we never process). When splicing
    the template DOM tree (the DOM of the XML document used to generate the
    translated XML document) whole DOM sub-trees have to be moved around,
    and for that the roots of those sub-trees are needed::

        <p> This is text        <- paragraph containing an inline placeable
          <blah>                <- the placeable's root (what we want to find)
            ...                 <- any number of intermediate DOM nodes
              <span> bold text  <- the placeable's Translatable holds a
                                   reference to this DOM node

    Returns a dictionary mapping each node visited by
    extract.reduce_unit_tree to the result of L{find_dom_root} for that node
    relative to its parent's DOM node.
    """
    def record_root(parent_unit_node, child_unit_node, roots):
        roots[child_unit_node] = find_dom_root(parent_unit_node.dom_node,
                                               child_unit_node.dom_node)
        return roots

    return extract.reduce_unit_tree(record_root, unit_node, {})


@accepts(extract.Translatable, etree._Element)
def _map_source_dom_to_doc_dom(unit_node, source_dom_node):
    """Map the placeable DOM nodes below source_dom_node to DOM nodes in the
    XML document template (the latter are obtained from unit_node).

    Only template nodes which are the roots of placeables are of interest;
    see L{find_placeable_dom_tree_roots}.

    XLIFF source::

        <source>This is text <g> bold text</g> and a footnote<x/></source>

    Input XML document used as a template::

        <p>This is text<blah>...<span> bold text</span>...</blah> and <note>...</note></p>

    Here the XLIFF source node <g> is associated with the template node
    <blah>, whereas the XLIFF source node <x> is associated with the
    template node <note>.
    """
    placeable_roots = find_placeable_dom_tree_roots(unit_node)
    mapping = {}

    def pair_up(translatable, xliff_node):
        # Placeables and XLIFF children are matched purely by position.
        for placeable, xliff_child in zip(translatable.placeables, xliff_node):
            mapping[xliff_child] = placeable_roots[placeable]
            pair_up(placeable, xliff_child)

    pair_up(unit_node, source_dom_node)
    return mapping
@accepts(etree._Element, etree._Element)
def _map_target_dom_to_source_dom(source_dom_node, target_dom_node):
    """Pair up placeables in target_dom_node and source_dom_node which
    carry the same 'id' attribute.

    The placeables are XLIFF placeables. The XLIFF standard requires
    placeable ids to be unique, and an id is never modified, so a placeable
    in the target text can be matched with its counterpart in the source
    text even when the translation has moved it.

    Returns a dictionary from target DOM nodes to source DOM nodes.
    """
    def index_by_id(parent_node, node, nodes_by_id):
        # Record node under its 'id' attribute, if it has one.
        attributes = node.attrib
        if u'id' in attributes:
            nodes_by_id[attributes[u'id']] = node
        return nodes_by_id

    # id attribute -> target DOM node carrying that id.
    target_nodes_by_id = reduce_dom_tree(index_by_id, target_dom_node, {})

    def link_to_source(parent_node, node, target_to_source):
        # A source node with an id that also occurs in the target tree is
        # associated with the target node of the same id.
        attributes = node.attrib
        if u'id' in attributes and attributes[u'id'] in target_nodes_by_id:
            target_to_source[target_nodes_by_id[attributes[u'id']]] = node
        return target_to_source

    # Visit every node in the tree rooted at source_dom_node and link the
    # ones whose id is known to the matching target node.
    return reduce_dom_tree(link_to_source, source_dom_node, {})
def _build_target_dom_to_doc_dom(unit_node, source_dom, target_dom):
    """Build a mapping from nodes of target_dom to nodes of the document DOM.

    The mapping is obtained by composing (with misc.compose_mappings) the
    target -> source mapping from L{_map_target_dom_to_source_dom} with the
    source -> document mapping from L{_map_source_dom_to_doc_dom}.
    """
    source_to_doc = _map_source_dom_to_doc_dom(unit_node, source_dom)
    target_to_source = _map_target_dom_to_source_dom(source_dom, target_dom)
    return misc.compose_mappings(target_to_source, source_to_doc)
184
@accepts(etree._Element, {etree._Element: etree._Element})
def _get_translated_node(target_node, target_dom_to_doc_dom):
    """Look up the node which 'target_node' maps to, copy the tail text of
    'target_node' onto it and return it."""
    doc_node = target_dom_to_doc_dom[target_node]
    doc_node.tail = target_node.tail
    return doc_node
192
@accepts(etree._Element, etree._Element, {etree._Element: etree._Element})
def _build_translated_dom(dom_node, target_node, target_dom_to_doc_dom):
    """Rebuild the children of 'dom_node' following the "shape" of
    'target_node' (which is a DOM tree).

    'target_dom_to_doc_dom' maps nodes of 'target_node' to the nodes which
    must be inserted into 'dom_node'.
    """
    dom_node.text = target_node.text

    def mapped_children():
        # For every child of target_node which does not map to None, yield
        # the mapped node; _get_translated_node also gives that node the
        # tail text of the target child.
        for target_child in target_node:
            if target_dom_to_doc_dom[target_child] is not None:
                yield _get_translated_node(target_child, target_dom_to_doc_dom)

    # The generator is consumed lazily by extend, so nodes are looked up and
    # appended one at a time.
    dom_node.extend(mapped_children())

    # Recurse into positionally matched children of dom_node and target_node.
    # NOTE(review): children skipped by the None filter above are not skipped
    # here, so the pairing can shift when a target child maps to None --
    # confirm whether that can happen in practice.
    for doc_child, target_child in zip(dom_node, target_node):
        _build_translated_dom(doc_child, target_child, target_dom_to_doc_dom)
214
@accepts(IsCallable())
def replace_dom_text(make_parse_state):
    """Create and return a function::

        action: etree._Element x base.TranslationUnit -> None

    The returned function takes a dom_node and a translation unit and
    rearranges the dom_node according to how the placeables in unit.target
    have been rearranged relative to their positions in unit.source.

    @param make_parse_state: callable invoked without arguments to obtain a
        new parse state for every call of the returned function.
    """

    @accepts(etree._Element, base.TranslationUnit)
    def action(dom_node, unit):
        """Update the text in dom_node and the tails of its children from
        the unit's target, or from its source when there is no target."""
        source_dom = unit.source_dom
        if unit.target_dom is None:
            # No translation available: fall back to the source.
            target_dom = unit.source_dom
        else:
            target_dom = unit.target_dom
        # Build a tree of (non-DOM) nodes corresponding to the translatable
        # DOM nodes in 'dom_node'. A fresh parse state is created each time
        # so that we never work with stale parse state info.
        translatables = extract.find_translatable_dom_nodes(dom_node, make_parse_state())
        unit_node = translatables[0]
        target_dom_to_doc_dom = _build_target_dom_to_doc_dom(unit_node, source_dom, target_dom)
        # The children of dom_node must be cleared out before the sub-tree
        # rooted at dom_node is reconstructed.
        dom_node[:] = []
        _build_translated_dom(dom_node, target_dom, target_dom_to_doc_dom)

    return action