1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
"""Grep XLIFF, Gettext PO and TMX localization files.

Matches are output to snippet files of the same type which can then be reviewed
and later merged using pomerge.

See: http://translate.sourceforge.net/wiki/toolkit/pogrep for examples and
usage instructions.
"""
30
31 import re
32 import locale
33
34 from translate.storage import factory
35 from translate.storage.poheader import poheader
36 from translate.misc import optrecurse
37 from translate.misc.multistring import multistring
38 from translate.lang import data
39
40
class GrepMatch(object):
    """Just a small data structure that represents a search match.

    A match stores the unit it belongs to, the name of the unit part that
    matched (``part``), the index of the string inside that part (``part_n``)
    and the ``start``/``end`` positions handed in by the caller.

    NOTE(review): this block was recovered from a damaged extract in which the
    ``class`` line and the ``def`` lines of both accessor methods were missing.
    The names ``GrepMatch``, ``get_getter`` and ``get_setter`` are
    reconstructed - confirm them against the pristine file.
    """

    def __init__(self, unit, part='target', part_n=0, start=0, end=0):
        """Store the match coordinates verbatim; nothing is validated here."""
        self.unit = unit
        self.part = part
        self.part_n = part_n
        self.start = start
        self.end = end

    def get_getter(self):
        """Return a zero-argument callable that reads the matched string.

        The callable reads from the unit lazily, each time it is invoked.
        ``None`` is returned when ``self.part`` is not one of ``'target'``,
        ``'source'``, ``'notes'`` or ``'locations'``.
        """
        if self.part in ('target', 'source'):
            attr = self.part
            # Whether the unit is plural is decided now, when the getter is
            # requested; the value itself is only read when it is called.
            if self.unit.hasplural():
                return lambda: getattr(self.unit, attr).strings[self.part_n]
            return lambda: getattr(self.unit, attr)
        if self.part == 'notes':
            return lambda: self.unit.getnotes()[self.part_n]
        if self.part == 'locations':
            return lambda: self.unit.getlocations()[self.part_n]
        return None

    def get_setter(self):
        """Return a one-argument callable that overwrites the matched string.

        Only the ``'target'`` part can be written; for every other part
        ``None`` is returned.
        """
        if self.part != 'target':
            return None

        if self.unit.hasplural():

            def setter(value):
                # Replace one plural form, then assign the whole list back to
                # the unit's ``target``.
                strings = self.unit.target.strings
                strings[self.part_n] = value
                self.unit.target = strings
        else:

            def setter(value):
                self.unit.target = value
        return setter
90
91
100
103
104
def real_index(string, nfc_index):
    """Calculate the real index in the unnormalized string that corresponds to
    the index nfc_index in the normalized string.

    The prefix ``string[:length]`` is grown one character at a time until its
    normalized form is longer than ``nfc_index``; the previous prefix length
    is the answer. If the end of ``string`` is reached first, ``len(string)``
    is returned.

    NOTE(review): the ``def`` line was missing from the extract this block was
    recovered from; the function name is reconstructed - confirm it.
    """
    max_length = len(string)
    length = nfc_index
    while len(data.normalize(string[:length])) <= nfc_index:
        # ``>=`` instead of ``==``: when ``nfc_index`` already exceeds
        # ``len(string)`` the equality test can never become true and the loop
        # would spin forever. Clamp to the end of the raw string instead.
        if length >= max_length:
            return max_length
        length += 1
    return length - 1
115
116
129
130
132
class GrepFilter(object):
    """Decide whether units match a search string and locate the matches.

    NOTE(review): this block was recovered from a damaged extract. The
    ``class`` line and the ``def`` lines of every method except ``__init__``
    were missing: ``matches`` is named after its call sites inside this class,
    while ``GrepFilter``, ``filterunit`` and ``getmatches`` are reconstructed
    names that must be confirmed. ``rungrep`` in this file also calls a
    ``filterfile`` method on this object; its definition is not part of the
    extract and is therefore not reproduced here.
    """

    def __init__(self, searchstring, searchparts, ignorecase=False, useregexp=False,
                 invertmatch=False, keeptranslations=False, accelchar=None, encoding='utf-8',
                 max_matches=0):
        """Prepare a filter for ``searchstring``.

        :param searchstring: text or regular expression to look for; byte
            strings are decoded with ``encoding``.
        :param searchparts: container of part names to search. ``msgid``,
            ``msgstr`` and ``comment`` are accepted as aliases of ``source``,
            ``target`` and ``notes``. When empty or ``None``, source and
            target are searched.
        :param ignorecase: compare case-insensitively.
        :param useregexp: treat ``searchstring`` as a regular expression.
        :param invertmatch: report the strings that do *not* match.
        :param keeptranslations: make ``filterunit`` accept every unit that
            has a target.
        :param accelchar: accelerator marker to strip before comparing.
        :param max_matches: upper bound enforced by ``getmatches`` (0 = none).
        """
        # Decode byte strings only; text is used as given. Checking for
        # ``bytes`` avoids the Python 2-only ``unicode`` builtin.
        if isinstance(searchstring, bytes):
            searchstring = searchstring.decode(encoding)
        self.searchstring = data.normalize(searchstring)

        if searchparts:
            self.search_source = ('source' in searchparts) or ('msgid' in searchparts)
            self.search_target = ('target' in searchparts) or ('msgstr' in searchparts)
            self.search_notes = ('notes' in searchparts) or ('comment' in searchparts)
            self.search_locations = 'locations' in searchparts
        else:
            self.search_source = True
            self.search_target = True
            self.search_notes = False
            self.search_locations = False

        self.ignorecase = ignorecase
        self.useregexp = useregexp
        if self.useregexp:
            # A regular expression must not be lowercased: that would turn
            # escapes such as ``\S`` or ``\W`` into ``\s`` / ``\w`` and change
            # their meaning. Case folding is left to the regex engine.
            flags = (re.IGNORECASE | re.UNICODE) if self.ignorecase else 0
            self.searchpattern = re.compile(self.searchstring, flags)
        elif self.ignorecase:
            self.searchstring = self.searchstring.lower()

        self.invertmatch = invertmatch
        self.keeptranslations = keeptranslations
        self.accelchar = accelchar
        self.max_matches = max_matches

    @staticmethod
    def _as_strings(value):
        """Return the plural forms of a multistring, else ``[value]``."""
        if isinstance(value, multistring):
            return value.strings
        return [value]

    def matches(self, teststr):
        """Return whether ``teststr`` satisfies the search.

        ``None`` never matches, not even with ``invertmatch`` set.
        """
        if teststr is None:
            return False
        teststr = data.normalize(teststr)
        if self.ignorecase:
            teststr = teststr.lower()
        if self.accelchar:
            # Plain string replacement: the accelerator is a literal
            # character, not a regular expression. A doubled marker becomes
            # "#", then every remaining single marker is dropped.
            teststr = teststr.replace(self.accelchar * 2, "#")
            teststr = teststr.replace(self.accelchar, "")
        if self.useregexp:
            found = self.searchpattern.search(teststr) is not None
        else:
            found = self.searchstring in teststr
        if self.invertmatch:
            found = not found
        return found

    def filterunit(self, unit):
        """runs filters on an element

        Headers always pass, as do translated units when ``keeptranslations``
        is set. Otherwise the enabled parts are tested in the order source,
        target, notes, locations and the first hit wins.
        """
        if unit.isheader():
            return True

        if self.keeptranslations and unit.target:
            return True

        if self.search_source:
            if any(self.matches(string) for string in self._as_strings(unit.source)):
                return True

        if self.search_target:
            if any(self.matches(string) for string in self._as_strings(unit.target)):
                return True

        if self.search_notes and self.matches(unit.getnotes()):
            return True

        # Locations are joined so the search runs over one string.
        if self.search_locations and self.matches(u" ".join(unit.getlocations())):
            return True
        return False

    def getmatches(self, units):
        """Collect every match found in ``units``.

        :return: ``(matches, indexes)`` - the objects produced by
            ``find_matches`` and the positions in ``units`` of the units that
            contributed at least one of them. Both are empty when the search
            string is empty.
        :raises Exception: when ``max_matches`` is non-zero and exceeded.
        """
        if not self.searchstring:
            return [], []

        searchstring = self.searchstring
        # ``re.LOCALE`` is deliberately not combined with ``re.UNICODE``: the
        # pattern is text, and Python 3 rejects LOCALE for text patterns.
        flags = re.MULTILINE | re.UNICODE
        if self.ignorecase:
            flags |= re.IGNORECASE
        if not self.useregexp:
            searchstring = re.escape(searchstring)
        self.re_search = re.compile(u'(%s)' % (searchstring), flags)

        matches = []
        indexes = []

        for index, unit in enumerate(units):
            old_length = len(matches)

            if self.search_target:
                if unit.hasplural():
                    targets = unit.target.strings
                else:
                    targets = [unit.target]
                matches.extend(find_matches(unit, 'target', targets, self.re_search))
            if self.search_source:
                if unit.hasplural():
                    sources = unit.source.strings
                else:
                    sources = [unit.source]
                matches.extend(find_matches(unit, 'source', sources, self.re_search))
            if self.search_notes:
                # NOTE(review): ``filterunit`` treats ``getnotes()`` as one
                # string, yet here it is passed where the other parts pass a
                # list of strings - confirm ``find_matches`` copes with that.
                matches.extend(find_matches(unit, 'notes', unit.getnotes(), self.re_search))
            if self.search_locations:
                matches.extend(find_matches(unit, 'locations', unit.getlocations(), self.re_search))

            if self.max_matches and len(matches) > self.max_matches:
                raise Exception("Too many matches found")

            if len(matches) > old_length:
                indexes.append(index)

        return matches, indexes
277
278
class GrepOptionParser(optrecurse.RecursiveOptionParser):
    """a specialized Option Parser for the grep tool...

    NOTE(review): this block was recovered from a damaged extract in which the
    ``class`` line and both ``def`` lines were missing. The base class and the
    ``None`` defaults of ``parse_args`` are reconstructed from how the class
    is used in this file - confirm them against the pristine file.
    """

    def parse_args(self, args=None, values=None):
        """parses the command line options, handling implicit input/output args

        The first free-standing argument is the search string. Remaining
        free-standing arguments fill in ``--input`` and ``--output`` when
        those were not given explicitly; anything left over is an error.
        """
        (options, args) = optrecurse.optparse.OptionParser.parse_args(self, args, values)

        # The search string is mandatory and always comes first.
        if args:
            options.searchstring = args[0]
            args = args[1:]
        else:
            self.error("At least one argument must be given for the search string")

        if args and not options.input:
            if not options.output:
                # Keep the last argument back: it becomes the output below.
                options.input = args[:-1]
                args = args[-1:]
            else:
                options.input = args
                args = []
        if args and not options.output:
            options.output = args[-1]
            args = args[:-1]
        if args:
            self.error("You have used an invalid combination of --input, --output and freestanding args")

        # A single input is handed on as a plain value, not a one-item list.
        if isinstance(options.input, list) and len(options.input) == 1:
            options.input = options.input[0]
        return (options, args)

    def set_usage(self, usage=None):
        """sets the usage string - if usage not given, uses getusagestring for each option"""
        if usage is None:
            option_usage = " ".join([self.getusagestring(option) for option in self.option_list])
            self.usage = "%prog searchstring " + option_usage
        else:
            super(GrepOptionParser, self).set_usage(usage)
313
322
323
def rungrep(inputfile, outputfile, templatefile, checkfilter):
    """reads in inputfile, filters using checkfilter, writes to outputfile

    :param inputfile: handed to ``factory.getobject`` to load the store.
    :param outputfile: object with a ``write`` method that receives the
        serialized result.
    :param templatefile: not used by this function.
    :param checkfilter: object whose ``filterfile`` method produces the
        filtered store.
    :return: ``False`` when the filtered store is empty (nothing is written),
        ``True`` otherwise.
    """
    fromfile = factory.getobject(inputfile)
    tofile = checkfilter.filterfile(fromfile)
    if tofile.isempty():
        return False
    outputfile.write(str(tofile))
    return True
332
333
def cmdlineparser():
    """Build and return the option parser for the grep tool.

    Every supported extension maps to itself as output format and is
    processed by ``rungrep``; input without a recognised extension (the
    ``None`` key) is treated as ``po``.

    NOTE(review): the ``def`` line was missing from the extract this block was
    recovered from; the name and the empty parameter list are taken from the
    ``cmdlineparser()`` call elsewhere in this file.
    """
    extensions = ("po", "pot", "mo", "gmo", "tmx", "xliff", "xlf", "xlff")
    formats = dict((ext, (ext, rungrep)) for ext in extensions)
    formats[None] = ("po", rungrep)

    parser = GrepOptionParser(formats)
    parser.add_option("", "--search", dest="searchparts",
        action="append", type="choice",
        choices=["source", "target", "notes", "locations", "msgid", "msgstr", "comment"],
        metavar="SEARCHPARTS", help="searches the given parts (source, target, notes and locations)")
    parser.add_option("-I", "--ignore-case", dest="ignorecase",
        action="store_true", default=False, help="ignore case distinctions")
    parser.add_option("-e", "--regexp", dest="useregexp",
        action="store_true", default=False, help="use regular expression matching")
    parser.add_option("-v", "--invert-match", dest="invertmatch",
        action="store_true", default=False, help="select non-matching lines")
    parser.add_option("", "--accelerator", dest="accelchar",
        action="store", type="choice", choices=["&", "_", "~"],
        metavar="ACCELERATOR", help="ignores the given accelerator when matching")
    parser.add_option("-k", "--keep-translations", dest="keeptranslations",
        action="store_true", default=False, help="always extract units with translations")
    parser.set_usage()
    parser.passthrough.append('checkfilter')
    parser.description = __doc__
    return parser
359
360
def main():
    """Command-line entry point: build the option parser and run it.

    NOTE(review): the ``def`` line was missing from the extract this block was
    recovered from; the name is taken from the ``main()`` call in the
    ``__main__`` guard below.
    """
    parser = cmdlineparser()
    parser.run()


if __name__ == '__main__':
    main()
368