Package translate :: Package storage :: Module pypo
[hide private]
[frames | no frames]

Source Code for Module translate.storage.pypo

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2002-2009 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """classes that hold units of .po files (pounit) or entire files (pofile) 
 22  gettext-style .po (or .pot) files are used in translations for KDE et al (see kbabel)""" 
 23   
 24  from __future__ import generators 
 25  from translate.misc.multistring import multistring 
 26  from translate.misc import quote 
 27  from translate.misc import textwrap 
 28  from translate.lang import data 
 29  from translate.storage import pocommon, base 
 30  import re 
 31  import copy 
 32  import cStringIO 
 33  import poparser 
 34   
 35  lsep = "\n#: " 
 36  """Seperator for #: entries""" 
 37   
 38  # general functions for quoting / unquoting po strings 
 39   
 40  po_unescape_map = {"\\r": "\r", "\\t": "\t", '\\"': '"', '\\n': '\n', '\\\\': '\\'} 
 41  po_escape_map = dict([(value, key) for (key, value) in po_unescape_map.items()]) 
 42   
def escapeforpo(line):
    """Return C{line} with every character listed in C{po_escape_map}
    replaced by its escape sequence.

    The caller is expected to pass a single line (no embedded newline).

    @param line: unescaped text
    """
    # Collect the (unique) index of every character that needs escaping.
    hits = set()
    for raw_char in po_escape_map:
        hits.update(quote.find_all(line, raw_char))

    # Stitch the untouched stretches and the escape sequences together.
    pieces = []
    cursor = 0
    for position in sorted(hits):
        pieces.append(line[cursor:position])
        pieces.append(po_escape_map[line[position:position + 1]])
        cursor = position + 1
    pieces.append(line[cursor:])
    return "".join(pieces)
61
def unescapehandler(escape):
    """Translate one escape sequence through C{po_unescape_map}.

    Sequences that are not in the map are returned unchanged.
    """
    if escape in po_unescape_map:
        return po_unescape_map[escape]
    return escape
65
def wrapline(line):
    """Wrap text for po files.

    The text is wrapped at 76 columns without altering its whitespace.  A
    space that ends up at the start of a continuation line is moved to the
    end of the line before it.
    """
    wrapped = textwrap.wrap(line, 76, replace_whitespace=False,
                            expand_tabs=False, drop_whitespace=False)

    # Lines should not start with a space...
    for position in range(1, len(wrapped)):
        current = wrapped[position]
        if current.startswith(' '):
            # Move the leading space to the end of the previous line.
            wrapped[position] = current[1:]
            wrapped[position - 1] += ' '
    return wrapped
80
def quoteforpo(text):
    """quotes the given text for a PO file, returning quoted and escaped lines

    C{None} yields an empty list.  Text that contains a newline, or whose
    single line is longer than 71 characters, is written in multi-line form.
    """
    quoted = []
    if text is None:
        return quoted

    segments = text.split("\n")
    final_segment = segments[-1]
    multiline = len(segments) > 1 or len(segments[0]) > 71

    if multiline:
        # A leading empty string line is added, except for exactly one
        # newline-terminated line.
        if not (len(segments) == 2 and not segments[1]):
            quoted.append('""')
        for segment in segments[:-1]:
            #TODO: We should only wrap after escaping
            wrapped = wrapline(segment)
            if not wrapped:
                # An empty source line is just the newline escape.
                quoted.append('"\\n"')
                continue
            for part in wrapped[:-1]:
                quoted.append('"' + escapeforpo(part) + '"')
            if wrapped[-1]:
                quoted.append('"' + escapeforpo(wrapped[-1]) + '\\n"')

    if final_segment:
        for part in wrapline(final_segment):
            quoted.append('"' + escapeforpo(part) + '"')
    return quoted
103
def extractpoline(line):
    """Remove quote and unescape line from po file.

    @param line: a quoted line from a po file (msgid or msgstr)
    """
    result = quote.extractwithoutquotes(line, '"', '"', '\\',
                                        includeescapes=unescapehandler)
    # Only the first element of the helper's result is returned.
    return result[0]
111
def unquotefrompo(postr):
    """Unquote each line in C{postr} and join the results into one unicode
    string."""
    pieces = []
    for quotedline in postr:
        pieces.append(extractpoline(quotedline))
    return u"".join(pieces)
114
def encodingToUse(encoding):
    """Return the encoding to use, falling back to utf-8.

    The placeholder value "CHARSET" and C{None} are replaced by 'utf-8';
    any other value is returned unchanged.  No check is made that the
    encoding is actually known to the Python runtime.
    """
    if encoding is None or encoding == "CHARSET":
        return 'utf-8'
    return encoding
def is_null(lst):
    """Tell whether a list of quoted PO lines is empty.

    True for an empty list and for a list holding only the empty quoted
    string C{'""'}.
    """
    if lst == []:
        return True
    return len(lst) == 1 and lst[0] == '""'
130
def extractstr(string):
    """Return the part of C{string} from its first to its last double quote.

    When the string holds no double quote at all, the slice starts at
    index -1, so the last character followed by a closing quote is returned.
    """
    first = string.find('"')
    last = string.rfind('"')
    if last == -1:
        return string[first:] + '"'
    return string[first:last + 1]
138
class pounit(pocommon.pounit):
    """A single unit (entry) of a .po file.

    Each part of the entry is stored as a list of raw PO lines.  C{msgstr}
    is a dict of such lists keyed by plural index once a list or dict
    target has been assigned.
    """
    # othercomments = []      #   # this is another comment
    # automaticcomments = []  #   #. comment extracted from the source code
    # sourcecomments = []     #   #: sourcefile.xxx:35
    # prev_msgctxt = []       #   #| The previous values that msgctxt and msgid held
    # prev_msgid = []         #
    # prev_msgid_plural = []  #
    # typecomments = []       #   #, fuzzy
    # msgidcomments = []      #   _: within msgid
    # msgctxt
    # msgid = []
    # msgstr = []

    # Our homegrown way to indicate what must be copied in a shallow
    # fashion
    __shallow__ = ['_store']

    def __init__(self, source=None, encoding="UTF-8"):
        """Create an empty unit, then let the base class set C{source}."""
        self._encoding = encodingToUse(encoding)
        self.obsolete = False
        self._initallcomments(blankall=True)
        self.prev_msgctxt = []
        self.prev_msgid = []
        self.prev_msgid_plural = []
        self.msgctxt = []
        self.msgid = []
        self.msgid_pluralcomments = []
        self.msgid_plural = []
        self.msgstr = []
        self.obsoletemsgctxt = []
        self.obsoletemsgid = []
        self.obsoletemsgid_pluralcomments = []
        self.obsoletemsgid_plural = []
        self.obsoletemsgstr = []
        pocommon.pounit.__init__(self, source)

    def _initallcomments(self, blankall=False):
        """Initialises allcomments"""
        if blankall:
            self.othercomments = []
            self.automaticcomments = []
            self.sourcecomments = []
            self.typecomments = []
            self.msgidcomments = []
            self.obsoletemsgidcomments = []

    def _get_all_comments(self):
        """Return the six comment lists, in a fixed order."""
        return [self.othercomments,
                self.automaticcomments,
                self.sourcecomments,
                self.typecomments,
                self.msgidcomments,
                self.obsoletemsgidcomments]

    allcomments = property(_get_all_comments)

    def _get_source_vars(self, msgid, msgid_plural):
        """Build a multistring from quoted msgid / msgid_plural lines."""
        multi = multistring(unquotefrompo(msgid), self._encoding)
        if self.hasplural():
            pluralform = unquotefrompo(msgid_plural)
            if isinstance(pluralform, str):
                pluralform = pluralform.decode(self._encoding)
            multi.strings.append(pluralform)
        return multi

    def _set_source_vars(self, source):
        """Quote C{source} and return it as a C{(msgid, msgid_plural)} pair.

        C{source} may be a plain string, a multistring or a list; the
        second element, when present, becomes the plural.
        """
        msgid = None
        msgid_plural = None
        if isinstance(source, str):
            source = source.decode(self._encoding)
        if isinstance(source, multistring):
            source = source.strings
        if isinstance(source, list):
            msgid = quoteforpo(source[0])
            if len(source) > 1:
                msgid_plural = quoteforpo(source[1])
            else:
                msgid_plural = []
        else:
            msgid = quoteforpo(source)
            msgid_plural = []
        return msgid, msgid_plural

    def getsource(self):
        """Returns the unescaped msgid"""
        return self._get_source_vars(self.msgid, self.msgid_plural)

    def setsource(self, source):
        """Sets the msgid to the given (unescaped) value.

        @param source: an unescaped source string.
        """
        self.msgid, self.msgid_plural = self._set_source_vars(source)
    source = property(getsource, setsource)

    def _get_prev_source(self):
        """Returns the unescaped previous msgid"""
        return self._get_source_vars(self.prev_msgid, self.prev_msgid_plural)

    def _set_prev_source(self, source):
        """Sets the previous msgid to the given (unescaped) value.

        @param source: an unescaped source string.
        """
        self.prev_msgid, self.prev_msgid_plural = self._set_source_vars(source)
    prev_source = property(_get_prev_source, _set_prev_source)

    def gettarget(self):
        """Returns the unescaped msgstr"""
        if isinstance(self.msgstr, dict):
            # Walk the plural forms in key order (as _getmsgpartstr does)
            # rather than relying on the dict's iteration order.
            forms = [unquotefrompo(self.msgstr[key]) for key in sorted(self.msgstr.keys())]
            multi = multistring(forms, self._encoding)
        else:
            multi = multistring(unquotefrompo(self.msgstr), self._encoding)
        return multi

    def settarget(self, target):
        """Sets the msgstr to the given (unescaped) value

        @raise ValueError: if the unit has no plural but C{target} is a
        list or dict that does not have exactly one element.
        """
        self._rich_target = None
        if isinstance(target, str):
            target = target.decode(self._encoding)
        if self.hasplural():
            if isinstance(target, multistring):
                target = target.strings
            elif isinstance(target, basestring):
                target = [target]
        elif isinstance(target, (dict, list)):
            if len(target) == 1:
                target = target[0]
            else:
                raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
        if isinstance(target, list):
            self.msgstr = dict([(i, quoteforpo(targetstring)) for i, targetstring in enumerate(target)])
        elif isinstance(target, dict):
            self.msgstr = dict([(i, quoteforpo(targetstring)) for i, targetstring in target.iteritems()])
        else:
            self.msgstr = quoteforpo(target)
    target = property(gettarget, settarget)

    def getnotes(self, origin=None):
        """Return comments based on origin value (programmer, developer, source code and translator)

        @raise ValueError: if C{origin} is not one of the known values.
        """
        if origin is None:
            comments = u"".join([comment[2:] for comment in self.othercomments])
            comments += u"".join([comment[3:] for comment in self.automaticcomments])
        elif origin == "translator":
            comments = u"".join([comment[2:] for comment in self.othercomments])
        elif origin in ["programmer", "developer", "source code"]:
            comments = u"".join([comment[3:] for comment in self.automaticcomments])
        else:
            raise ValueError("Comment type not valid")
        # Let's drop the last newline
        return comments[:-1]

    def addnote(self, text, origin=None, position="append"):
        """This is modeled on the XLIFF method. See xliff.py::xliffunit.addnote

        @param text: the note; empty or whitespace-only text is ignored.
        @param origin: "programmer", "developer" or "source code" adds an
        automatic ("#. ") comment; anything else adds a translator ("# ")
        comment.
        @param position: "append" adds after the existing notes, any other
        value puts the new note before them.
        """
        # ignore empty strings and strings without non-space characters
        if not (text and text.strip()):
            return
        text = data.forceunicode(text)
        commentlist = self.othercomments
        linestart = "# "
        # Must be initialised here: the prepend branch below reads it even
        # when the origin is not a programmer origin.
        autocomments = False
        if origin in ["programmer", "developer", "source code"]:
            autocomments = True
            commentlist = self.automaticcomments
            linestart = "#. "
        text = text.split("\n")
        if position == "append":
            commentlist += [linestart + line + "\n" for line in text]
        else:
            newcomments = [linestart + line + "\n" for line in text]
            newcomments += [line for line in commentlist]
            if autocomments:
                self.automaticcomments = newcomments
            else:
                self.othercomments = newcomments

    def removenotes(self):
        """Remove all the translator's notes (other comments)"""
        self.othercomments = []

    def __deepcopy__(self, memo=None):
        """Deep copy the unit, copying the C{__shallow__} members by
        reference only."""
        # A fresh dict per call; a mutable default would be shared between
        # calls and keep every copied unit alive.
        if memo is None:
            memo = {}
        # Make an instance to serve as the copy
        new_unit = self.__class__()
        # We'll be testing membership frequently, so make a set from
        # self.__shallow__
        shallow = set(self.__shallow__)
        # Make deep copies of all members which are not in shallow
        for key, value in self.__dict__.iteritems():
            if key not in shallow:
                setattr(new_unit, key, copy.deepcopy(value))
        # Make shallow copies of all members which are in shallow
        for key in set(shallow):
            setattr(new_unit, key, getattr(self, key))
        # Mark memo with ourself, so that we won't get deep copied
        # again
        memo[id(self)] = self
        # Return our copied unit
        return new_unit

    def copy(self):
        """Return a deep copy of this unit."""
        return copy.deepcopy(self)

    def _msgidlen(self):
        """Length of the stripped msgid (plus the stripped plural, if any)."""
        if self.hasplural():
            return len(unquotefrompo(self.msgid).strip()) + len(unquotefrompo(self.msgid_plural).strip())
        else:
            return len(unquotefrompo(self.msgid).strip())

    def _msgstrlen(self):
        """Length of the stripped msgstr; plural forms are joined by newlines."""
        if isinstance(self.msgstr, dict):
            combinedstr = "\n".join([unquotefrompo(msgstr).strip() for msgstr in self.msgstr.itervalues()])
            return len(combinedstr.strip())
        else:
            return len(unquotefrompo(self.msgstr).strip())

    def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
        """Merges the otherpo (with the same msgid) into this one.

        Overwrite non-blank self.msgstr only if overwrite is True
        merge comments only if comments is True
        """

        def mergelists(list1, list2, split=False):
            """Merge the comment lines of list2 into list1, in place."""
            #decode where necessary
            if unicode in [type(item) for item in list2] + [type(item) for item in list1]:
                for position, item in enumerate(list1):
                    if isinstance(item, str):
                        list1[position] = item.decode("utf-8")
                for position, item in enumerate(list2):
                    if isinstance(item, str):
                        list2[position] = item.decode("utf-8")

            #Determine the newline style of list1
            lineend = ""
            if list1 and list1[0]:
                for candidate in ["\n", "\r", "\n\r"]:
                    if list1[0].endswith(candidate):
                        lineend = candidate
            else:
                lineend = "\n"

            #Split if directed to do so:
            if split:
                splitlist1 = []
                splitlist2 = []
                prefix = "#"
                for item in list1:
                    splitlist1.extend(item.split()[1:])
                    prefix = item.split()[0]
                for item in list2:
                    splitlist2.extend(item.split()[1:])
                    prefix = item.split()[0]
                list1.extend(["%s %s%s" % (prefix, item, lineend) for item in splitlist2 if not item in splitlist1])
            else:
                #Normal merge, but conform to list1 newline style
                if list1 != list2:
                    for item in list2:
                        if lineend:
                            item = item.rstrip() + lineend
                        # avoid duplicate comment lines (this might cause some problems)
                        if item not in list1 or len(item) < 5:
                            list1.append(item)

        if not isinstance(otherpo, pounit):
            super(pounit, self).merge(otherpo, overwrite, comments)
            return
        if comments:
            mergelists(self.othercomments, otherpo.othercomments)
            mergelists(self.typecomments, otherpo.typecomments)
            if not authoritative:
                # We don't bring across otherpo.automaticcomments as we consider ourself
                # to be the the authority.  Same applies to otherpo.msgidcomments
                mergelists(self.automaticcomments, otherpo.automaticcomments)
                mergelists(self.msgidcomments, otherpo.msgidcomments)
                mergelists(self.sourcecomments, otherpo.sourcecomments, split=True)
        if not self.istranslated() or overwrite:
            # Remove kde-style comments from the translation (if any).
            if self._extract_msgidcomments(otherpo.target):
                otherpo.target = otherpo.target.replace('_: ' + otherpo._extract_msgidcomments()+ '\n', '')
            self.target = otherpo.target
            if self.source != otherpo.source or self.getcontext() != otherpo.getcontext():
                self.markfuzzy()
            else:
                self.markfuzzy(otherpo.isfuzzy())
        elif not otherpo.istranslated():
            if self.source != otherpo.source:
                self.markfuzzy()
        else:
            if self.target != otherpo.target:
                self.markfuzzy()

    def isheader(self):
        """Whether this unit is a header: empty msgid and msgctxt, no msgid
        comments, non-empty msgstr."""
        #return (self._msgidlen() == 0) and (self._msgstrlen() > 0) and (len(self.msgidcomments) == 0)
        #rewritten here for performance:
        return (is_null(self.msgid)
                and not is_null(self.msgstr)
                and self.msgidcomments == []
                and is_null(self.msgctxt)
               )

    def isblank(self):
        """Whether msgid, msgstr and msgctxt are all empty (and the unit is
        neither a header nor carries msgid comments)."""
        if self.isheader() or len(self.msgidcomments):
            return False
        if (self._msgidlen() == 0) and (self._msgstrlen() == 0) and (is_null(self.msgctxt)):
            return True
        return False
        # TODO: remove:
        # Before, the equivalent of the following was the final return statement:
        # return len(self.source.strip()) == 0

    def hastypecomment(self, typecomment):
        """check whether the given type comment is present"""
        # check for word boundaries properly by using a regular expression;
        # the flag is escaped so that it is always matched literally.
        pattern = "\\b%s\\b" % re.escape(typecomment)
        for tcline in self.typecomments:
            if re.findall(pattern, tcline):
                return True
        return False

    def hasmarkedcomment(self, commentmarker):
        """check whether the given comment marker is present as # (commentmarker) ..."""
        commentmarker = "(%s)" % commentmarker
        for comment in self.othercomments:
            if comment.replace("#", "", 1).strip().startswith(commentmarker):
                return True
        return False

    def settypecomment(self, typecomment, present=True):
        """alters whether a given typecomment is present"""
        if self.hastypecomment(typecomment) != present:
            if present:
                self.typecomments.append("#, %s\n" % typecomment)
            else:
                # this should handle word boundaries properly ...
                pattern = "\\b%s\\b[ \t,]*" % re.escape(typecomment)
                typecomments = [re.sub(pattern, "", tcline) for tcline in self.typecomments]
                # drop lines that are left with nothing but the "#," marker
                self.typecomments = [tcline for tcline in typecomments if tcline.strip() != "#,"]

    def istranslated(self):
        """Translated according to the base class, and not obsolete."""
        return super(pounit, self).istranslated() and not self.isobsolete()

    def istranslatable(self):
        """Whether the unit is neither a header nor blank."""
        return not (self.isheader() or self.isblank())

    def isfuzzy(self):
        """Whether the "fuzzy" type comment is present."""
        return self.hastypecomment("fuzzy")

    def markfuzzy(self, present=True):
        """Add or remove the "fuzzy" type comment."""
        self.settypecomment("fuzzy", present)

    def isreview(self):
        """Whether the unit carries a review type comment or a "(review)" /
        "(pofilter)" marked comment."""
        return self.hastypecomment("review") or self.hasmarkedcomment("review") or self.hasmarkedcomment("pofilter")

    def isobsolete(self):
        """Whether the unit is marked obsolete."""
        return self.obsolete

    def makeobsolete(self):
        """Makes this unit obsolete"""
        self.obsolete = True
        if self.msgctxt:
            self.obsoletemsgctxt = self.msgctxt
        if self.msgid:
            self.obsoletemsgid = self.msgid
            self.msgid = []
        if self.msgidcomments:
            self.obsoletemsgidcomments = self.msgidcomments
            self.msgidcomments = []
        if self.msgid_plural:
            self.obsoletemsgid_plural = self.msgid_plural
            self.msgid_plural = []
        if self.msgstr:
            self.obsoletemsgstr = self.msgstr
            self.msgstr = []
        self.sourcecomments = []
        self.automaticcomments = []

    def resurrect(self):
        """Makes an obsolete unit normal"""
        self.obsolete = False
        if self.obsoletemsgctxt:
            # The context goes back into msgctxt, not msgid.
            self.msgctxt = self.obsoletemsgctxt
            self.obsoletemsgctxt = []
        if self.obsoletemsgid:
            self.msgid = self.obsoletemsgid
            self.obsoletemsgid = []
        if self.obsoletemsgidcomments:
            self.msgidcomments = self.obsoletemsgidcomments
            self.obsoletemsgidcomments = []
        if self.obsoletemsgid_plural:
            self.msgid_plural = self.obsoletemsgid_plural
            self.obsoletemsgid_plural = []
        if self.obsoletemsgstr:
            self.msgstr = self.obsoletemsgstr
            self.obsoletemsgstr = []

    def hasplural(self):
        """returns whether this pounit contains plural strings..."""
        return len(self.msgid_plural) > 0

    def parse(self, src):
        """Parse the PO text C{src} into this unit using poparser."""
        return poparser.parse_unit(poparser.ParseState(cStringIO.StringIO(src), pounit), self)

    def _getmsgpartstr(self, partname, partlines, partcomments=""):
        """Render one part (msgctxt, msgid, msgstr, ...) as PO text.

        @param partname: keyword to write, e.g. "msgid" or "#~ msgstr"
        @param partlines: quoted lines, or a dict of such lists keyed by
        plural index (rendered as C{partname[index]} in key order)
        @param partcomments: quoted "_:" comment lines to write first
        """
        if isinstance(partlines, dict):
            partkeys = sorted(partlines.keys())
            return "".join([self._getmsgpartstr("%s[%d]" % (partname, partkey), partlines[partkey], partcomments) for partkey in partkeys])
        partstr = partname + " "
        partstartline = 0
        if len(partlines) > 0 and len(partcomments) == 0:
            partstr += partlines[0]
            partstartline = 1
        elif len(partcomments) > 0:
            if len(partlines) > 0 and len(unquotefrompo(partlines[:1])) == 0:
                # if there is a blank leader line, it must come before the comment
                partstr += partlines[0] + '\n'
                # but if the whole string is blank, leave it in
                if len(partlines) > 1:
                    partstartline += 1
            else:
                # All partcomments should start on a newline
                partstr += '""\n'
            # combine comments into one if more than one
            if len(partcomments) > 1:
                combinedcomment = []
                for comment in partcomments:
                    comment = unquotefrompo([comment])
                    if comment.startswith("_:"):
                        comment = comment[len("_:"):]
                    if comment.endswith("\\n"):
                        comment = comment[:-len("\\n")]
                    #Before we used to strip. Necessary in some cases?
                    combinedcomment.append(comment)
                partcomments = quoteforpo("_:%s" % "".join(combinedcomment))
            # comments first, no blank leader line needed
            partstr += "\n".join(partcomments)
            partstr = quote.rstripeol(partstr)
        else:
            partstr += '""'
        partstr += '\n'
        # add the rest
        for partline in partlines[partstartline:]:
            partstr += partline + '\n'
        return partstr

    def _encodeifneccessary(self, output):
        """encodes unicode strings and returns other strings unchanged"""
        if isinstance(output, unicode):
            # An explicitly set "encoding" attribute still wins; otherwise
            # use the unit's own _encoding (set in __init__) instead of
            # always falling back to UTF-8.
            encoding = getattr(self, "encoding", None) or getattr(self, "_encoding", "UTF-8")
            return output.encode(encodingToUse(encoding))
        return output

    def __str__(self):
        """convert to a string. double check that unicode is handled somehow here"""
        output = self._getoutput()
        return self._encodeifneccessary(output)

    def _getoutput(self):
        """return this po element as a string"""
        def add_prev_msgid_lines(lines, header, var):
            if len(var) > 0:
                lines.append("#| %s %s\n" % (header, var[0]))
                lines.extend("#| %s\n" % line for line in var[1:])

        def add_prev_msgid_info(lines):
            add_prev_msgid_lines(lines, 'msgctxt', self.prev_msgctxt)
            add_prev_msgid_lines(lines, 'msgid', self.prev_msgid)
            add_prev_msgid_lines(lines, 'msgid_plural', self.prev_msgid_plural)

        lines = []
        lines.extend(self.othercomments)
        if self.isobsolete():
            lines.extend(self.typecomments)
            obsoletelines = []
            if self.obsoletemsgctxt:
                obsoletelines.append(self._getmsgpartstr("#~ msgctxt", self.obsoletemsgctxt))
            obsoletelines.append(self._getmsgpartstr("#~ msgid", self.obsoletemsgid, self.obsoletemsgidcomments))
            if self.obsoletemsgid_plural or self.obsoletemsgid_pluralcomments:
                obsoletelines.append(self._getmsgpartstr("#~ msgid_plural", self.obsoletemsgid_plural, self.obsoletemsgid_pluralcomments))
            obsoletelines.append(self._getmsgpartstr("#~ msgstr", self.obsoletemsgstr))
            for index, obsoleteline in enumerate(obsoletelines):
                # We need to account for a multiline msgid or msgstr here
                obsoletelines[index] = obsoleteline.replace('\n"', '\n#~ "')
            lines.extend(obsoletelines)
            lines = [self._encodeifneccessary(line) for line in lines]
            return "".join(lines)
        # if there's no msgid don't do msgid and string, unless we're the header
        # this will also discard any comments other than plain othercomments...
        if is_null(self.msgid):
            if not (self.isheader() or self.getcontext() or self.sourcecomments):
                return "".join(lines)
        lines.extend(self.automaticcomments)
        lines.extend(self.sourcecomments)
        lines.extend(self.typecomments)
        add_prev_msgid_info(lines)
        if self.msgctxt:
            lines.append(self._getmsgpartstr("msgctxt", self.msgctxt))
        lines.append(self._getmsgpartstr("msgid", self.msgid, self.msgidcomments))
        if self.msgid_plural or self.msgid_pluralcomments:
            lines.append(self._getmsgpartstr("msgid_plural", self.msgid_plural, self.msgid_pluralcomments))
        lines.append(self._getmsgpartstr("msgstr", self.msgstr))
        lines = [self._encodeifneccessary(line) for line in lines]
        postr = "".join(lines)
        return postr

    def getlocations(self):
        """Get a list of locations from sourcecomments in the PO unit

        rtype: List
        return: A list of the locations with '#: ' stripped

        """
        locations = []
        for sourcecomment in self.sourcecomments:
            locations += quote.rstripeol(sourcecomment)[3:].split()
        return locations

    def addlocation(self, location):
        """Add a location to sourcecomments in the PO unit

        @param location: Text location e.g. 'file.c:23' does not include #:
        @type location: String

        """
        self.sourcecomments.append("#: %s\n" % location)

    def _extract_msgidcomments(self, text=None):
        """Extract KDE style msgid comments from the unit.

        @param text: text to extract from; when empty or None the unit's own
        msgidcomments are used.
        @rtype: String
        @return: Returns the extracted msgidcomments found in this unit's msgid.
        """

        if not text:
            text = unquotefrompo(self.msgidcomments)
        return text.split('\n')[0].replace('_: ', '', 1)

    def getcontext(self):
        """Get the message context."""
        return unquotefrompo(self.msgctxt) + self._extract_msgidcomments()

    def getid(self):
        """Returns a unique identifier for this unit."""
        context = self.getcontext()
        # Gettext does not consider the plural to determine duplicates, only
        # the msgid. For generation of .mo files, we might want to use this
        # code to generate the entry for the hash table, but for now, it is
        # commented out for conformance to gettext.
#        id = '\0'.join(self.source.strings)
        unitid = self.source
        if self.msgidcomments:
            unitid = "_: %s\n%s" % (context, unitid)
        elif context:
            unitid = "%s\04%s" % (context, unitid)
        return unitid
693 -class pofile(pocommon.pofile):
694 """this represents a .po file containing various units""" 695 UnitClass = pounit
696 - def __init__(self, inputfile=None, encoding=None, unitclass=pounit):
697 """construct a pofile, optionally reading in from inputfile. 698 encoding can be specified but otherwise will be read from the PO header""" 699 self.UnitClass = unitclass 700 pocommon.pofile.__init__(self, unitclass=unitclass) 701 self.units = [] 702 self.filename = '' 703 self._encoding = encodingToUse(encoding) 704 if inputfile is not None: 705 self.parse(inputfile)
706
707 - def changeencoding(self, newencoding):
708 """Deprecated: changes the encoding on the file.""" 709 # This should not be here but in poheader. It also shouldn't mangle the 710 # header itself, but use poheader methods. All users are removed, so 711 # we can deprecate after one release. 712 raise DeprecationWarning 713 714 self._encoding = encodingToUse(newencoding) 715 if not self.units: 716 return 717 header = self.header() 718 if not header or header.isblank(): 719 return 720 charsetline = None 721 headerstr = unquotefrompo(header.msgstr) 722 for line in headerstr.split("\n"): 723 if not ":" in line: continue 724 key, value = line.strip().split(":", 1) 725 if key.strip() != "Content-Type": continue 726 charsetline = line 727 if charsetline is None: 728 headerstr += "Content-Type: text/plain; charset=%s" % self._encoding 729 else: 730 charset = re.search("charset=([^ ]*)", charsetline) 731 if charset is None: 732 newcharsetline = charsetline 733 if not newcharsetline.strip().endswith(";"): 734 newcharsetline += ";" 735 newcharsetline += " charset=%s" % self._encoding 736 else: 737 charset = charset.group(1) 738 newcharsetline = charsetline.replace("charset=%s" % charset, "charset=%s" % self._encoding, 1) 739 headerstr = headerstr.replace(charsetline, newcharsetline, 1) 740 header.msgstr = quoteforpo(headerstr)
741
742 - def parse(self, input):
743 """parses the given file or file source string""" 744 try: 745 if hasattr(input, 'name'): 746 self.filename = input.name 747 elif not getattr(self, 'filename', ''): 748 self.filename = '' 749 if isinstance(input, str): 750 input = cStringIO.StringIO(input) 751 poparser.parse_units(poparser.ParseState(input, pounit), self) 752 except Exception, e: 753 raise base.ParseError(e)
754
755 - def removeduplicates(self, duplicatestyle="merge"):
756 """make sure each msgid is unique ; merge comments etc from duplicates into original""" 757 # TODO: can we handle consecutive calls to removeduplicates()? What 758 # about files already containing msgctxt? - test 759 id_dict = {} 760 uniqueunits = [] 761 # TODO: this is using a list as the pos aren't hashable, but this is slow. 762 # probably not used frequently enough to worry about it, though. 763 markedpos = [] 764 def addcomment(thepo): 765 thepo.msgidcomments.append('"_: %s\\n"' % " ".join(thepo.getlocations())) 766 markedpos.append(thepo)
767 for thepo in self.units: 768 id = thepo.getid() 769 if thepo.isheader() and not thepo.getlocations(): 770 # header msgids shouldn't be merged... 771 uniqueunits.append(thepo) 772 elif id in id_dict: 773 if duplicatestyle == "merge": 774 if id: 775 id_dict[id].merge(thepo) 776 else: 777 addcomment(thepo) 778 uniqueunits.append(thepo) 779 elif duplicatestyle == "msgctxt": 780 origpo = id_dict[id] 781 if origpo not in markedpos: 782 origpo.msgctxt.append('"%s"' % escapeforpo(" ".join(origpo.getlocations()))) 783 markedpos.append(thepo) 784 thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations()))) 785 uniqueunits.append(thepo) 786 else: 787 if not id: 788 if duplicatestyle == "merge": 789 addcomment(thepo) 790 else: 791 thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations()))) 792 id_dict[id] = thepo 793 uniqueunits.append(thepo) 794 self.units = uniqueunits
795
796 - def __str__(self):
797 """convert to a string. double check that unicode is handled somehow here""" 798 output = self._getoutput() 799 if isinstance(output, unicode): 800 return output.encode(getattr(self, "encoding", "UTF-8")) 801 return output
802
803 - def _getoutput(self):
804 """convert the units back to lines""" 805 lines = [] 806 for unit in self.units: 807 unitsrc = str(unit) + "\n" 808 lines.append(unitsrc) 809 lines = "".join(self.encode(lines)).rstrip() 810 #After the last pounit we will have \n\n and we only want to end in \n: 811 if lines: lines += "\n" 812 return lines
813
814 - def encode(self, lines):
815 """encode any unicode strings in lines in self._encoding""" 816 newlines = [] 817 encoding = self._encoding 818 if encoding is None or encoding.lower() == "charset": 819 encoding = 'UTF-8' 820 for line in lines: 821 if isinstance(line, unicode): 822 line = line.encode(encoding) 823 newlines.append(line) 824 return newlines
825
826 - def decode(self, lines):
827 """decode any non-unicode strings in lines with self._encoding""" 828 newlines = [] 829 for line in lines: 830 if isinstance(line, str) and self._encoding is not None and self._encoding.lower() != "charset": 831 try: 832 line = line.decode(self._encoding) 833 except UnicodeError, e: 834 raise UnicodeError("Error decoding line with encoding %r: %s. Line is %r" % (self._encoding, e, line)) 835 newlines.append(line) 836 return newlines
837
838 - def unit_iter(self):
839 for unit in self.units: 840 if not (unit.isheader() or unit.isobsolete()): 841 yield unit
842