1   
  2   
  3   
  4   
  5   
  6   
  7   
  8   
  9   
 10   
 11   
 12   
 13   
 14   
 15   
 16   
 17   
 18   
 19   
 20   
 21   
 22  """classes that hold units of .po files (pounit) or entire files (pofile) 
 23  gettext-style .po (or .pot) files are used in translations for KDE et al (see kbabel)""" 
 24   
 25  from __future__ import generators 
 26  from translate.misc.multistring import multistring 
 27  from translate.misc import quote 
 28  from translate.misc import textwrap 
 29  from translate.lang import data 
 30  from translate.storage import pocommon 
 31  import re 
 32   
 33  lsep = "\n#: " 
 34  """Seperator for #: entries""" 
 35   
 36   
 37   
 38  po_unescape_map = {"\\r": "\r", "\\t": "\t", '\\"': '"', '\\n': '\n', '\\\\': '\\'} 
 39  po_escape_map = dict([(value, key) for (key, value) in po_unescape_map.items()]) 
 40   
 42      """Escapes a line for po format. assumes no \n occurs in the line. 
 43       
 44      @param line: unescaped text 
 45      """ 
 46      special_locations = [] 
 47      for special_key in po_escape_map: 
 48          special_locations.extend(quote.find_all(line, special_key)) 
 49      special_locations = dict.fromkeys(special_locations).keys() 
 50      special_locations.sort() 
 51      escaped_line = "" 
 52      last_location = 0 
 53      for location in special_locations: 
 54          escaped_line += line[last_location:location] 
 55          escaped_line += po_escape_map[line[location:location+1]] 
 56          last_location = location+1 
 57      escaped_line += line[last_location:] 
 58      return escaped_line 
  59   
 63   
 65      """Wrap text for po files.""" 
 66      wrappedlines = textwrap.wrap(line, 76, replace_whitespace=False, expand_tabs=False, drop_whitespace=False) 
 67   
 68       
 69      if len(wrappedlines) > 1: 
 70          for index, line in enumerate(wrappedlines[1:]): 
 71              if line.startswith(' '): 
 72                   
 73                  wrappedlines[index+1] = line[1:] 
 74   
 75                   
 76                  wrappedlines[index] += ' ' 
 77      return wrappedlines 
  78   
 80      """quotes the given text for a PO file, returning quoted and escaped lines""" 
 81      polines = [] 
 82      if text is None: 
 83          return polines 
 84      lines = text.split("\n") 
 85      if len(lines) > 1 or (len(lines) == 1 and len(lines[0]) > 71): 
 86          if len(lines) != 2 or lines[1]: 
 87              polines.extend(['""']) 
 88          for line in lines[:-1]: 
 89              lns = wrapline(line) 
 90              if len(lns) > 0: 
 91                  for ln in lns[:-1]: 
 92                      polines.extend(['"' + escapeforpo(ln) + '"']) 
 93                  if lns[-1]: 
 94                      polines.extend(['"' + escapeforpo(lns[-1]) + '\\n"']) 
 95              else: 
 96                  polines.extend(['"\\n"']) 
 97      if lines[-1]: 
 98          polines.extend(['"' + escapeforpo(line) + '"' for line in wrapline(lines[-1])]) 
 99      return polines 
 100   
102      """Remove quote and unescape line from po file. 
103        
104      @param line: a quoted line from a po file (msgid or msgstr) 
105      """ 
106      extracted = quote.extractwithoutquotes(line,'"','"','\\',includeescapes=unescapehandler)[0] 
107      return extracted 
 108   
110      if joinwithlinebreak: 
111          joiner = u"\n" 
112          if postr and postr[0] == '""': postr = postr[1:] 
113      else: 
114          joiner = u"" 
115      return joiner.join([extractpoline(line) for line in postr]) 
 116   
118      """Tests whether the given encoding is known in the python runtime, or returns utf-8. 
119      This function is used to ensure that a valid encoding is always used.""" 
120      if encoding == "CHARSET" or encoding == None: return 'utf-8' 
121      return encoding 
 122   
123   
124   
125   
126   
127   
128   
129   
130  """ 
131  From the GNU gettext manual: 
132       WHITE-SPACE 
133       #  TRANSLATOR-COMMENTS 
134       #. AUTOMATIC-COMMENTS 
135       #| PREVIOUS MSGID                 (Gettext 0.16 - check if this is the correct position - not yet implemented) 
136       #: REFERENCE... 
137       #, FLAG... 
138       msgctxt CONTEXT                   (Gettext 0.15) 
139       msgid UNTRANSLATED-STRING 
140       msgstr TRANSLATED-STRING 
141  """ 
142   
143 -class pounit(pocommon.pounit): 
 144       
145       
146       
147       
148       
149       
150       
151       
152   
153 -    def __init__(self, source=None, encoding="UTF-8"): 
 154          self._encoding = encodingToUse(encoding) 
155          self.obsolete = False 
156          self._initallcomments(blankall=True) 
157          self.msgctxt = [] 
158          self.msgid = [] 
159          self.msgid_pluralcomments = [] 
160          self.msgid_plural = [] 
161          self.msgstr = [] 
162          self.obsoletemsgctxt = [] 
163          self.obsoletemsgid = [] 
164          self.obsoletemsgid_pluralcomments = [] 
165          self.obsoletemsgid_plural = [] 
166          self.obsoletemsgstr = [] 
167          if source: 
168              self.setsource(source) 
169          super(pounit, self).__init__(source) 
 170   
186   
188          """Returns the unescaped msgid""" 
189          multi = multistring(unquotefrompo(self.msgid), self._encoding) 
190          if self.hasplural(): 
191              pluralform = unquotefrompo(self.msgid_plural) 
192              if isinstance(pluralform, str): 
193                  pluralform = pluralform.decode(self._encoding) 
194              multi.strings.append(pluralform) 
195          return multi 
 196   
212      source = property(getsource, setsource) 
213   
221   
223          """Sets the msgstr to the given (unescaped) value""" 
224          if isinstance(target, str): 
225              target = target.decode(self._encoding) 
226          if target == self.target: 
227              return 
228          if self.hasplural(): 
229              if isinstance(target, multistring): 
230                  target = target.strings 
231              elif isinstance(target, basestring): 
232                  target = [target] 
233          elif isinstance(target,(dict, list)): 
234              if len(target) == 1: 
235                  target = target[0] 
236              else: 
237                  raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target)) 
238          templates = self.msgstr 
239          if isinstance(templates, list): 
240              templates = {0: templates} 
241          if isinstance(target, list): 
242              self.msgstr = dict([(i, quoteforpo(target[i])) for i in range(len(target))]) 
243          elif isinstance(target, dict): 
244              self.msgstr = dict([(i, quoteforpo(targetstring)) for i, targetstring in target.iteritems()]) 
245          else: 
246              self.msgstr = quoteforpo(target) 
 247      target = property(gettarget, settarget) 
248   
250          """Return comments based on origin value (programmer, developer, source code and translator)""" 
251          if origin == None: 
252              comments = u"".join([comment[2:] for comment in self.othercomments]) 
253              comments += u"".join([comment[3:] for comment in self.automaticcomments]) 
254          elif origin == "translator": 
255              comments = u"".join ([comment[2:] for comment in self.othercomments]) 
256          elif origin in ["programmer", "developer", "source code"]: 
257              comments = u"".join([comment[3:] for comment in self.automaticcomments]) 
258          else: 
259              raise ValueError("Comment type not valid") 
260           
261          return comments[:-1] 
 262   
263 -    def addnote(self, text, origin=None, position="append"): 
 264          """This is modeled on the XLIFF method. See xliff.py::xliffunit.addnote""" 
265           
266          if not text: 
267              return 
268          text = data.forceunicode(text) 
269          commentlist = self.othercomments 
270          linestart = "# " 
271          if origin in ["programmer", "developer", "source code"]: 
272              autocomments = True 
273              commentlist = self.automaticcomments 
274              linestart = "#. " 
275          text = text.split("\n") 
276          if position == "append": 
277              commentlist += [linestart + line + "\n" for line in text] 
278          else: 
279              newcomments = [linestart + line + "\n" for line in text] 
280              newcomments += [line for line in commentlist] 
281              if autocomments: 
282                  self.automaticcomments = newcomments 
283              else: 
284                  self.othercomments = newcomments 
 285           
287          """Remove all the translator's notes (other comments)""" 
288          self.othercomments = [] 
 289   
291          newpo = self.__class__() 
292          newpo.othercomments = self.othercomments[:] 
293          newpo.automaticcomments = self.automaticcomments[:] 
294          newpo.sourcecomments = self.sourcecomments[:] 
295          newpo.typecomments = self.typecomments[:] 
296          newpo.obsolete = self.obsolete 
297          newpo.msgidcomments = self.msgidcomments[:] 
298          newpo._initallcomments() 
299          newpo.msgctxt = self.msgctxt[:] 
300          newpo.msgid = self.msgid[:] 
301          newpo.msgid_pluralcomments = self.msgid_pluralcomments[:] 
302          newpo.msgid_plural = self.msgid_plural[:] 
303          if isinstance(self.msgstr, dict): 
304              newpo.msgstr = self.msgstr.copy() 
305          else: 
306              newpo.msgstr = self.msgstr[:] 
307               
308          newpo.obsoletemsgctxt = self.obsoletemsgctxt[:] 
309          newpo.obsoletemsgid = self.obsoletemsgid[:] 
310          newpo.obsoletemsgid_pluralcomments = self.obsoletemsgid_pluralcomments[:] 
311          newpo.obsoletemsgid_plural = self.obsoletemsgid_plural[:] 
312          if isinstance(self.obsoletemsgstr, dict): 
313              newpo.obsoletemsgstr = self.obsoletemsgstr.copy() 
314          else: 
315              newpo.obsoletemsgstr = self.obsoletemsgstr[:] 
316          return newpo 
 317   
323   
325          if isinstance(self.msgstr, dict): 
326              combinedstr = "\n".join([unquotefrompo(msgstr).strip() for msgstr in self.msgstr.itervalues()]) 
327              return len(combinedstr.strip()) 
328          else: 
329              return len(unquotefrompo(self.msgstr).strip()) 
 330   
331 -    def merge(self, otherpo, overwrite=False, comments=True, authoritative=False): 
 332          """Merges the otherpo (with the same msgid) into this one. 
333   
334          Overwrite non-blank self.msgstr only if overwrite is True 
335          merge comments only if comments is True 
336           
337          """ 
338   
339          def mergelists(list1, list2, split=False): 
340               
341              if unicode in [type(item) for item in list2] + [type(item) for item in list1]: 
342                  for position, item in enumerate(list1): 
343                      if isinstance(item, str): 
344                          list1[position] = item.decode("utf-8") 
345                  for position, item in enumerate(list2): 
346                      if isinstance(item, str): 
347                          list2[position] = item.decode("utf-8") 
348                           
349               
350              lineend = "" 
351              if list1 and list1[0]: 
352                  for candidate in ["\n", "\r", "\n\r"]: 
353                      if list1[0].endswith(candidate): 
354                          lineend = candidate 
355                  if not lineend: 
356                      lineend = "" 
357              else: 
358                  lineend = "\n" 
359               
360               
361              if split: 
362                  splitlist1 = [] 
363                  splitlist2 = [] 
364                  prefix = "#" 
365                  for item in list1: 
366                      splitlist1.extend(item.split()[1:]) 
367                      prefix = item.split()[0] 
368                  for item in list2: 
369                      splitlist2.extend(item.split()[1:]) 
370                      prefix = item.split()[0] 
371                  list1.extend(["%s %s%s" % (prefix,item,lineend) for item in splitlist2 if not item in splitlist1]) 
372              else: 
373                   
374                  if list1 != list2: 
375                      for item in list2: 
376                          if lineend: 
377                              item = item.rstrip() + lineend 
378                           
379                          if item not in list1 or len(item) < 5: 
380                              list1.append(item) 
 381          if not isinstance(otherpo, pounit): 
382              super(pounit, self).merge(otherpo, overwrite, comments) 
383              return 
384          if comments: 
385              mergelists(self.othercomments, otherpo.othercomments) 
386              mergelists(self.typecomments, otherpo.typecomments) 
387              if not authoritative: 
388                   
389                   
390                  mergelists(self.automaticcomments, otherpo.automaticcomments) 
391                  mergelists(self.msgidcomments, otherpo.msgidcomments) 
392                  mergelists(self.sourcecomments, otherpo.sourcecomments, split=True) 
393          if not self.istranslated() or overwrite: 
394               
395              if self._extract_msgidcomments(otherpo.target): 
396                  otherpo.target = otherpo.target.replace('_: ' + otherpo._extract_msgidcomments()+ '\n', '') 
397              self.target = otherpo.target 
398              if self.source != otherpo.source: 
399                  self.markfuzzy() 
400              else: 
401                  self.markfuzzy(otherpo.isfuzzy()) 
402          elif not otherpo.istranslated(): 
403              if self.source != otherpo.source: 
404                  self.markfuzzy() 
405          else: 
406              if self.target != otherpo.target: 
407                  self.markfuzzy() 
 408   
410           
411           
412          return ((self.msgid == [] or self.msgid == ['""']) and  
413                          not (self.msgstr == [] or self.msgstr == ['""'])  
414                          and self.msgidcomments == [] 
415                          and (self.msgctxt == [] or self.msgctxt == ['""']) 
416                          and (self.sourcecomments == [] or self.sourcecomments == [""])) 
 417   
419          if self.isheader() or len(self.msgidcomments): 
420              return False 
421          if (self.msgidlen() == 0) and (self.msgstrlen() == 0): 
422              return True 
423          return False 
 424           
425           
426           
427   
432   
440   
450   
453   
456   
459   
462   
465   
468   
470          """Makes this unit obsolete""" 
471          self.obsolete = True 
472          if self.msgctxt: 
473              self.obsoletemsgctxt = self.msgctxt 
474          if self.msgid: 
475              self.obsoletemsgid = self.msgid 
476              self.msgid = [] 
477          if self.msgidcomments: 
478              self.obsoletemsgidcomments = self.msgidcomments 
479              self.msgidcomments = [] 
480          if self.msgid_plural: 
481              self.obsoletemsgid_plural = self.msgid_plural 
482              self.msgid_plural = [] 
483          if self.msgstr: 
484              self.obsoletemsgstr = self.msgstr 
485              self.msgstr = [] 
486          self.sourcecomments = [] 
487          self.automaticcomments = [] 
 488   
490          """Makes an obsolete unit normal""" 
491          self.obsolete = False 
492          if self.obsoletemsgctxt: 
493              self.msgid = self.obsoletemsgctxt 
494              self.obsoletemsgctxt = [] 
495          if self.obsoletemsgid: 
496              self.msgid = self.obsoletemsgid 
497              self.obsoletemsgid = [] 
498          if self.obsoletemsgidcomments: 
499              self.msgidcomments = self.obsoletemsgidcomments 
500              self.obsoletemsgidcomments = [] 
501          if self.obsoletemsgid_plural: 
502              self.msgid_plural = self.obsoletemsgid_plural 
503              self.obsoletemsgid_plural = [] 
504          if self.obsoletemsgstr: 
505              self.msgstr = self.obsoletemsgstr 
506              self.obsoletemgstr = [] 
 507   
509          """returns whether this pounit contains plural strings...""" 
510          return len(self.msgid_plural) > 0 
 511   
513          if isinstance(src, str): 
514               
515              src = src.decode(self._encoding) 
516          inmsgctxt = 0 
517          inmsgid = 0 
518          inmsgid_comment = 0 
519          inmsgid_plural = 0 
520          inmsgstr = 0 
521          msgstr_pluralid = None 
522          linesprocessed = 0 
523          for line in src.split("\n"): 
524              line = line + "\n" 
525              linesprocessed += 1 
526              if len(line) == 0: 
527                  continue 
528              elif line[0] == '#': 
529                  if inmsgstr and not line[1] == '~': 
530                       
531                      break 
532                  if line[1] == '.': 
533                      self.automaticcomments.append(line) 
534                  elif line[1] == ':': 
535                      self.sourcecomments.append(line) 
536                  elif line[1] == ',': 
537                      self.typecomments.append(line) 
538                  elif line[1] == '~': 
539                      line = line[3:] 
540                      self.obsolete = True 
541                  else: 
542                      self.othercomments.append(line) 
543              if line.startswith('msgid_plural'): 
544                  inmsgctxt = 0 
545                  inmsgid = 0 
546                  inmsgid_plural = 1 
547                  inmsgstr = 0 
548                  inmsgid_comment = 0 
549              elif line.startswith('msgctxt'): 
550                  inmsgctxt = 1 
551                  inmsgid = 0 
552                  inmsgid_plural = 0 
553                  inmsgstr = 0 
554                  inmsgid_comment = 0 
555              elif line.startswith('msgid'): 
556                   
557                   
558                  if inmsgstr or inmsgid_plural: 
559                      break 
560                  inmsgctxt = 0 
561                  inmsgid = 1 
562                  inmsgid_plural = 0 
563                  inmsgstr = 0 
564                  inmsgid_comment = 0 
565              elif line.startswith('msgstr'): 
566                  inmsgctxt = 0 
567                  inmsgid = 0 
568                  inmsgid_plural = 0 
569                  inmsgstr = 1 
570                  if line.startswith('msgstr['): 
571                      msgstr_pluralid = int(line[len('msgstr['):line.find(']')].strip()) 
572                  else: 
573                      msgstr_pluralid = None 
574              extracted = quote.extractstr(line) 
575              if not extracted is None: 
576                  if inmsgctxt: 
577                      self.msgctxt.append(extracted) 
578                  elif inmsgid: 
579                       
580                      if extracted.find("_:") != -1: 
581                          inmsgid_comment = 1 
582                      if inmsgid_comment: 
583                          self.msgidcomments.append(extracted) 
584                      else: 
585                          self.msgid.append(extracted) 
586                      if inmsgid_comment and extracted.find("\\n") != -1: 
587                          inmsgid_comment = 0 
588                  elif inmsgid_plural: 
589                      if extracted.find("_:") != -1: 
590                          inmsgid_comment = 1 
591                      if inmsgid_comment: 
592                          self.msgid_pluralcomments.append(extracted) 
593                      else: 
594                          self.msgid_plural.append(extracted) 
595                      if inmsgid_comment and extracted.find("\\n") != -1: 
596                          inmsgid_comment = 0 
597                  elif inmsgstr: 
598                      if msgstr_pluralid is None: 
599                          self.msgstr.append(extracted) 
600                      else: 
601                          if type(self.msgstr) == list: 
602                              self.msgstr = {0: self.msgstr} 
603                          if msgstr_pluralid not in self.msgstr: 
604                              self.msgstr[msgstr_pluralid] = [] 
605                          self.msgstr[msgstr_pluralid].append(extracted) 
606          if self.obsolete: 
607              self.makeobsolete() 
608           
609           
610          if self.isheader(): 
611              charset = re.search("charset=([^\\s]+)", unquotefrompo(self.msgstr)) 
612              if charset: 
613                  self._encoding = encodingToUse(charset.group(1)) 
614          return linesprocessed 
 615   
617          if isinstance(partlines, dict): 
618              partkeys = partlines.keys() 
619              partkeys.sort() 
620              return "".join([self._getmsgpartstr("%s[%d]" % (partname, partkey), partlines[partkey], partcomments) for partkey in partkeys]) 
621          partstr = partname + " " 
622          partstartline = 0 
623          if len(partlines) > 0 and len(partcomments) == 0: 
624              partstr += partlines[0] 
625              partstartline = 1 
626          elif len(partcomments) > 0: 
627              if len(partlines) > 0 and len(unquotefrompo(partlines[:1])) == 0: 
628                   
629                  partstr += partlines[0] + '\n' 
630                   
631                  if len(partlines) > 1: 
632                      partstartline += 1 
633              else: 
634                   
635                  partstr += '""\n' 
636               
637              if len(partcomments) > 1: 
638                  combinedcomment = [] 
639                  for comment in partcomments: 
640                      comment = unquotefrompo([comment]) 
641                      if comment.startswith("_:"): 
642                          comment = comment[len("_:"):] 
643                      if comment.endswith("\\n"): 
644                          comment = comment[:-len("\\n")] 
645                       
646                      combinedcomment.append(comment) 
647                  partcomments = quoteforpo("_:%s" % "".join(combinedcomment)) 
648               
649              partstr += "\n".join(partcomments) 
650              partstr = quote.rstripeol(partstr) 
651          else: 
652              partstr += '""' 
653          partstr += '\n' 
654           
655          for partline in partlines[partstartline:]: 
656              partstr += partline + '\n' 
657          return partstr 
 658   
660          """encodes unicode strings and returns other strings unchanged""" 
661          if isinstance(output, unicode): 
662              encoding = encodingToUse(getattr(self, "encoding", "UTF-8")) 
663              return output.encode(encoding) 
664          return output 
 665   
667          """convert to a string. double check that unicode is handled somehow here""" 
668          output = self._getoutput() 
669          return self._encodeifneccessary(output) 
 670   
672          """return this po element as a string""" 
673          lines = [] 
674          lines.extend(self.othercomments) 
675          if self.isobsolete(): 
676              lines.extend(self.typecomments) 
677              obsoletelines = [] 
678              if self.obsoletemsgctxt: 
679                  obsoletelines.append(self._getmsgpartstr("#~ msgctxt", self.obsoletemsgctxt)) 
680              obsoletelines.append(self._getmsgpartstr("#~ msgid", self.obsoletemsgid, self.obsoletemsgidcomments)) 
681              if self.obsoletemsgid_plural or self.obsoletemsgid_pluralcomments: 
682                  obsoletelines.append(self._getmsgpartstr("#~ msgid_plural", self.obsoletemsgid_plural, self.obsoletemsgid_pluralcomments)) 
683              obsoletelines.append(self._getmsgpartstr("#~ msgstr", self.obsoletemsgstr)) 
684              for index, obsoleteline in enumerate(obsoletelines): 
685                   
686                  obsoletelines[index] = obsoleteline.replace('\n"', '\n#~ "') 
687              lines.extend(obsoletelines) 
688              lines = [self._encodeifneccessary(line) for line in lines] 
689              return "".join(lines) 
690           
691           
692          if (len(self.msgid) == 0) or ((len(self.msgid) == 1) and (self.msgid[0] == '""')): 
693              if not (self.isheader() or self.msgidcomments or self.sourcecomments): 
694                  return "".join(lines) 
695          lines.extend(self.automaticcomments) 
696          lines.extend(self.sourcecomments) 
697          lines.extend(self.typecomments) 
698          if self.msgctxt: 
699              lines.append(self._getmsgpartstr("msgctxt", self.msgctxt)) 
700          lines.append(self._getmsgpartstr("msgid", self.msgid, self.msgidcomments)) 
701          if self.msgid_plural or self.msgid_pluralcomments: 
702              lines.append(self._getmsgpartstr("msgid_plural", self.msgid_plural, self.msgid_pluralcomments)) 
703          lines.append(self._getmsgpartstr("msgstr", self.msgstr)) 
704          lines = [self._encodeifneccessary(line) for line in lines] 
705          postr = "".join(lines) 
706          return postr 
 707   
709          """Get a list of locations from sourcecomments in the PO unit 
710   
711          rtype: List 
712          return: A list of the locations with '#: ' stripped 
713   
714          """ 
715          locations = [] 
716          for sourcecomment in self.sourcecomments: 
717              locations += quote.rstripeol(sourcecomment)[3:].split() 
718          return locations 
 719   
721          """Add a location to sourcecomments in the PO unit 
722   
723          @param location: Text location e.g. 'file.c:23' does not include #: 
724          @type location: String 
725   
726          """ 
727          self.sourcecomments.append("#: %s\n" % location) 
 728   
740   
741 -    def getcontext(self): 
 742          """Get the message context.""" 
743          return unquotefrompo(self.msgctxt) + self._extract_msgidcomments() 
 744   
746          """Returns a unique identifier for this unit.""" 
747          context = self.getcontext() 
748           
749           
750           
751           
752   
753          id = self.source 
754          if self.msgidcomments: 
755              id = "_: %s\n%s" % (context, id) 
756          elif context: 
757              id = "%s\04%s" % (context, id) 
758          return id 
 759   
760 -class pofile(pocommon.pofile): 
 761      """this represents a .po file containing various units""" 
762      UnitClass = pounit 
764          """construct a pofile, optionally reading in from inputfile. 
765          encoding can be specified but otherwise will be read from the PO header""" 
766          self.UnitClass = unitclass 
767          pocommon.pofile.__init__(self, unitclass=unitclass) 
768          self.units = [] 
769          self.filename = '' 
770          self._encoding = encodingToUse(encoding) 
771          if inputfile is not None: 
772              self.parse(inputfile) 
 773   
775          """changes the encoding on the file""" 
776          self._encoding = encodingToUse(newencoding) 
777          if not self.units: 
778              return 
779          header = self.header() 
780          if not header or header.isblank(): 
781              return 
782          charsetline = None 
783          headerstr = unquotefrompo(header.msgstr, True) 
784          for line in headerstr.split("\\n"): 
785              if not ":" in line: continue 
786              key, value = line.strip().split(":", 1) 
787              if key.strip() != "Content-Type": continue 
788              charsetline = line 
789          if charsetline is None: 
790              headerstr += "Content-Type: text/plain; charset=%s" % self._encoding 
791          else: 
792              charset = re.search("charset=([^ ]*)", charsetline) 
793              if charset is None: 
794                  newcharsetline = charsetline 
795                  if not newcharsetline.strip().endswith(";"): 
796                      newcharsetline += ";" 
797                  newcharsetline += " charset=%s" % self._encoding 
798              else: 
799                  charset = charset.group(1) 
800                  newcharsetline = charsetline.replace("charset=%s" % charset, "charset=%s" % self._encoding, 1) 
801              headerstr = headerstr.replace(charsetline, newcharsetline, 1) 
802          header.msgstr = quoteforpo(headerstr) 
 803   
805          """parses the given file or file source string""" 
806          if hasattr(input, 'name'): 
807              self.filename = input.name 
808          elif not getattr(self, 'filename', ''): 
809              self.filename = '' 
810          if hasattr(input, "read"): 
811              posrc = input.read() 
812              input.close() 
813              input = posrc 
814           
815          lines = input.split("\n") 
816          start = 0 
817          end = 0 
818           
819          linesprocessed = 0 
820          while end <= len(lines): 
821              if (end == len(lines)) or (not lines[end].strip()):   
822                  newpe = self.UnitClass(encoding=self._encoding) 
823                  linesprocessed = newpe.parse("\n".join(lines[start:end])) 
824                  start += linesprocessed 
825                   
826                  if linesprocessed >= 1 and newpe._getoutput(): 
827                      self.units.append(newpe) 
828                      if newpe.isheader(): 
829                          if "Content-Type" in self.parseheader(): 
830                              self._encoding = newpe._encoding 
831                           
832                          if self._encoding is not None and self._encoding.lower() != 'charset': 
833                              lines = self.decode(lines) 
834                      if self._encoding is None:  
835                           
836                          self._encoding = 'utf-8' 
837                          lines = self.decode(lines) 
838                          self.units = [] 
839                          start = 0 
840                          end = 0 
841              end = end+1 
 842   
844          """make sure each msgid is unique ; merge comments etc from duplicates into original""" 
845          msgiddict = {} 
846          uniqueunits = [] 
847           
848           
849          markedpos = [] 
850          def addcomment(thepo): 
851              thepo.msgidcomments.append('"_: %s\\n"' % " ".join(thepo.getlocations())) 
852              markedpos.append(thepo) 
 853          for thepo in self.units: 
854              if duplicatestyle.startswith("msgid_comment"): 
855                  msgid = unquotefrompo(thepo.msgidcomments) + unquotefrompo(thepo.msgid) 
856              else: 
857                  msgid = unquotefrompo(thepo.msgid) 
858              if thepo.isheader(): 
859                   
860                  uniqueunits.append(thepo) 
861              elif duplicatestyle == "msgid_comment_all": 
862                  addcomment(thepo) 
863                  uniqueunits.append(thepo) 
864              elif msgid in msgiddict: 
865                  if duplicatestyle == "merge": 
866                      if msgid: 
867                          msgiddict[msgid].merge(thepo) 
868                      else: 
869                          addcomment(thepo) 
870                          uniqueunits.append(thepo) 
871                  elif duplicatestyle == "keep": 
872                      uniqueunits.append(thepo) 
873                  elif duplicatestyle == "msgid_comment": 
874                      origpo = msgiddict[msgid] 
875                      if origpo not in markedpos: 
876                          addcomment(origpo) 
877                      addcomment(thepo) 
878                      uniqueunits.append(thepo) 
879                  elif duplicatestyle == "msgctxt": 
880                      origpo = msgiddict[msgid] 
881                      if origpo not in markedpos: 
882                          origpo.msgctxt.append('"%s"' % " ".join(origpo.getlocations())) 
883                          markedpos.append(thepo) 
884                      thepo.msgctxt.append('"%s"' % " ".join(thepo.getlocations())) 
885                      uniqueunits.append(thepo) 
886              else: 
887                  if not msgid and duplicatestyle != "keep": 
888                      addcomment(thepo) 
889                  msgiddict[msgid] = thepo 
890                  uniqueunits.append(thepo) 
891          self.units = uniqueunits 
 892   
894          """convert to a string. double check that unicode is handled somehow here""" 
895          output = self._getoutput() 
896          if isinstance(output, unicode): 
897              return output.encode(getattr(self, "encoding", "UTF-8")) 
898          return output 
 899   
901          """convert the units back to lines""" 
902          lines = [] 
903          for unit in self.units: 
904              unitsrc = str(unit) + "\n" 
905              lines.append(unitsrc) 
906          lines = "".join(self.encode(lines)).rstrip() 
907           
908          if lines: lines += "\n" 
909          return lines 
 910   
912          """encode any unicode strings in lines in self._encoding""" 
913          newlines = [] 
914          encoding = self._encoding 
915          if encoding is None or encoding.lower() == "charset": 
916              encoding = 'UTF-8' 
917          for line in lines: 
918              if isinstance(line, unicode): 
919                  line = line.encode(encoding) 
920              newlines.append(line) 
921          return newlines 
 922   
924          """decode any non-unicode strings in lines with self._encoding""" 
925          newlines = [] 
926          for line in lines: 
927              if isinstance(line, str) and self._encoding is not None and self._encoding.lower() != "charset": 
928                  try: 
929                      line = line.decode(self._encoding) 
930                  except UnicodeError, e: 
931                      raise UnicodeError("Error decoding line with encoding %r: %s. Line is %r" % (self._encoding, e, line)) 
932              newlines.append(line) 
933          return newlines 
 934   
939   
if __name__ == '__main__':
    # Script mode: build a pofile from standard input and write its string
    # form to standard output.
    from sys import stdin, stdout
    stdout.write(str(pofile(stdin)))
944