1   
  2   
  3   
  4   
  5   
  6   
  7   
  8   
  9   
 10   
 11   
 12   
 13   
 14   
 15   
 16   
 17   
 18   
 19   
 20   
 21   
"""
Classes that hold units of .oo files (oounit) or entire files (oofile).

These are specific .oo files for localisation exported by OpenOffice.org - SDF
format (previously known as GSI files). For an overview of the format, see
http://l10n.openoffice.org/L10N_Framework/Intermediate_file_format.html

The behaviour in terms of escaping is explained in detail in the programming
comments.
"""
 32   
 33   
 34  import os 
 35  import re 
 36  import sys 
 37  from translate.misc import quote 
 38  from translate.misc import wStringIO 
 39  import warnings 
 40   
 41   
 42   
 43  normalfilenamechars = "/#.0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 
 44  normalizetable = "" 
 45  for i in map(chr, range(256)): 
 46      if i in normalfilenamechars: 
 47          normalizetable += i 
 48      else: 
 49          normalizetable += "_" 
 50   
 53          self.normalchars = {} 
 54          for char in normalchars: 
 55              self.normalchars[ord(char)] = char 
  57          return self.normalchars.get(key, u"_") 
  88 -def escape_text(text): 
  89      """Escapes SDF text to be suitable for unit consumption.""" 
 90      return text.replace("\n", "\\n").replace("\t", "\\t").replace("\r", "\\r") 
  91       
 92 -def unescape_text(text): 
  93      """Unescapes SDF text to be suitable for unit consumption.""" 
 94      return text.replace("\\\\", "\a").replace("\\n", "\n").replace("\\t", "\t").\ 
 95             replace("\\r", "\r").replace("\a", "\\\\") 
  96   
 97  helptagre = re.compile('''<[/]??[a-z_\-]+?(?:| +[a-z]+?=".*?") *[/]??>''') 
 98   
100      """Escapes the help text as it would be in an SDF file. 
101   
102      <, >, " are only escaped in <[[:lower:]]> tags. Some HTML tags make it in in  
103      lowercase so those are dealt with. Some OpenOffice.org help tags are not  
104      escaped. 
105      """ 
106      text = text.replace("\\", "\\\\") 
107      for tag in helptagre.findall(text): 
108          escapethistag = True 
109          if tag in ["<br>", "<h1>", "</h1>", "<img ...>", "<->", "<empty>"]: 
110              escapethistag = False 
111          for skip in ["<font", "<node", "<help_section"]: 
112              if tag.startswith(skip): 
113                  escapethistag = False 
114          if escapethistag: 
115              escaped_tag = ("\\<" + tag[1:-1] + "\\>").replace('"', '\\"') 
116              text = text.replace(tag, escaped_tag) 
117      return text 
 118   
120      """Unescapes normal text to be suitable for writing to the SDF file.""" 
121      return text.replace(r"\<", "<").replace(r"\>", ">").replace(r'\"', '"').replace(r"\\", "\\") 
 122   
    """Encode a Unicode string to the UTF-8 encoding"""
125      if isinstance(text, unicode): 
126          return text.encode('UTF-8') 
127      return text 
 128   
129   
131      """this represents one line, one translation in an .oo file""" 
133          """construct an ooline from its parts""" 
134          if parts is None: 
135              self.project, self.sourcefile, self.dummy, self.resourcetype, \ 
136                  self.groupid, self.localid, self.helpid, self.platform, \ 
137                  self.width, self.languageid, self.text, self.helptext, \ 
138                  self.quickhelptext, self.title, self.timestamp = [""] * 15 
139          else: 
140              self.setparts(parts) 
 141   
143          """create a line from its tab-delimited parts""" 
144          if len(parts) != 15: 
145              warnings.warn("oo line contains %d parts, it should contain 15: %r" % \ 
146                      (len(parts), parts)) 
147              newparts = list(parts) 
148              if len(newparts) < 15: 
149                  newparts = newparts + [""] * (15-len(newparts)) 
150              else: 
151                  newparts = newparts[:15] 
152              parts = tuple(newparts) 
153          self.project, self.sourcefile, self.dummy, self.resourcetype, \ 
154              self.groupid, self.localid, self.helpid, self.platform, \ 
155              self.width, self.languageid, self._text, self.helptext, \ 
156              self.quickhelptext, self.title, self.timestamp = parts 
 157   
159          """return a list of parts in this line""" 
160          return (self.project, self.sourcefile, self.dummy, self.resourcetype, 
161                  self.groupid, self.localid, self.helpid, self.platform, 
162                  self.width, self.languageid, self._text, self.helptext,  
163                  self.quickhelptext, self.title, self.timestamp) 
 164   
166          """Obtains the text column and handle escaping.""" 
167          if self.sourcefile.endswith(".xhp"): 
168              return unescape_help_text(self._text) 
169          else: 
170              return unescape_text(self._text) 
 171           
172 -    def settext(self, text): 
 173          """Sets the text column and handle escaping.""" 
174          if self.sourcefile.endswith(".xhp"): 
175              self._text = escape_help_text(text) 
176          else: 
177              self._text = escape_text(text) 
 178      text = property(gettext, settext) 
179   
183   
185          """return a line in tab-delimited form""" 
186          parts = self.getparts() 
187          return "\t".join(parts) 
 188   
190          """get the key that identifies the resource""" 
191          return (self.project, self.sourcefile, self.resourcetype, self.groupid, 
192                  self.localid, self.platform) 
  193   
195      """this represents a number of translations of a resource""" 
197          """construct the oounit""" 
198          self.languages = {} 
199          self.lines = [] 
 200   
202          """add a line to the oounit""" 
203          self.languages[line.languageid] = line 
204          self.lines.append(line) 
 205   
209   
211          """return the lines in tab-delimited form""" 
212          return "\r\n".join([str(line) for line in self.lines]) 
  213   
215      """this represents an entire .oo file""" 
216      UnitClass = oounit 
218          """constructs the oofile""" 
219          self.oolines = [] 
220          self.units = [] 
221          self.ookeys = {} 
222          self.filename = "" 
223          self.languages = [] 
224          if input is not None: 
225              self.parse(input) 
 226   
228          """adds a parsed line to the file""" 
229          key = thisline.getkey() 
230          element = self.ookeys.get(key, None) 
231          if element is None: 
232              element = self.UnitClass() 
233              self.units.append(element) 
234              self.ookeys[key] = element 
235          element.addline(thisline) 
236          self.oolines.append(thisline) 
237          if thisline.languageid not in self.languages: 
238              self.languages.append(thisline.languageid) 
 239   
241          """parses lines and adds them to the file""" 
242          if not self.filename: 
243              self.filename = getattr(input, 'name', '') 
244          if hasattr(input, "read"): 
245              src = input.read() 
246              input.close() 
247          else: 
248              src = input 
249          for line in src.split("\n"): 
250              line = quote.rstripeol(line) 
251              if not line: 
252                  continue 
253              parts = line.split("\t") 
254              thisline = ooline(parts) 
255              self.addline(thisline) 
 256   
260   
262          """converts all the lines back to tab-delimited form""" 
263          lines = [] 
264          for oe in self.units: 
265              if len(oe.lines) > 2: 
266                  warnings.warn("contains %d lines (should be 2 at most): languages %r" % (len(oe.lines), oe.languages)) 
267                  oekeys = [line.getkey() for line in oe.lines] 
268                  warnings.warn("contains %d lines (should be 2 at most): keys %r" % (len(oe.lines), oekeys)) 
269              oeline = str(oe) + "\r\n" 
270              lines.append(oeline) 
271          return "".join(lines) 
  272   
274      """this takes a huge GSI file and represents it as multiple smaller files...""" 
275 -    def __init__(self, filename, mode=None, multifilestyle="single"): 
 276          """initialises oomultifile from a seekable inputfile or writable outputfile""" 
277          self.filename = filename 
278          if mode is None: 
279              if os.path.exists(filename): 
280                  mode = 'r' 
281              else: 
282                  mode = 'w' 
283          self.mode = mode 
284          self.multifilestyle = multifilestyle 
285          self.multifilename = os.path.splitext(filename)[0] 
286          self.multifile = open(filename, mode) 
287          self.subfilelines = {} 
288          if mode == "r": 
289              self.createsubfileindex() 
 290   
292          """reads in all the lines and works out the subfiles""" 
293          linenum = 0 
294          for line in self.multifile: 
295              subfile = self.getsubfilename(line) 
296              if not subfile in self.subfilelines: 
297                  self.subfilelines[subfile] = [] 
298              self.subfilelines[subfile].append(linenum) 
299              linenum += 1 
 300   
302          """looks up the subfile name for the line""" 
303          if line.count("\t") < 2: 
304              raise ValueError("invalid tab-delimited line: %r" % line) 
305          lineparts = line.split("\t", 2) 
306          module, filename = lineparts[0], lineparts[1] 
307          if self.multifilestyle == "onefile": 
308              ooname = self.multifilename 
309          elif self.multifilestyle == "toplevel": 
310              ooname = module 
311          else: 
312              filename = filename.replace("\\", "/") 
313              fileparts = [module] + filename.split("/") 
314              ooname = os.path.join(*fileparts[:-1]) 
315          return ooname + os.extsep + "oo" 
 316   
318          """returns a list of subfiles in the file""" 
319          return self.subfilelines.keys() 
 320   
322          """iterates through the subfile names""" 
323          for subfile in self.listsubfiles(): 
324              yield subfile 
 325   
327          """checks if this pathname is a valid subfile""" 
328          return pathname in self.subfilelines 
 329   
331          """returns the list of lines matching the subfile""" 
332          lines = [] 
333          requiredlines = dict.fromkeys(self.subfilelines[subfile]) 
334          linenum = 0 
335          self.multifile.seek(0) 
336          for line in self.multifile: 
337              if linenum in requiredlines: 
338                  lines.append(line) 
339              linenum += 1 
340          return "".join(lines) 
 341   
348   
350          """returns a pseudo-file object for the given subfile""" 
351          def onclose(contents): 
352              self.multifile.write(contents) 
353              self.multifile.flush() 
 354          outputfile = wStringIO.CatchStringOutput(onclose) 
355          outputfile.filename = subfile 
356          return outputfile 
 357   
359          """returns an oofile built up from the given subfile's lines""" 
360          subfilesrc = self.getsubfilesrc(subfile) 
361          oosubfile = oofile() 
362          oosubfile.filename = subfile 
363          oosubfile.parse(subfilesrc) 
364          return oosubfile 
 365   
if __name__ == '__main__':
    # Filter mode: parse an .oo file from stdin and write it back to stdout
    # in tab-delimited form.
    source = sys.stdin.read()
    parsed = oofile()
    parsed.parse(source)
    sys.stdout.write(str(parsed))
370