1   
  2   
  3   
  4   
  5   
  6   
  7   
  8   
  9   
 10   
 11   
 12   
 13   
 14   
 15   
 16   
 17   
 18   
 19   
 20   
 21   
 22   
 23   
 24   
 25   
 26   
 27   
 28   
 29   
 30   
 31  """Module for parsing Gettext .mo files for translation. 
 32   
 33  The coding of .mo files was produced from documentation in Gettext 0.16 and  
 34  from observation and testing of existing .mo files in the wild. 
 35   
  36  The class does not implement any of the hashing components of Gettext.  This 
  37  will probably make the output file slower in some instances. 
 38  """ 
 39   
 40  from translate.storage import base 
 41  from translate.storage import po 
 42  from translate.misc.multistring import multistring 
 43  import struct 
 44  import array 
 45  import re 
 46   
 47  MO_MAGIC_NUMBER = 0x950412deL 
 48   
 50      """Helper to unpack Gettext MO files into a Python string""" 
 51      f = open(filename) 
 52      s = f.read() 
 53      print "\\x%02x"*len(s) % tuple(map(ord, s)) 
 54      f.close() 
  55   
 56 -class mounit(base.TranslationUnit): 
  57      """A class representing a .mo translation message.""" 
 62   
 63 -    def getcontext(self): 
  64          """Get the message context""" 
 65           
 66          if self.msgctxt is None: 
 67              return None 
 68          return "".join(self.msgctxt) 
  69   
 71          """Is this a header entry?""" 
 72          return self.source == "" 
  73   
  75          """Is this message translatable?""" 
 76          return bool(self.source) 
   77   
 78 -class mofile(base.TranslationStore): 
  79      """A class representing a .mo file.""" 
 80      UnitClass = mounit 
 88   
 90          """Output a string representation of the MO data file""" 
 91           
 92          MESSAGES = {} 
 93          for unit in self.units: 
 94              if isinstance(unit.source, multistring): 
 95                  source = "".join(unit.msgidcomments) + "\0".join(unit.source.strings) 
 96              else: 
 97                  source = "".join(unit.msgidcomments) + unit.source 
 98              if unit.msgctxt: 
 99                  source = "".join(unit.msgctxt) + "\x04" + source 
100              if isinstance(unit.target, multistring): 
101                  target = "\0".join(unit.target.strings) 
102              else: 
103                  target = unit.target 
104              if unit.target: 
105                  MESSAGES[source.encode("utf-8")] = target 
106          keys = MESSAGES.keys() 
107           
108          keys.sort() 
109          offsets = [] 
110          ids = strs = '' 
111          for id in keys: 
112               
113               
114               
115              string = MESSAGES[id]  
116              if isinstance(string, unicode): 
117                  string = string.encode('utf-8') 
118              offsets.append((len(ids), len(id), len(strs), len(string))) 
119              ids = ids + id + '\0' 
120              strs = strs + string + '\0' 
121          output = '' 
122           
123           
124           
125          keystart = 7*4+16*len(keys) 
126           
127          valuestart = keystart + len(ids) 
128          koffsets = [] 
129          voffsets = [] 
130           
131           
132          for o1, l1, o2, l2 in offsets: 
133              koffsets = koffsets + [l1, o1+keystart] 
134              voffsets = voffsets + [l2, o2+valuestart] 
135          offsets = koffsets + voffsets 
136          output = struct.pack("Iiiiiii", 
137                               MO_MAGIC_NUMBER,    
138                               0,                  
139                               len(keys),          
140                               7*4,                
141                               7*4+len(keys)*8,    
142                               0, 0)               
143          output = output + array.array("i", offsets).tostring() 
144          output = output + ids 
145          output = output + strs 
146          return output 
 147   
149          """parses the given file or file source string""" 
150          if hasattr(input, 'name'): 
151              self.filename = input.name 
152          elif not getattr(self, 'filename', ''): 
153              self.filename = '' 
154          if hasattr(input, "read"): 
155              mosrc = input.read() 
156              input.close() 
157              input = mosrc 
158          little, = struct.unpack("<L", input[:4]) 
159          big, = struct.unpack(">L", input[:4]) 
160          if little == MO_MAGIC_NUMBER: 
161              endian = "<" 
162          elif big == MO_MAGIC_NUMBER: 
163              endian = ">" 
164          else: 
165              raise ValueError("This is not an MO file") 
166          magic, version, lenkeys, startkey, startvalue, sizehash, offsethash = struct.unpack("%sLiiiiii" % endian, input[:(7*4)]) 
167          if version > 1: 
168              raise ValueError("Unable to process MO files with versions > 1.  This is a %d version MO file" % version) 
169          encoding = 'UTF-8' 
170          for i in range(lenkeys): 
171              nextkey = startkey+(i*2*4) 
172              nextvalue = startvalue+(i*2*4) 
173              klength, koffset = struct.unpack("%sii" % endian, input[nextkey:nextkey+(2*4)]) 
174              vlength, voffset = struct.unpack("%sii" % endian, input[nextvalue:nextvalue+(2*4)]) 
175              source = input[koffset:koffset+klength] 
176              context = None 
177               
178              if "\x04" in source: 
179                  context, source = source.split("\x04") 
180               
181               
182              source = multistring(source.split("\0"), encoding=encoding) 
183              if source == "": 
184                  charset = re.search("charset=([^\\s]+)", input[voffset:voffset+vlength]) 
185                  if charset: 
186                      encoding = po.encodingToUse(charset.group(1)) 
187              target = multistring(input[voffset:voffset+vlength].split("\0"), encoding=encoding) 
188              newunit = mounit(source) 
189              newunit.settarget(target) 
190              if context is not None: 
191                  newunit.msgctxt.append(context) 
192              self.addunit(newunit) 
  193