1   
  2   
  3   
  4   
  5   
  6   
  7   
  8   
  9   
 10   
 11   
 12   
 13   
 14   
 15   
 16   
 17   
 18   
 19   
 20   
 21   
 22   
 23  """Module for parsing Qt .qm files 
 24   
 25  @note: based on documentation from Gettext's .qm implementation (see write-qt.c) and on observation 
 26  of the output of lrelease. 
 27  @note: Certain deprecated section tags are not implemented.  These will break and print out 
 28  the missing tag.  They are easy to implement and should follow the structure in 03  
 29  (Translation).  We could find no examples that use these so we'd rather leave it  
 30  unimplemented until we actually have test data. 
@note: Many .qm files cannot be parsed because they do not contain the source text.  We assume
that, since they use a hash table to look up the data, there is actually no need for the
source text.  It seems, however, that Qt4's lrelease includes all data in the resulting .qm
file.
 35  @todo: We can only parse, not create, a .qm file.  The main issue is that we need to  
 36  implement the hashing algorithm (which seems to be identical to the Gettext hash algorithm).  Unlike 
 37  Gettext it seems that the hash is required, but that has not been validated. 
 38  @todo: The code can parse files correctly.  But it could be cleaned up to be more readable, especially  
 39  the part that breaks the file into sections. 
 40  """ 
 41   
 42  from translate.storage import base 
 43  from translate.misc.multistring import multistring 
 44  import codecs 
 45  import struct 
 46  import sys 
 47   
# Expected value of the first 16 bytes of a .qm file when they are read as
# four big-endian unsigned 32-bit integers (struct format ">4L").
QM_MAGIC_NUMBER = (0x3CB86418L, 0xCAEF9C95L, 0xCD211CBFL, 0x60A1BDDDL)
 49   
 51      """Helper to unpack Qt .qm files into a Python string""" 
 52      f = open(qmfile) 
 53      s = f.read() 
 54      print "\\x%02x"*len(s) % tuple(map(ord, s)) 
 55      f.close() 
  56   
 57 -class qmunit(base.TranslationUnit): 
  58      """A class representing a .qm translation message.""" 
  61   
 62 -class qmfile(base.TranslationStore): 
  63      """A class representing a .qm file.""" 
 64      UnitClass = qmunit 
 72   
        """Output a string representation of the .qm data file

        This implementation always returns an empty string.
        """
        return ""
  76   
 78          """parses the given file or file source string""" 
 79          if hasattr(input, 'name'): 
 80              self.filename = input.name 
 81          elif not getattr(self, 'filename', ''): 
 82              self.filename = '' 
 83          if hasattr(input, "read"): 
 84              qmsrc = input.read() 
 85              input.close() 
 86              input = qmsrc 
 87          magic = struct.unpack(">4L", input[:16]) 
 88          if magic != QM_MAGIC_NUMBER: 
 89              raise ValueError("This is not a .qm file") 
 90          startsection = 16 
 91          sectionheader = 5 
 92          while startsection < len(input): 
 93              section_type, length = struct.unpack(">bL", input[startsection:startsection+sectionheader]) 
 94              if section_type == 0x42: 
 95                   
 96                  hashash = True 
 97                  hash_start = startsection+sectionheader 
 98                  hash_data = struct.unpack(">%db" % length, input[startsection+sectionheader:startsection+sectionheader+length]) 
 99              elif section_type == 0x69: 
100                   
101                  hasmessages = True 
102                  messages_start = startsection+sectionheader 
103                  messages_data = struct.unpack(">%db" % length, input[startsection+sectionheader:startsection+sectionheader+length]) 
104              elif section_type == 0x2f: 
105                   
106                  hascontexts = True 
107                  contexts_start = startsection+sectionheader 
108                  contexts_data = struct.unpack(">%db" % length, input[startsection+sectionheader:startsection+sectionheader+length]) 
109              startsection = startsection+sectionheader+length 
110          pos = messages_start 
111          source = target = None 
112          while pos < messages_start + len(messages_data): 
113              subsection, = struct.unpack(">b", input[pos:pos+1]) 
114              if subsection == 0x01:  
115                   
116                  pos = pos+1 
117                  if not source is None and not target is None: 
118                      newunit = self.addsourceunit(source) 
119                      newunit.target = target 
120                      source = target = None 
121                  else: 
122                      raise ValueError("Old .qm format with no source defined") 
123                  continue 
124               
125              pos = pos+1 
126              length, = struct.unpack(">l", input[pos:pos+4]) 
127              if subsection == 0x03:  
128                  if length != -1: 
129                      raw, = struct.unpack(">%ds" % length, input[pos+4:pos+4+length]) 
130                      string, templen = codecs.utf_16_be_decode(raw) 
131                      if target: 
132                          target.strings.append(string) 
133                      else: 
134                          target = multistring(string) 
135                      pos = pos+4+length 
136                  else: 
137                      target = "" 
138                      pos = pos+4 
139                   
140              elif subsection == 0x06:  
141                  source = input[pos+4:pos+4+length].decode('iso-8859-1') 
142                   
143                  pos = pos+4+length 
144              elif subsection == 0x07:  
145                  context = input[pos+4:pos+4+length].decode('iso-8859-1') 
146                   
147                  pos = pos+4+length 
148              elif subsection == 0x08:  
149                  comment = input[pos+4:pos+4+length] 
150                   
151                  pos = pos+4+length 
152              elif subsection == 0x05:  
153                  hash = input[pos:pos+4] 
154                   
155                  pos = pos+4 
156              else: 
157                  if subsection == 0x02:  
158                      subsection_name = "SourceText16" 
159                  elif subsection == 0x04:  
160                      subsection_name = "Context16" 
161                  else: 
162                      subsection_name = "Unkown" 
163                  print >> sys.stderr, "Unimplemented: %s %s" % (subsection, subsection_name) 
164                  return 
  165