1   
  2   
  3   
  4   
  5   
  6   
  7   
  8   
  9   
 10   
 11   
 12   
 13   
 14   
 15   
 16   
 17   
 18   
 19   
 20   
 21   
 22  """Manage the Wordfast Translation Memory format 
 23  """ 
 24   
 25  import csv 
 26  import time 
 27  from translate.storage import base 
 28   
 29  WF_TIMEFORMAT = "%Y%m%d~%H%M%S" 
 30  """Time format used by Wordfast""" 
 31   
 32  WF_FIELDNAMES_HEADER = ["date", "userlist", "tucount", "src-lang", "version", "target-lang", "license", "attr1list", "attr2list", "attr3list", "attr4list", "attr5list"] 
 33  """Field names for the Wordfast header""" 
 34   
 35  WF_FIELDNAMES = ["date", "user", "reuse", "src-lang", "source", "target-lang", "target", "attr1", "attr2", "attr3", "attr4"] 
 36  """Field names for a Wordfast TU""" 
 37   
 38  WF_FIELDNAMES_HEADER_DEFAULTS = { 
 39  "date": "%19000101~121212",  
 40  "userlist": "%User ID,TT,TT Translate-Toolkit",  
 41  "tucount": "%TU=00000001",  
 42  "src-lang": "%EN-US",  
 43  "version": "%Wordfast TM v.5.51w9/00",  
 44  "target-lang": "",  
 45  "license": "%---00000001",  
 46  "attr1list": "",  
 47  "attr2list": "",  
 48  "attr3list": "",  
 49  "attr4list": "" } 
 50  """Default or minimum header entries for a Wordfast file""" 
 51   
 52   
 53   
 54   
 55   
 56  WF_ESCAPE_MAP = ( 
 57                ("&'26;", u"\u0026"),  
 58                ("&'82;", u"\u201A"),  
 59                ("&'85;", u"\u2026"),  
 60                ("&'91;", u"\u2018"),  
 61                ("&'92;", u"\u2019"),  
 62                ("&'93;", u"\u201C"),  
 63                ("&'94;", u"\u201D"),  
 64                ("&'96;", u"\u2013"),  
 65                ("&'97;", u"\u2014"),  
 66                ("&'99;", u"\u2122"),  
 67                 
 68                ("&'A0;", u"\u00A0"),  
 69                ("&'A9;", u"\u00A9"),  
 70                ("&'AE;", u"\u00AE"),  
 71                ("&'BC;", u"\u00BC"),  
 72                ("&'BD;", u"\u00BD"),  
 73                ("&'BE;", u"\u00BE"),  
 74                 
 75                ("&'A8;", u"\u00AE"),  
 76                ("&'AA;", u"\u2122"),  
 77                ("&'C7;", u"\u00AB"),  
 78                ("&'C8;", u"\u00BB"),  
 79                ("&'C9;", u"\u2026"),  
 80                ("&'CA;", u"\u00A0"),  
 81                ("&'D0;", u"\u2013"),  
 82                ("&'D1;", u"\u2014"),  
 83                ("&'D2;", u"\u201C"),  
 84                ("&'D3;", u"\u201D"),  
 85                ("&'D4;", u"\u2018"),  
 86                ("&'D5;", u"\u2019"),  
 87                ("&'E2;", u"\u201A"),  
 88                ("&'E3;", u"\u201E"),  
 89                 
 90                 
 91               ) 
 92  """Mapping of Wordfast &'XX; escapes to correct Unicode characters""" 
 93   
 94  TAB_UTF16 = "\x00\x09" 
 95   
 97      """Char -> Wordfast &'XX; escapes 
 98       
 99      @note: Full roundtripping is not possible because of the escaping of \n and \t""" 
100       
101      if string: 
102          for code, char in WF_ESCAPE_MAP: 
103              string = string.replace(char.encode('utf-8'), code) 
104          string = string.replace("\n", "\\n").replace("\t", "\\t") 
105      return string 
 106   
114   
116      """Manages time stamps in the Wordfast format of YYYYMMDD~hhmmss""" 
118          self._time = None 
119          if not newtime: 
120              self.time = None 
121          elif isinstance(newtime, basestring): 
122              self.timestring = newtime 
123          elif isinstance(newtime, time.struct_time): 
124              self.time = newtime 
 125   
127          """Get the time in the Wordfast time format""" 
128          if not self._time: 
129              return None 
130          else: 
131              return time.strftime(WF_TIMEFORMAT, self._time) 
 132   
134          """Set the struct_time object using a Wordfast time formatted string 
135   
136          @param timestring: A Wordfast time string (YYYYMMDD~hhmmss) 
137          @type timestring: String 
138          """ 
139          self._time = time.strptime(timestring, WF_TIMEFORMAT) 
 140      timestring = property(get_timestring, set_timestring) 
141   
143          """Get the time_struct object""" 
144          return self._time 
 145   
147          """Set the struct_time object 
148           
149          @param newtime: a new time object 
150          @type newtime: time.struct_time 
151          """ 
152          if newtime and isinstance(newtime, time.struct_time): 
153              self._time = newtime 
154          else: 
155              self._time = None 
 156      time = property(get_time, set_time) 
157   
 163   
165      """A wordfast translation memory header""" 
172   
178   
180          """Get the header dictionary""" 
181          return self._header_dict 
 182   
184          self._header_dict = newheader 
 185      header = property(getheader, setheader) 
186   
188          self._header_dict['target-lang'] = '%%%s' % newlang 
 189      targetlang = property(None, settargetlang) 
190   
192          self._header_dict['tucount'] = '%%TU=%08d' % count 
 193      tucount = property(None, settucount) 
 194   
196      """A Wordfast translation memory unit""" 
202   
206   
208          """Get the dictionary of values for a Wordfast line""" 
209          return self._dict 
 210   
212          """Set the dictionary of values for a Wordfast line 
213   
214          @param newdict: a new dictionary with Wordfast line elements 
215          @type newdict: Dict 
216          """ 
217           
218          self._dict = newdict 
 219      dict = property(getdict, setdict) 
220   
222          if self._dict[key] is None: 
223              return None 
224          elif self._dict[key]: 
225              return _wf_to_char(self._dict[key]).decode('utf-8') 
226          else: 
227              return "" 
 228   
230          if newvalue is None: 
231              self._dict[key] = None 
232          if isinstance(newvalue, unicode): 
233              newvalue = newvalue.encode('utf-8') 
234          newvalue = _char_to_wf(newvalue) 
235          if not key in self._dict or newvalue != self._dict[key]: 
236              self._dict[key] = newvalue 
237              self._update_timestamp() 
 238   
241   
244      source = property(getsource, setsource) 
245   
248   
251      target = property(gettarget, settarget) 
252   
254          self._dict['target-lang'] = newlang 
 255      targetlang = property(None, settargetlang) 
256   
258          return str(self._dict) 
 259   
261          if not self._dict.get('source', None): 
262              return False 
263          return bool(self._dict.get('target', None)) 
  264   
265   
267      """A Wordfast translation memory file""" 
269          """construct a Wordfast TM, optionally reading in from inputfile.""" 
270          self.UnitClass = unitclass 
271          base.TranslationStore.__init__(self, unitclass=unitclass) 
272          self.filename = '' 
273          self.header = WordfastHeader() 
274          self._encoding = 'utf-16' 
275          if inputfile is not None: 
276              self.parse(inputfile) 
 277   
279          """Parse the given file or file source string""" 
280          if hasattr(input, 'name'): 
281              self.filename = input.name 
282          elif not getattr(self, 'filename', ''): 
283              self.filename = '' 
284          if hasattr(input, "read"): 
285              tmsrc = input.read() 
286              input.close() 
287              input = tmsrc 
288          if TAB_UTF16 in input.split("\n")[0]: 
289              self._encoding = 'utf-16' 
290          else: 
291              self._encoding = 'iso-8859-1' 
292          try: 
293              input = input.decode(self._encoding).encode('utf-8') 
294          except: 
295              raise ValueError("Wordfast files are either UTF-16 (UCS2) or ISO-8859-1 encoded") 
296          for header in csv.DictReader(input.split("\n")[:1], fieldnames=WF_FIELDNAMES_HEADER, dialect="excel-tab"): 
297              self.header = WordfastHeader(header) 
298          lines = csv.DictReader(input.split("\n")[1:], fieldnames=WF_FIELDNAMES, dialect="excel-tab") 
299          for line in lines: 
300              newunit = WordfastUnit() 
301              newunit.dict = line 
302              self.addunit(newunit) 
 303   
 325