1   
  2   
  3   
  4   
  5   
  6   
  7   
  8   
  9   
 10   
 11   
 12   
 13   
 14   
 15   
 16   
 17   
 18   
 19   
 20   
 21   
 22  """Module to provide a cache of statistics in a database. 
 23   
 24  @organization: Zuza Software Foundation 
 25  @copyright: 2007 Zuza Software Foundation 
 26  @license: U{GPL <http://www.fsf.org/licensing/licenses/gpl.html>} 
 27  """ 
 28   
 29  from translate import __version__ as toolkitversion 
 30  from translate.storage import factory 
 31  from translate.misc.multistring import multistring 
 32  from translate.lang.common import Common 
 33   
 34  try: 
 35      from sqlite3 import dbapi2 
 36  except ImportError: 
 37      from pysqlite2 import dbapi2 
 38  import os.path 
 39  import re 
 40  import sys 
 41   
 42  kdepluralre = re.compile("^_n: ") 
 43  brtagre = re.compile("<br\s*?/?>") 
 44  xmltagre = re.compile("<[^>]+>") 
 45  numberre = re.compile("\\D\\.\\D") 
 46   
 47  state_strings = {0: "untranslated", 1: "translated", 2: "fuzzy"} 
 48   
 58   
 60      """Counts the words in the unit's source and target, taking plurals into  
 61      account. The target words are only counted if the unit is translated.""" 
 62      (sourcewords, targetwords) = (0, 0) 
 63      if isinstance(unit.source, multistring): 
 64          sourcestrings = unit.source.strings 
 65      else: 
 66          sourcestrings = [unit.source or ""] 
 67      for s in sourcestrings: 
 68          sourcewords += wordcount(s) 
 69      if not unit.istranslated(): 
 70          return sourcewords, targetwords 
 71      if isinstance(unit.target, multistring): 
 72          targetstrings = unit.target.strings 
 73      else: 
 74          targetstrings = [unit.target or ""] 
 75      for s in targetstrings: 
 76          targetwords += wordcount(s) 
 77      return sourcewords, targetwords 
  78   
 80      """Returns the numeric database state for the unit.""" 
 81      if unit.istranslated(): 
 82          return 1 
 83      if unit.isfuzzy() and unit.target: 
 84          return 2 
 85      return 0 
  86   
 88      """Returns a dictionary with all statistics initialised to 0.""" 
 89      stats = {} 
 90      for state in ["total", "translated", "fuzzy", "untranslated", "review"]: 
 91          stats[state] = 0 
 92          stats[state + "sourcewords"] = 0 
 93          stats[state + "targetwords"] = 0 
 94      return stats 
  95   
 97      """Provides the filename of the associated file containing suggestions and  
 98      its mtime, if it exists.""" 
 99      root, ext = os.path.splitext(filename) 
100      suggestion_filename = None 
101      suggestion_mtime = -1 
102      if ext == os.path.extsep + "po": 
103           
104           
105           
106          suggestion_filename = filename + os.path.extsep + 'pending' 
107          if not os.path.exists(suggestion_filename): 
108              suggestion_filename = None 
109          else: 
110              suggestion_mtime = os.path.getmtime(suggestion_filename) 
111      return suggestion_filename, suggestion_mtime 
 112   
114      """An object instantiated as a singleton for each statsfile that provides  
115      access to the database cache from a pool of StatsCache objects.""" 
116      caches = {} 
117      defaultfile = None 
118      con = None 
119      """This cache's connection""" 
120      cur = None 
121      """The current cursor""" 
122   
124          if not statsfile: 
125              if not cls.defaultfile: 
126                  userdir = os.path.expanduser("~") 
127                  cachedir = None 
128                  if os.name == "nt": 
129                      cachedir = os.path.join(userdir, "Translate Toolkit") 
130                  else: 
131                      cachedir = os.path.join(userdir, ".translate_toolkit") 
132                  if not os.path.exists(cachedir): 
133                      os.mkdir(cachedir) 
134                  cls.defaultfile = os.path.realpath(os.path.join(cachedir, "stats.db")) 
135              statsfile = cls.defaultfile 
136          else: 
137              statsfile = os.path.realpath(statsfile) 
138           
139          if statsfile in cls.caches: 
140              return cls.caches[statsfile] 
141           
142          cache = cls.caches[statsfile] = object.__new__(cls) 
143          cache.con = dbapi2.connect(statsfile) 
144          cache.cur = cache.con.cursor() 
145          cache.create() 
146          return cache 
 147   
149          """Create all tables and indexes.""" 
150          self.cur.execute("""CREATE TABLE IF NOT EXISTS files( 
151              fileid INTEGER PRIMARY KEY AUTOINCREMENT, 
152              path VARCHAR NOT NULL UNIQUE, 
153              mtime INTEGER NOT NULL, 
154              toolkitbuild INTEGER NOT NULL);""") 
155   
156          self.cur.execute("""CREATE UNIQUE INDEX IF NOT EXISTS filepathindex 
157              ON files (path);""") 
158   
159          self.cur.execute("""CREATE TABLE IF NOT EXISTS units( 
160              id INTEGER PRIMARY KEY AUTOINCREMENT, 
161              unitid VARCHAR NOT NULL, 
162              fileid INTEGER NOT NULL, 
163              unitindex INTEGER NOT NULL, 
164              source VARCHAR NOT NULL, 
165              target VARCHAR, 
166              state INTEGER, 
167              sourcewords INTEGER, 
168              targetwords INTEGER);""") 
169           
170          self.cur.execute("""CREATE INDEX IF NOT EXISTS fileidindex 
171              ON units(fileid);""") 
172   
173          self.cur.execute("""CREATE TABLE IF NOT EXISTS checkerconfigs( 
174              configid INTEGER PRIMARY KEY AUTOINCREMENT, 
175              config VARCHAR);""") 
176   
177          self.cur.execute("""CREATE INDEX IF NOT EXISTS configindex 
178              ON checkerconfigs(config);""") 
179   
180          self.cur.execute("""CREATE TABLE IF NOT EXISTS uniterrors( 
181              errorid INTEGER PRIMARY KEY AUTOINCREMENT, 
182              unitindex INTEGER NOT NULL, 
183              fileid INTEGER NOT NULL, 
184              configid INTEGER NOT NULL, 
185              name VARCHAR NOT NULL, 
186              message VARCHAR);""") 
187   
188          self.cur.execute("""CREATE INDEX IF NOT EXISTS uniterrorindex 
189              ON uniterrors(fileid, configid);""") 
190           
191          self.con.commit() 
 192   
194          """Attempt to find the fileid of the given file, if it hasn't been 
195          updated since the last record update. 
196   
197          None is returned if either the file's record is not found, or if it is 
198          not up to date. 
199   
200          @param filename: the filename to retrieve the id for 
201          @param optmtime: an optional mtime to consider in addition to the mtime of 
202          the given file 
203          @rtype: Integer or None 
204          """ 
205          realpath = os.path.realpath(filename) 
206          self.cur.execute("""SELECT fileid, mtime FROM files  
207                  WHERE path=?;""", (realpath,)) 
208          filerow = self.cur.fetchone() 
209          mtime = max(optmtime, os.path.getmtime(realpath)) 
210          if checkmtime: 
211              if not filerow or filerow[1] != mtime: 
212                  return None 
213          if filerow: 
214              fileid = filerow[0] 
215              if not checkmtime: 
216                   
217                  self.cur.execute("""UPDATE files  
218                          SET mtime=?  
219                          WHERE fileid=?;""", (mtime, fileid)) 
220              return fileid 
221          return None 
 222   
224          """See if this checker configuration has been used before.""" 
225          config = str(checker.config.__dict__) 
226          self.cur.execute("""SELECT configid, config FROM checkerconfigs WHERE  
227              config=?;""", (config,)) 
228          configrow = self.cur.fetchone() 
229          if not configrow or configrow[1] != config: 
230              return None 
231          else: 
232              return configrow[0] 
 233   
235          """Cache the statistics for the supplied unit(s).""" 
236          unitvalues = [] 
237          for index, unit in enumerate(units): 
238              if unit.istranslatable(): 
239                  sourcewords, targetwords = wordsinunit(unit) 
240                  if unitindex: 
241                      index = unitindex 
242                   
243                  unitvalues.append((unit.getid(), fileid, index, \ 
244                                  unit.source, unit.target, \ 
245                                  sourcewords, targetwords, \ 
246                                  statefordb(unit))) 
247           
248          self.cur.executemany("""INSERT INTO units 
249              (unitid, fileid, unitindex, source, target, sourcewords, targetwords, state)  
250              values (?, ?, ?, ?, ?, ?, ?, ?);""", 
251              unitvalues) 
252          self.con.commit() 
253          if unitindex: 
254              return state_strings[statefordb(units[0])] 
255          return "" 
 256   
258          """Calculates and caches the statistics of the given store  
259          unconditionally.""" 
260          realpath = os.path.realpath(store.filename) 
261          mtime = os.path.getmtime(realpath) 
262          self.cur.execute("""DELETE FROM files WHERE 
263              path=?;""", (realpath,)) 
264          self.cur.execute("""INSERT INTO files  
265              (fileid, path, mtime, toolkitbuild) values (NULL, ?, ?, ?);""",  
266              (realpath, mtime, toolkitversion.build)) 
267          fileid = self.cur.lastrowid 
268          self.cur.execute("""DELETE FROM units WHERE 
269              fileid=?""", (fileid,)) 
270          self._cacheunitstats(store.units, fileid) 
271          return fileid 
 272   
274          """Retrieves the stored statistics for a given directory, all summed. 
275           
276          Note that this does not check for mtimes or the presence of files.""" 
277          realpath = os.path.realpath(dirname) 
278          self.cur.execute("""SELECT 
279              state, 
280              count(unitid) as total, 
281              sum(sourcewords) as sourcewords, 
282              sum(targetwords) as targetwords 
283              FROM units WHERE fileid IN 
284                  (SELECT fileid from files 
285                  WHERE substr(path, 0, ?)=?) 
286              GROUP BY state;""", (len(realpath), realpath)) 
287          totals = emptystats() 
288          return self.cur.fetchall() 
 289   
291          """Retrieves the statistics for the given file if possible, otherwise  
292          delegates to cachestore().""" 
293          fileid = self._getstoredfileid(filename) 
294          if not fileid: 
295              try: 
296                  store = factory.getobject(filename) 
297                  fileid = self.cachestore(store) 
298              except ValueError, e: 
299                  print >> sys.stderr, str(e) 
300                  return {} 
301   
302          self.cur.execute("""SELECT  
303              state, 
304              count(unitid) as total, 
305              sum(sourcewords) as sourcewords, 
306              sum(targetwords) as targetwords 
307              FROM units WHERE fileid=? 
308              GROUP BY state;""", (fileid,)) 
309          values = self.cur.fetchall() 
310   
311          totals = emptystats() 
312          for stateset in values: 
313              state = state_strings[stateset[0]]           
314              totals[state] = stateset[1] or 0             
315              totals[state + "sourcewords"] = stateset[2]  
316              totals[state + "targetwords"] = stateset[3]  
317          totals["total"] = totals["untranslated"] + totals["translated"] + totals["fuzzy"] 
318          totals["totalsourcewords"] = totals["untranslatedsourcewords"] + \ 
319                  totals["translatedsourcewords"] + \ 
320                  totals["fuzzysourcewords"] 
321          return totals 
 322   
324          """Helper method for cachestorechecks() and recacheunit()""" 
325           
326           
327          dummy = (-1, fileid, configid, "noerror", "") 
328          unitvalues = [dummy] 
329           
330          errornames = [] 
331          for index, unit in enumerate(units): 
332              if unit.istranslatable(): 
333                   
334                  if unitindex: 
335                      index = unitindex 
336                  failures = checker.run_filters(unit) 
337                  for checkname, checkmessage in failures.iteritems(): 
338                      unitvalues.append((index, fileid, configid, checkname, checkmessage)) 
339                      errornames.append("check-" + checkname) 
340          checker.setsuggestionstore(None) 
341   
342   
343          if unitindex: 
344               
345               
346              unitvalues.remove(dummy) 
347              errornames.append("total") 
348   
349           
350          self.cur.executemany("""INSERT INTO uniterrors 
351              (unitindex, fileid, configid, name, message)  
352              values (?, ?, ?, ?, ?);""", 
353              unitvalues) 
354          self.con.commit() 
355          return errornames 
 356   
358          """Calculates and caches the error statistics of the given store  
359          unconditionally.""" 
360           
361           
362          self.cur.execute("""DELETE FROM uniterrors WHERE 
363              fileid=?;""", (fileid,)) 
364          self._cacheunitschecks(store.units, fileid, configid, checker) 
365          return fileid 
 366   
368          """Recalculate all information for a specific unit. This is necessary 
369          for updating all statistics when a translation of a unit took place,  
370          for example. 
371           
372          This method assumes that everything was up to date before (file totals, 
373          checks, checker config, etc.).""" 
374          suggestion_filename, suggestion_mtime = suggestioninfo(filename) 
375          fileid = self._getstoredfileid(filename, suggestion_mtime, checkmtime=False) 
376          configid = self._getstoredcheckerconfig(checker) 
377          unitid = unit.getid() 
378           
379          self.cur.execute("""SELECT unitindex FROM units WHERE 
380              fileid=? AND unitid=?;""", (fileid, unitid)) 
381          unitindex = self.cur.fetchone()[0] 
382          self.cur.execute("""DELETE FROM units WHERE 
383              fileid=? AND unitid=?;""", (fileid, unitid)) 
384          state = [self._cacheunitstats([unit], fileid, unitindex)] 
385           
386          self.cur.execute("""DELETE FROM uniterrors WHERE 
387              fileid=? AND unitindex=?;""", (fileid, unitindex)) 
388          if suggestion_filename: 
389              checker.setsuggestionstore(factory.getobject(suggestion_filename, ignore=os.path.extsep+ 'pending')) 
390          state.extend(self._cacheunitschecks([unit], fileid, configid, checker, unitindex)) 
391          return state 
 392   
393 -    def filechecks(self, filename, checker, store=None): 
 394          """Retrieves the error statistics for the given file if possible,  
395          otherwise delegates to cachestorechecks().""" 
396          suggestion_filename, suggestion_mtime = suggestioninfo(filename) 
397          fileid = self._getstoredfileid(filename, suggestion_mtime) 
398          configid = self._getstoredcheckerconfig(checker) 
399          try: 
400              if not fileid: 
401                  store = store or factory.getobject(filename) 
402                  fileid = self.cachestore(store) 
403              if not configid: 
404                  self.cur.execute("""INSERT INTO checkerconfigs 
405                      (configid, config) values (NULL, ?);""",  
406                      (str(checker.config.__dict__),)) 
407                  configid = self.cur.lastrowid 
408          except ValueError, e: 
409              print >> sys.stderr, str(e) 
410              return {} 
411   
412          def geterrors(): 
413              self.cur.execute("""SELECT  
414                  name, 
415                  unitindex 
416                  FROM uniterrors WHERE fileid=? and configid=? 
417                  ORDER BY unitindex;""", (fileid, configid)) 
418              return self.cur.fetchall() 
 419   
420          values = geterrors() 
421          if not values: 
422               
423               
424              store = store or factory.getobject(filename) 
425              if suggestion_filename: 
426                  checker.setsuggestionstore(factory.getobject(suggestion_filename, ignore=os.path.extsep+ 'pending')) 
427              self.cachestorechecks(fileid, store, checker, configid) 
428              values = geterrors() 
429   
430          errors = {} 
431          for value in values: 
432              if value[1] == -1: 
433                  continue 
434              checkkey = 'check-' + value[0]       
435              if not checkkey in errors: 
436                  errors[checkkey] = [] 
437              errors[checkkey].append(value[1])    
438   
439          return errors 
 440   
441 -    def filestats(self, filename, checker, store=None): 
 442          """complete stats""" 
443          stats = {"total": [], "translated": [], "fuzzy": [], "untranslated": []} 
444   
445          stats.update(self.filechecks(filename, checker, store)) 
446          fileid = self._getstoredfileid(filename) 
447   
448          self.cur.execute("""SELECT  
449              state, 
450              unitindex 
451              FROM units WHERE fileid=? 
452              ORDER BY unitindex;""", (fileid,)) 
453   
454          values = self.cur.fetchall() 
455          for value in values: 
456              stats[state_strings[value[0]]].append(value[1]) 
457              stats["total"].append(value[1]) 
458   
459          return stats 
 460