| Trees | Indices | Help | 
 | 
|---|
|  | 
   1  #!/usr/bin/env python 
   2  # -*- coding: utf-8 -*- 
   3  #  
   4  # Copyright 2004-2007 Zuza Software Foundation 
   5  #  
   6  # This file is part of translate. 
   7  # 
   8  # translate is free software; you can redistribute it and/or modify 
   9  # it under the terms of the GNU General Public License as published by 
  10  # the Free Software Foundation; either version 2 of the License, or 
  11  # (at your option) any later version. 
  12  #  
  13  # translate is distributed in the hope that it will be useful, 
  14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  16  # GNU General Public License for more details. 
  17  # 
  18  # You should have received a copy of the GNU General Public License 
  19  # along with translate; if not, write to the Free Software 
  20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
  21   
  22  """This is a set of validation checks that can be performed on translation  
  23  units. 
  24   
  25  Derivatives of UnitChecker (like StandardUnitChecker) check translation units, 
  26  and derivatives of TranslationChecker (like StandardChecker) check  
  27  (source, target) translation pairs. 
  28   
  29  When adding a new test here, please document and explain the behaviour on the  
  30  U{wiki <http://translate.sourceforge.net/wiki/toolkit/pofilter_tests>}. 
  31  """ 
  32   
  33  from translate.filters import helpers 
  34  from translate.filters import decoration 
  35  from translate.filters import prefilters 
  36  from translate.filters import spelling 
  37  from translate.lang import factory 
  38  from translate.lang import data 
  39  # The import of xliff could fail if the user doesn't have lxml installed. For 
  40  # now we try to continue gracefully to help users who aren't interested in  
  41  # support for XLIFF or other XML formats. 
  42  try: 
  43      from translate.storage import xliff 
  44  except ImportError, e: 
  45      xliff = None 
  46  import re 
  47   
  48  # These are some regular expressions that are compiled for use in some tests 
  49   
  50  # printf syntax based on http://en.wikipedia.org/wiki/Printf which doens't cover everything we leave \w instead of specifying the exact letters as 
  51  # this should capture printf types defined in other platforms. 
  52  printf_pat = re.compile('%((?:(?P<ord>\d+)\$)*(?P<fullvar>[+#-]*(?:\d+)*(?:\.\d+)*(hh\|h\|l\|ll)*(?P<type>[\w%])))') 
  53   
  54  # The name of the XML tag 
  55  tagname_re = re.compile("<[\s]*([\w\/]*)") 
  56   
  57  # We allow escaped quotes, probably for old escaping style of OOo helpcontent 
  58  #TODO: remove escaped strings once usage is audited 
  59  property_re = re.compile(" (\w*)=((\\\\?\".*?\\\\?\")|(\\\\?'.*?\\\\?'))") 
  60   
  61  # The whole tag 
  62  tag_re = re.compile("<[^>]+>") 
  63   
  65      """Returns the name of the XML/HTML tag in string""" 
  66      return tagname_re.match(string).groups(1)[0] 
  67   
  69      """Tests to see if pair == (a,b,c) is in list, but handles None entries in  
  70      list as wildcards (only allowed in positions "a" and "c"). We take a shortcut 
  71      by only considering "c" if "b" has already matched.""" 
  72      a, b, c = pair 
  73      if (b, c) == (None, None): 
  74          #This is a tagname 
  75          return pair 
  76      for pattern in list: 
  77          x, y, z = pattern 
  78          if (x, y) in [(a, b), (None, b)]: 
  79              if z in [None, c]: 
  80                  return pattern 
  81      return pair 
  82   
  84      """Returns all the properties in the XML/HTML tag string as  
  85      (tagname, propertyname, propertyvalue), but ignore those combinations  
  86      specified in ignore.""" 
  87      properties = [] 
  88      for string in strings: 
  89          tag = tagname(string) 
  90          properties += [(tag, None, None)] 
  91          #Now we isolate the attribute pairs.  
  92          pairs = property_re.findall(string) 
  93          for property, value, a, b in pairs: 
  94              #Strip the quotes: 
  95              value = value[1:-1] 
  96   
  97              canignore = False 
  98              if (tag, property, value) in ignore or \ 
  99                      intuplelist((tag,property,value), ignore) != (tag,property,value): 
 100                  canignore = True 
 101                  break 
 102              if not canignore: 
 103                  properties += [(tag, property, value)] 
 104      return properties 
 105           
 106   
 108      """This exception signals that a Filter didn't pass, and gives an explanation  
 109      or a comment""" 
 120   
 122      """This exception signals that a Filter didn't pass, and the bad translation  
 123      might break an application (so the string will be marked fuzzy)""" 
 124      pass 
 125   
 126  #(tag, attribute, value) specifies a certain attribute which can be changed/ 
 127  #ignored if it exists inside tag. In the case where there is a third element 
 128  #in the tuple, it indicates a property value that can be ignored if present  
 129  #(like defaults, for example) 
 130  #If a certain item is None, it indicates that it is relevant for all values of 
 131  #the property/tag that is specified as None. A non-None value of "value" 
 132  #indicates that the value of the attribute must be taken into account. 
 133  common_ignoretags = [(None, "xml-lang", None)] 
 134  common_canchangetags = [("img", "alt", None)] 
 135   
 137      """object representing the configuration of a checker""" 
 138 -    def __init__(self, targetlanguage=None, accelmarkers=None, varmatches=None,  
 139                      notranslatewords=None, musttranslatewords=None, validchars=None,  
 140                      punctuation=None, endpunctuation=None, ignoretags=None,  
 141                      canchangetags=None, criticaltests=None, credit_sources=None): 
 142          # we have to initialise empty lists properly (default arguments get reused) 
 143          if accelmarkers is None: 
 144              accelmarkers = [] 
 145          if varmatches is None: 
 146              varmatches = [] 
 147          if musttranslatewords is None: 
 148              musttranslatewords = [] 
 149          if notranslatewords is None: 
 150              notranslatewords = [] 
 151          self.targetlanguage = targetlanguage 
 152          self.updatetargetlanguage(targetlanguage) 
 153          self.sourcelang = factory.getlanguage('en') 
 154          self.accelmarkers = accelmarkers 
 155          self.varmatches = varmatches 
 156          # TODO: allow user configuration of untranslatable words 
 157          self.notranslatewords = dict.fromkeys([data.forceunicode(key) for key in notranslatewords]) 
 158          self.musttranslatewords = dict.fromkeys([data.forceunicode(key) for key in musttranslatewords]) 
 159          validchars = data.forceunicode(validchars) 
 160          self.validcharsmap = {} 
 161          self.updatevalidchars(validchars) 
 162          punctuation = data.forceunicode(punctuation) 
 163          if punctuation is None: 
 164              punctuation = self.lang.punctuation 
 165          self.punctuation = punctuation 
 166          endpunctuation = data.forceunicode(endpunctuation) 
 167          if endpunctuation is None: 
 168              endpunctuation = self.lang.sentenceend 
 169          self.endpunctuation = endpunctuation 
 170          if ignoretags is None: 
 171              self.ignoretags = common_ignoretags 
 172          else: 
 173              self.ignoretags = ignoretags 
 174          if canchangetags is None: 
 175              self.canchangetags = common_canchangetags 
 176          else: 
 177              self.canchangetags = canchangetags 
 178          if criticaltests is None: 
 179              criticaltests = [] 
 180          self.criticaltests = criticaltests 
 181          if credit_sources is None: 
 182              credit_sources = [] 
 183          self.credit_sources = credit_sources 
 184   
 186          """combines the info in otherconfig into this config object""" 
 187          self.targetlanguage = otherconfig.targetlanguage or self.targetlanguage 
 188          self.updatetargetlanguage(self.targetlanguage) 
 189          self.accelmarkers.extend([c for c in otherconfig.accelmarkers if not c in self.accelmarkers]) 
 190          self.varmatches.extend(otherconfig.varmatches) 
 191          self.notranslatewords.update(otherconfig.notranslatewords) 
 192          self.musttranslatewords.update(otherconfig.musttranslatewords) 
 193          self.validcharsmap.update(otherconfig.validcharsmap) 
 194          self.punctuation += otherconfig.punctuation 
 195          self.endpunctuation += otherconfig.endpunctuation 
 196          #TODO: consider also updating in the following cases: 
 197          self.ignoretags = otherconfig.ignoretags 
 198          self.canchangetags = otherconfig.canchangetags 
 199          self.criticaltests.extend(otherconfig.criticaltests) 
 200          self.credit_sources = otherconfig.credit_sources 
 201   
 203          """updates the map that eliminates valid characters""" 
 204          if validchars is None: 
 205              return True 
 206          validcharsmap = dict([(ord(validchar), None) for validchar in data.forceunicode(validchars)]) 
 207          self.validcharsmap.update(validcharsmap) 
 208   
 210          """Updates the target language in the config to the given target language""" 
 211          self.lang = factory.getlanguage(langcode) 
 212   
 214      """Parent Checker class which does the checking based on functions available  
 215      in derived classes.""" 
 216      preconditions = {} 
 217   
 218 -    def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None, errorhandler=None): 
 219          self.errorhandler = errorhandler 
 220          if checkerconfig is None: 
 221              self.setconfig(CheckerConfig()) 
 222          else: 
 223              self.setconfig(checkerconfig) 
 224          # exclude functions defined in UnitChecker from being treated as tests... 
 225          self.helperfunctions = {} 
 226          for functionname in dir(UnitChecker): 
 227              function = getattr(self, functionname) 
 228              if callable(function): 
 229                  self.helperfunctions[functionname] = function 
 230          self.defaultfilters = self.getfilters(excludefilters, limitfilters) 
 231   
 233          """returns dictionary of available filters, including/excluding those in  
 234          the given lists""" 
 235          filters = {} 
 236          if limitfilters is None: 
 237              # use everything available unless instructed 
 238              limitfilters = dir(self) 
 239          if excludefilters is None: 
 240              excludefilters = {} 
 241          for functionname in limitfilters: 
 242              if functionname in excludefilters: continue 
 243              if functionname in self.helperfunctions: continue 
 244              if functionname == "errorhandler": continue 
 245              filterfunction = getattr(self, functionname, None) 
 246              if not callable(filterfunction): continue 
 247              filters[functionname] = filterfunction 
 248          return filters 
 249   
 251          """sets the accelerator list""" 
 252          self.config = config 
 253          self.accfilters = [prefilters.filteraccelerators(accelmarker) for accelmarker in self.config.accelmarkers] 
 254          self.varfilters = [prefilters.filtervariables(startmatch, endmatch, prefilters.varname) 
 255                  for startmatch, endmatch in self.config.varmatches] 
 256          self.removevarfilter = [prefilters.filtervariables(startmatch, endmatch, prefilters.varnone) 
 257                  for startmatch, endmatch in self.config.varmatches] 
 258   
 260          """Sets the filename that a checker should use for evaluating suggestions.""" 
 261          self.suggestion_store = store 
 262   
 266   
 270   
 274   
 276          """replaces words with punctuation with their unpunctuated equivalents""" 
 277          return prefilters.filterwordswithpunctuation(str1) 
 278   
 282   
 284          """Runs the given test on the given unit. 
 285           
 286          Note that this can raise a FilterFailure as part of normal operation""" 
 287          return test(unit) 
 288   
 290          """run all the tests in this suite, return failures as testname, message_or_exception""" 
 291          failures = {} 
 292          ignores = self.config.lang.ignoretests[:] 
 293          functionnames = self.defaultfilters.keys() 
 294          priorityfunctionnames = self.preconditions.keys() 
 295          otherfunctionnames = filter(lambda functionname: functionname not in self.preconditions, functionnames) 
 296          for functionname in priorityfunctionnames + otherfunctionnames: 
 297              if functionname in ignores: 
 298                  continue 
 299              filterfunction = getattr(self, functionname, None) 
 300              # this filterfunction may only be defined on another checker if using TeeChecker 
 301              if filterfunction is None: 
 302                  continue 
 303              filtermessage = filterfunction.__doc__ 
 304              try: 
 305                  filterresult = self.run_test(filterfunction, unit) 
 306              except FilterFailure, e: 
 307                  filterresult = False 
 308                  filtermessage = str(e).decode('utf-8') 
 309              except Exception, e: 
 310                  if self.errorhandler is None: 
 311                      raise ValueError("error in filter %s: %r, %r, %s" % \ 
 312                              (functionname, unit.source, unit.target, e)) 
 313                  else: 
 314                      filterresult = self.errorhandler(functionname, unit.source, unit.target, e) 
 315              if not filterresult: 
 316                  # we test some preconditions that aren't actually a cause for failure 
 317                  if functionname in self.defaultfilters: 
 318                      failures[functionname] = filtermessage 
 319                  if functionname in self.preconditions: 
 320                      for ignoredfunctionname in self.preconditions[functionname]: 
 321                          ignores.append(ignoredfunctionname) 
 322          return failures 
 323   
 325      """A checker that passes source and target strings to the checks, not the  
 326      whole unit. 
 327       
 328      This provides some speedup and simplifies testing.""" 
 329 -    def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None, errorhandler=None): 
 330          super(TranslationChecker, self).__init__(checkerconfig, excludefilters, limitfilters, errorhandler) 
 331   
 333          """Runs the given test on the given unit. 
 334           
 335          Note that this can raise a FilterFailure as part of normal operation.""" 
 336          if self.hasplural: 
 337              for pluralform in unit.target.strings: 
 338                  if not test(self.str1, pluralform): 
 339                      return False 
 340              else: 
 341                  return True 
 342          else: 
 343              return test(self.str1, self.str2) 
 344   
 346          """Do some optimisation by caching some data of the unit for the benefit  
 347          of run_test().""" 
 348          self.str1 = data.forceunicode(unit.source) 
 349          self.str2 = data.forceunicode(unit.target) 
 350          self.hasplural = unit.hasplural() 
 351          return super(TranslationChecker, self).run_filters(unit) 
 352   
 354      """A Checker that controls multiple checkers.""" 
 355 -    def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None,  
 356              checkerclasses=None, errorhandler=None, languagecode=None): 
 357          """construct a TeeChecker from the given checkers""" 
 358          self.limitfilters = limitfilters 
 359          if checkerclasses is None: 
 360              checkerclasses = [StandardChecker] 
 361          self.checkers = [checkerclass(checkerconfig=checkerconfig, excludefilters=excludefilters, limitfilters=limitfilters, errorhandler=errorhandler) for checkerclass in checkerclasses] 
 362          if languagecode: 
 363              for checker in self.checkers: 
 364                  checker.config.updatetargetlanguage(languagecode) 
 365              # Let's hook up the language specific checker 
 366              lang_checker = self.checkers[0].config.lang.checker 
 367              if lang_checker: 
 368                  self.checkers.append(lang_checker) 
 369   
 370          self.combinedfilters = self.getfilters(excludefilters, limitfilters) 
 371          self.config = checkerconfig or self.checkers[0].config 
 372   
 374          """returns dictionary of available filters, including/excluding those in  
 375          the given lists""" 
 376          if excludefilters is None: 
 377              excludefilters = {} 
 378          filterslist = [checker.getfilters(excludefilters, limitfilters) for checker in self.checkers] 
 379          self.combinedfilters = {} 
 380          for filters in filterslist: 
 381              self.combinedfilters.update(filters) 
 382          # TODO: move this somewhere more sensible (a checkfilters method?) 
 383          if limitfilters is not None: 
 384              for filtername in limitfilters: 
 385                  if not filtername in self.combinedfilters: 
 386                      import sys 
 387                      print >> sys.stderr, "warning: could not find filter %s" % filtername 
 388          return self.combinedfilters 
 389   
 391          """run all the tests in the checker's suites""" 
 392          failures = {} 
 393          for checker in self.checkers: 
 394              failures.update(checker.run_filters(unit)) 
 395          return failures 
 396   
 398          """Sets the filename that a checker should use for evaluating suggestions.""" 
 399          for checker in self.checkers: 
 400              checker.setsuggestionstore(store) 
 401   
 402   
 404      """The basic test suite for source -> target translations.""" 
 406          """checks whether a string has been translated at all""" 
 407          str2 = prefilters.removekdecomments(str2) 
 408          return not (len(str1.strip()) > 0 and len(str2) == 0) 
 409   
 411          """checks whether a translation is basically identical to the original string""" 
 412          str1 = self.filteraccelerators(str1) 
 413          str2 = self.filteraccelerators(str2) 
 414          if len(str1.strip()) == 0: 
 415              return True 
 416          if str1.isupper() and str1 == str2: 
 417              return True 
 418          if self.config.notranslatewords: 
 419              words1 = str1.split() 
 420              if len(words1) == 1 and [word for word in words1 if word in self.config.notranslatewords]: 
 421                  return True 
 422          str1 = self.removevariables(str1) 
 423          str2 = self.removevariables(str2) 
 424          if not (str1.strip().isdigit() or len(str1) < 2 or decoration.ispurepunctuation(str1.strip())) and (str1.strip().lower() == str2.strip().lower()): 
 425              raise FilterFailure("please translate") 
 426          return True 
 427   
 429          """checks whether a translation only contains spaces""" 
 430          len1 = len(str1.strip()) 
 431          len2 = len(str2.strip()) 
 432          return not (len1 > 0 and len(str2) != 0 and len2 == 0) 
 433   
 435          """checks whether a translation is much shorter than the original string""" 
 436          len1 = len(str1.strip()) 
 437          len2 = len(str2.strip()) 
 438          return not ((len1 > 0) and (0 < len2 < (len1 * 0.1)) or ((len1 > 1) and (len2 == 1))) 
 439   
 441          """checks whether a translation is much longer than the original string""" 
 442          len1 = len(str1.strip()) 
 443          len2 = len(str2.strip()) 
 444          return not ((len1 > 0) and (0 < len1 < (len2 * 0.1)) or ((len1 == 1) and (len2 > 1)))  
 445   
 447          """checks whether escaping is consistent between the two strings""" 
 448          if not helpers.countsmatch(str1, str2, ("\\", "\\\\")): 
 449              escapes1 = u", ".join([u"'%s'" % word for word in str1.split() if "\\" in word]) 
 450              escapes2 = u", ".join([u"'%s'" % word for word in str2.split() if "\\" in word]) 
 451              raise SeriousFilterFailure(u"escapes in original (%s) don't match escapes in translation (%s)" % (escapes1, escapes2)) 
 452          else: 
 453              return True 
 454   
 456          """checks whether newlines are consistent between the two strings""" 
 457          if not helpers.countsmatch(str1, str2, ("\n", "\r")): 
 458              raise FilterFailure("line endings in original don't match line endings in translation") 
 459          else: 
 460              return True 
 461   
 463          """checks whether tabs are consistent between the two strings""" 
 464          if not helpers.countmatch(str1, str2, "\t"): 
 465              raise SeriousFilterFailure("tabs in original don't match tabs in translation") 
 466          else: 
 467              return True 
 468   
 470          """checks whether singlequoting is consistent between the two strings""" 
 471          str1 = self.filterwordswithpunctuation(self.filteraccelerators(self.filtervariables(str1))) 
 472          str2 = self.filterwordswithpunctuation(self.filteraccelerators(self.filtervariables(str2))) 
 473          return helpers.countsmatch(str1, str2, ("'", "''", "\\'")) 
 474   
 476          """checks whether doublequoting is consistent between the two strings""" 
 477          str1 = self.filteraccelerators(self.filtervariables(str1)) 
 478          str1 = self.filterxml(str1) 
 479          str1 = self.config.lang.punctranslate(str1) 
 480          str2 = self.filteraccelerators(self.filtervariables(str2)) 
 481          str2 = self.filterxml(str2) 
 482          return helpers.countsmatch(str1, str2, ('"', '""', '\\"', u"«", u"»")) 
 483   
 485          """checks for bad double-spaces by comparing to original""" 
 486          str1 = self.filteraccelerators(str1) 
 487          str2 = self.filteraccelerators(str2) 
 488          return helpers.countmatch(str1, str2, "  ") 
 489   
 491          """checks for bad spacing after punctuation""" 
 492          if str1.find(u" ") == -1: 
 493              return True 
 494          str1 = self.filteraccelerators(self.filtervariables(str1)) 
 495          str1 = self.config.lang.punctranslate(str1) 
 496          str2 = self.filteraccelerators(self.filtervariables(str2)) 
 497          for puncchar in self.config.punctuation: 
 498              plaincount1 = str1.count(puncchar) 
 499              plaincount2 = str2.count(puncchar) 
 500              if not plaincount1 or plaincount1 != plaincount2: 
 501                  continue 
 502              spacecount1 = str1.count(puncchar+" ") 
 503              spacecount2 = str2.count(puncchar+" ") 
 504              if spacecount1 != spacecount2: 
 505                  # handle extra spaces that are because of transposed punctuation 
 506                  if str1.endswith(puncchar) != str2.endswith(puncchar) and abs(spacecount1-spacecount2) == 1: 
 507                      continue 
 508                  return False 
 509          return True 
 510   
 512          """checks whether printf format strings match""" 
 513          count1 = count2 = None 
 514          for var_num2, match2 in enumerate(printf_pat.finditer(str2)): 
 515              count2 = var_num2 + 1 
 516              if match2.group('ord'): 
 517                  for var_num1, match1 in enumerate(printf_pat.finditer(str1)): 
 518                      count1 = var_num1 + 1 
 519                      if int(match2.group('ord')) == var_num1 + 1: 
 520                          if match2.group('fullvar') != match1.group('fullvar'): 
 521                              return 0 
 522              else: 
 523                  for var_num1, match1 in enumerate(printf_pat.finditer(str1)): 
 524                      count1 = var_num1 + 1 
 525                      if (var_num1 == var_num2) and (match1.group('fullvar') != match2.group('fullvar')): 
 526                          return 0 
 527   
 528          if count2 is None: 
 529              if list(printf_pat.finditer(str1)): 
 530                  return 0 
 531   
 532          if (count1 or count2) and (count1 != count2): 
 533              return 0 
 534          return 1 
 535   
 537          """checks whether accelerators are consistent between the two strings""" 
 538          str1 = self.filtervariables(str1) 
 539          str2 = self.filtervariables(str2) 
 540          messages = [] 
 541          for accelmarker in self.config.accelmarkers: 
 542              counter = decoration.countaccelerators(accelmarker) 
 543              count1, countbad1 = counter(str1) 
 544              count2, countbad2 = counter(str2) 
 545              getaccel = decoration.getaccelerators(accelmarker) 
 546              accel2, bad2 = getaccel(str2) 
 547              if count1 == count2: 
 548                  continue 
 549              if count1 == 1 and count2 == 0: 
 550                  if countbad2 == 1: 
 551                      messages.append("accelerator %s appears before an invalid accelerator character '%s' (eg. space)" % (accelmarker, bad2[0])) 
 552                  else: 
 553                      messages.append("accelerator %s is missing from translation" % accelmarker) 
 554              elif count1 == 0: 
 555                  messages.append("accelerator %s does not occur in original and should not be in translation" % accelmarker) 
 556              elif count1 == 1 and count2 > count1: 
 557                  messages.append("accelerator %s is repeated in translation" % accelmarker) 
 558              else: 
 559                  messages.append("accelerator %s occurs %d time(s) in original and %d time(s) in translation" % (accelmarker, count1, count2)) 
 560          if messages: 
 561              if "accelerators" in self.config.criticaltests: 
 562                  raise SeriousFilterFailure(messages) 
 563              else: 
 564                  raise FilterFailure(messages) 
 565          return True 
 566   
 567  #    def acceleratedvariables(self, str1, str2): 
 568  #        """checks that no variables are accelerated""" 
 569  #        messages = [] 
 570  #        for accelerator in self.config.accelmarkers: 
 571  #            for variablestart, variableend in self.config.varmatches: 
 572  #                error = accelerator + variablestart 
 573  #                if str1.find(error) >= 0: 
 574  #                    messages.append("original has an accelerated variable") 
 575  #                if str2.find(error) >= 0: 
 576  #                    messages.append("translation has an accelerated variable") 
 577  #        if messages: 
 578  #            raise FilterFailure(messages) 
 579  #        return True 
 580   
 582          """checks whether variables of various forms are consistent between the two strings""" 
 583          messages = [] 
 584          mismatch1, mismatch2 = [], [] 
 585          varnames1, varnames2 = [], [] 
 586          for startmarker, endmarker in self.config.varmatches: 
 587              varchecker = decoration.getvariables(startmarker, endmarker) 
 588              if startmarker and endmarker: 
 589                  if isinstance(endmarker, int): 
 590                      redecorate = lambda var: startmarker + var 
 591                  else: 
 592                      redecorate = lambda var: startmarker + var + endmarker 
 593              elif startmarker: 
 594                  redecorate = lambda var: startmarker + var 
 595              else: 
 596                  redecorate = lambda var: var 
 597              vars1 = varchecker(str1) 
 598              vars2 = varchecker(str2) 
 599              if vars1 != vars2: 
 600                  # we use counts to compare so we can handle multiple variables 
 601                  vars1, vars2 = [var for var in vars1 if vars1.count(var) > vars2.count(var)], [var for var in vars2 if vars1.count(var) < vars2.count(var)] 
 602                  # filter variable names we've already seen, so they aren't matched by more than one filter... 
 603                  vars1, vars2 = [var for var in vars1 if var not in varnames1], [var for var in vars2 if var not in varnames2] 
 604                  varnames1.extend(vars1) 
 605                  varnames2.extend(vars2) 
 606                  vars1 = map(redecorate, vars1) 
 607                  vars2 = map(redecorate, vars2) 
 608                  mismatch1.extend(vars1) 
 609                  mismatch2.extend(vars2) 
 610          if mismatch1: 
 611              messages.append("do not translate: %s" % ", ".join(mismatch1)) 
 612          elif mismatch2: 
 613              messages.append("translation contains variables not in original: %s" % ", ".join(mismatch2)) 
 614          if messages and mismatch1: 
 615              raise SeriousFilterFailure(messages) 
 616          elif messages: 
 617              raise FilterFailure(messages) 
 618          return True 
 619   
 621          """checks that function names are not translated""" 
 622          return helpers.funcmatch(str1, str2, decoration.getfunctions, self.config.punctuation) 
 623   
 625          """checks that emails are not translated""" 
 626          return helpers.funcmatch(str1, str2, decoration.getemails) 
 627   
 629          """checks that URLs are not translated""" 
 630          return helpers.funcmatch(str1, str2, decoration.geturls) 
 631   
 633          """checks whether numbers of various forms are consistent between the two strings""" 
 634          return helpers.countsmatch(str1, str2, decoration.getnumbers(str1)) 
 635   
 637          """checks whether whitespace at the beginning of the strings matches""" 
 638          str1 = self.filteraccelerators(self.filtervariables(str1)) 
 639          str2 = self.filteraccelerators(self.filtervariables(str2)) 
 640          return helpers.funcmatch(str1, str2, decoration.spacestart) 
 641   
 643          """checks whether whitespace at the end of the strings matches""" 
 644          str1 = self.filteraccelerators(self.filtervariables(str1)) 
 645          str2 = self.filteraccelerators(self.filtervariables(str2)) 
 646          return helpers.funcmatch(str1, str2, decoration.spaceend) 
 647   
 649          """checks whether punctuation at the beginning of the strings match""" 
 650          str1 = self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str1))) 
 651          str1 = self.config.lang.punctranslate(str1) 
 652          str2 = self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str2))) 
 653          return helpers.funcmatch(str1, str2, decoration.puncstart, self.config.punctuation) 
 654   
 656          """checks whether punctuation at the end of the strings match""" 
 657          str1 = self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str1))) 
 658          str1 = self.config.lang.punctranslate(str1) 
 659          str2 = self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str2))) 
 660          return helpers.funcmatch(str1, str2, decoration.puncend, self.config.endpunctuation) 
 661   
 663          """checks that strings that are purely punctuation are not changed""" 
 664          # this test is a subset of startandend 
 665          if (decoration.ispurepunctuation(str1)): 
 666              return str1 == str2 
 667          else: 
 668              return not decoration.ispurepunctuation(str2) 
 669   
 671          """checks that the number of brackets in both strings match""" 
 672          str1 = self.filtervariables(str1) 
 673          str2 = self.filtervariables(str2) 
 674          messages = [] 
 675          missing = [] 
 676          extra = [] 
 677          for bracket in ("[", "]", "{", "}", "(", ")"): 
 678              count1 = str1.count(bracket) 
 679              count2 = str2.count(bracket) 
 680              if count2 < count1: 
 681                  missing.append("'%s'" % bracket) 
 682              elif count2 > count1: 
 683                  extra.append("'%s'" % bracket) 
 684          if missing: 
 685              messages.append("translation is missing %s" % ", ".join(missing)) 
 686          if extra: 
 687              messages.append("translation has extra %s" % ", ".join(extra)) 
 688          if messages: 
 689              raise FilterFailure(messages) 
 690          return True 
 691   
 693          """checks that the number of sentences in both strings match""" 
 694          sentences1 = len(self.config.sourcelang.sentences(str1)) 
 695          sentences2 = len(self.config.lang.sentences(str2)) 
 696          if not sentences1 == sentences2: 
 697              raise FilterFailure("The number of sentences differ: %d versus %d" % (sentences1, sentences2)) 
 698          return True 
 699   
 701          """checks that options are not translated""" 
 702          str1 = self.filtervariables(str1) 
 703          for word1 in str1.split(): 
 704              if word1 != "--" and word1.startswith("--") and word1[-1].isalnum(): 
 705                  parts = word1.split("=") 
 706                  if not parts[0] in str2: 
 707                      raise FilterFailure("The option %s does not occur or is translated in the translation." % parts[0])  
 708                  if len(parts) > 1 and parts[1] in str2: 
 709                      raise FilterFailure("The parameter %(param)s in option %(option)s is not translated." % {"param": parts[0], "option": parts[1]}) 
 710          return True 
 711   
 713          """checks that the message starts with the correct capitalisation""" 
 714          str1 = self.filteraccelerators(str1) 
 715          str2 = self.filteraccelerators(str2) 
 716          if len(str1) > 1 and len(str2) > 1: 
 717              return self.config.sourcelang.capsstart(str1) == self.config.lang.capsstart(str2) 
 718          if len(str1) == 0 and len(str2) == 0: 
 719              return True 
 720          if len(str1) == 0 or len(str2) == 0: 
 721              return False 
 722          return True 
 723   
 725          """checks the capitalisation of two strings isn't wildly different""" 
 726          str1 = self.removevariables(str1) 
 727          str2 = self.removevariables(str2) 
 728          # TODO: review this. The 'I' is specific to English, so it probably serves 
 729          # no purpose to get sourcelang.sentenceend 
 730          str1 = re.sub(u"[^%s]( I )" % self.config.sourcelang.sentenceend, " i ", str1) 
 731          capitals1 = helpers.filtercount(str1, type(str1).isupper) 
 732          capitals2 = helpers.filtercount(str2, type(str2).isupper) 
 733          alpha1 = helpers.filtercount(str1, type(str1).isalpha) 
 734          alpha2 = helpers.filtercount(str2, type(str2).isalpha) 
 735          # Capture the all caps case 
 736          if capitals1 == alpha1: 
 737              return capitals2 == alpha2 
 738          # some heuristic tests to try and see that the style of capitals is vaguely the same 
 739          if capitals1 == 0 or capitals1 == 1: 
 740              return capitals2 == capitals1 
 741          elif capitals1 < len(str1) / 10: 
 742              return capitals2 < len(str2) / 8 
 743          elif len(str1) < 10: 
 744              return abs(capitals1 - capitals2) < 3 
 745          elif capitals1 > len(str1) * 6 / 10: 
 746              return capitals2 > len(str2) * 6 / 10 
 747          else: 
 748              return abs(capitals1 - capitals2) < (len(str1) + len(str2)) / 6  
 749   
 751          """checks that acronyms that appear are unchanged""" 
 752          acronyms = [] 
 753          allowed = [] 
 754          for startmatch, endmatch in self.config.varmatches: 
 755              allowed += decoration.getvariables(startmatch, endmatch)(str1) 
 756          allowed += self.config.musttranslatewords.keys() 
 757          str1 = self.filteraccelerators(self.filtervariables(str1)) 
 758          iter = self.config.lang.word_iter(str1) 
 759          str2 = self.filteraccelerators(self.filtervariables(str2)) 
 760          for word in iter: 
 761              if word.isupper() and len(word) > 1 and word not in allowed: 
 762                  if str2.find(word) == -1: 
 763                      acronyms.append(word) 
 764          if acronyms: 
 765              raise FilterFailure("acronyms should not be translated: " + ", ".join(acronyms)) 
 766          return True 
 767   
 769          """checks for repeated words in the translation""" 
 770          lastword = "" 
 771          without_newlines = "\n".join(str2.split("\n")) 
 772          words = self.filteraccelerators(self.removevariables(without_newlines)).replace(".", "").lower().split() 
 773          for word in words: 
 774              if word == lastword: 
 775                  raise FilterFailure("The word '%s' is repeated" % word) 
 776              lastword = word 
 777          return True 
 778   
 780          """checks that words configured as untranslatable appear in the translation too""" 
 781          if not self.config.notranslatewords: 
 782              return True 
 783          str1 = self.filtervariables(str1) 
 784          str2 = self.filtervariables(str2) 
 785          #The above is full of strange quotes and things in utf-8 encoding. 
 786          #single apostrophe perhaps problematic in words like "doesn't" 
 787          for seperator in self.config.punctuation: 
 788              if isinstance(str1, unicode): 
 789                  str1 = str1.replace(seperator, u" ") 
 790              else: 
 791                  str1 = str1.replace(seperator.encode("utf-8"), " ") 
 792              if isinstance(str2, unicode): 
 793                  str2 = str2.replace(seperator, u" ") 
 794              else: 
 795                  str2 = str2.replace(seperator.encode("utf-8"), " ") 
 796          words1 = self.filteraccelerators(str1).split() 
 797          words2 = self.filteraccelerators(str2).split() 
 798          stopwords = [word for word in words1 if word in self.config.notranslatewords and word not in words2] 
 799          if stopwords: 
 800              raise FilterFailure("do not translate: %s" % (", ".join(stopwords))) 
 801          return True 
 802   
 804          """checks that words configured as definitely translatable don't appear in  
 805          the translation""" 
 806          if not self.config.musttranslatewords: 
 807              return True 
 808          str1 = self.removevariables(str1) 
 809          str2 = self.removevariables(str2) 
 810          #The above is full of strange quotes and things in utf-8 encoding. 
 811          #single apostrophe perhaps problematic in words like "doesn't" 
 812          for seperator in self.config.punctuation: 
 813              str1 = str1.replace(seperator, " ") 
 814              str2 = str2.replace(seperator, " ") 
 815          words1 = self.filteraccelerators(str1).split() 
 816          words2 = self.filteraccelerators(str2).split() 
 817          stopwords = [word for word in words1 if word in self.config.musttranslatewords and word in words2] 
 818          if stopwords: 
 819              raise FilterFailure("please translate: %s" % (", ".join(stopwords))) 
 820          return True 
 821   
 823          """checks that only characters specified as valid appear in the translation""" 
 824          if not self.config.validcharsmap: 
 825              return True 
 826          invalid1 = str1.translate(self.config.validcharsmap) 
 827          invalid2 = str2.translate(self.config.validcharsmap) 
 828          invalidchars = ["'%s' (\\u%04x)" % (invalidchar.encode('utf-8'), ord(invalidchar)) for invalidchar in invalid2 if invalidchar not in invalid1] 
 829          if invalidchars: 
 830              raise FilterFailure("invalid chars: %s" % (", ".join(invalidchars))) 
 831          return True 
 832   
 834          """checks that file paths have not been translated""" 
 835          for word1 in self.filteraccelerators(str1).split(): 
 836              if word1.startswith("/"): 
 837                  if not helpers.countsmatch(str1, str2, (word1,)): 
 838                      return False 
 839          return True 
 840   
 867   
 869          """checks to ensure that no KDE style comments appear in the translation""" 
 870          return str2.find("\n_:") == -1 and not str2.startswith("_:") 
 871   
 873          """checks for Gettext compendium conflicts (#-#-#-#-#)""" 
 874          return str2.find("#-#-#-#-#") == -1 
 875   
 877          """checks for English style plural(s) for you to review""" 
 878          def numberofpatterns(string, patterns): 
 879              number = 0 
 880              for pattern in patterns: 
 881                  number += len(re.findall(pattern, string)) 
 882              return number 
 883   
 884          sourcepatterns = ["\(s\)"] 
 885          targetpatterns = ["\(s\)"] 
 886          sourcecount = numberofpatterns(str1, sourcepatterns) 
 887          targetcount = numberofpatterns(str2, targetpatterns) 
 888          if self.config.lang.nplurals == 1: 
 889              return not targetcount 
 890          return sourcecount == targetcount 
 891   
 893          """checks words that don't pass a spell check""" 
 894          if not self.config.targetlanguage: 
 895              return True 
 896          str1 = self.filterxml(self.filteraccelerators(self.filtervariables(str1))) 
 897          str2 = self.filterxml(self.filteraccelerators(self.filtervariables(str2))) 
 898          ignore1 = [] 
 899          messages = [] 
 900          for word, index, suggestions in spelling.check(str1, lang="en"): 
 901              ignore1.append(word) 
 902          for word, index, suggestions in spelling.check(str2, lang=self.config.targetlanguage): 
 903              if word in ignore1: 
 904                  continue 
 905              # hack to ignore hyphenisation rules 
 906              if word in suggestions: 
 907                  continue 
 908              if isinstance(str2, unicode) or isinstance(str1, unicode): 
 909                  messages.append(u"check spelling of %s (could be %s)" % (word, u" / ".join(suggestions))) 
 910              else: 
 911                  messages.append("check spelling of %s (could be %s)" % (word, " / ".join(suggestions))) 
 912          if messages: 
 913              raise FilterFailure(messages) 
 914          return True 
 915   
 917          """checks for messages containing translation credits instead of normal translations.""" 
 918          return not str1 in self.config.credit_sources 
 919   
 920      # If the precondition filter is run and fails then the other tests listed are ignored. 
          # Keys are the names of precondition tests; each value is the tuple of 
          # test names to skip when that key's test fails. 
          # Note: the "blank" tuple is the "untranslated" tuple without its last 
          # two entries ("blank" and "nplurals"). 
 921      preconditions = {"untranslated": ("simplecaps", "variables", "startcaps", 
 922                                      "accelerators", "brackets", "endpunc", 
 923                                      "acronyms", "xmltags", "startpunc", 
 924                                      "endwhitespace", "startwhitespace", 
 925                                      "escapes", "doublequoting", "singlequoting",  
 926                                      "filepaths", "purepunc", "doublespacing", 
 927                                      "sentencecount", "numbers", "isfuzzy", 
 928                                      "isreview", "notranslatewords", "musttranslatewords", 
 929                                      "emails", "simpleplurals", "urls", "printf", 
 930                                      "tabs", "newlines", "functions", "options", 
 931                                      "blank", "nplurals"), 
 932                      "blank":        ("simplecaps", "variables", "startcaps", 
 933                                      "accelerators", "brackets", "endpunc", 
 934                                      "acronyms", "xmltags", "startpunc", 
 935                                      "endwhitespace", "startwhitespace", 
 936                                      "escapes", "doublequoting", "singlequoting",  
 937                                      "filepaths", "purepunc", "doublespacing", 
 938                                      "sentencecount", "numbers", "isfuzzy", 
 939                                      "isreview", "notranslatewords", "musttranslatewords", 
 940                                      "emails", "simpleplurals", "urls", "printf", 
 941                                      "tabs", "newlines", "functions", "options"), 
 942                      "credits":      ("simplecaps", "variables", "startcaps", 
 943                                      "accelerators", "brackets", "endpunc", 
 944                                      "acronyms", "xmltags", "startpunc", 
 945                                      "escapes", "doublequoting", "singlequoting",  
 946                                      "filepaths", "doublespacing", 
 947                                      "sentencecount", "numbers", 
 948                                      "emails", "simpleplurals", "urls", "printf", 
 949                                      "tabs", "newlines", "functions", "options"), 
 950                     "purepunc":      ("startcaps", "options"), 
 951                     "startcaps":     ("simplecaps",), 
 952                     "endwhitespace": ("endpunc",), 
 953                     "startwhitespace":("startpunc",), 
 954                     "unchanged":     ("doublewords",),  
 955                     "compendiumconflicts": ("accelerators", "brackets", "escapes",  
 956                                      "numbers", "startpunc", "long", "variables",  
 957                                      "startcaps", "sentencecount", "simplecaps", 
 958                                      "doublespacing", "endpunc", "xmltags", 
 959                                      "startwhitespace", "endwhitespace", 
 960                                      "singlequoting", "doublequoting", 
 961                                      "filepaths", "purepunc", "doublewords", "printf") } 
 962   
 963  # code to actually run the tests (use unittest?) 
 964   
 965  openofficeconfig = CheckerConfig( 
          # OpenOffice.org settings; merged into a checker's configuration via 
          # checkerconfig.update(openofficeconfig). 
          # accelmarkers: characters marking accelerator keys. 
 966      accelmarkers = ["~"], 
          # varmatches: (start, end) delimiter pairs used to find variables. 
          # NOTE(review): an end of None or an integer presumably selects a 
          # different end-of-variable rule -- confirm in decoration.getvariables. 
 967      varmatches = [("&", ";"), ("%", "%"), ("%", None), ("%", 0), ("$(", ")"), ("$", "$"), ("${", "}"), ("#", "#"), ("#", 1), ("#", 0), ("($", ")"), ("$[", "]"), ("[", "]"), ("$", None)], 
          # NOTE(review): the 3-tuples below look like (tag, attribute, value) 
          # with None as a wildcard -- confirm against the xmltags check. 
 968      ignoretags = [("alt", "xml-lang", None), ("ahelp", "visibility", "visible"), ("img", "width", None), ("img", "height", None)], 
 969      canchangetags = [("link", "name", None)] 
 970      ) 
 971   
 974          checkerconfig = kwargs.get("checkerconfig", None) 
 975          if checkerconfig is None: 
 976              checkerconfig = CheckerConfig() 
 977              kwargs["checkerconfig"] = checkerconfig 
 978          checkerconfig.update(openofficeconfig) 
 979          StandardChecker.__init__(self, **kwargs) 
 980   
 981  mozillaconfig = CheckerConfig( 
          # Mozilla settings; merged into a checker's configuration via 
          # checkerconfig.update(mozillaconfig). 
          # accelmarkers: characters marking accelerator keys. 
 982      accelmarkers = ["&"], 
          # varmatches: (start, end) delimiter pairs used to find variables. 
 983      varmatches = [("&", ";"), ("%", "%"), ("%", 1), ("$", "$"), ("$", None), ("#", 1), ("${", "}"), ("$(^", ")")], 
          # NOTE(review): presumably the names of tests whose failure is 
          # treated as critical -- confirm against CheckerConfig. 
 984      criticaltests = ["accelerators"] 
 985      ) 
 986   
 989          checkerconfig = kwargs.get("checkerconfig", None) 
 990          if checkerconfig is None: 
 991              checkerconfig = CheckerConfig() 
 992              kwargs["checkerconfig"] = checkerconfig 
 993          checkerconfig.update(mozillaconfig) 
 994          StandardChecker.__init__(self, **kwargs) 
 995   
 996  gnomeconfig = CheckerConfig( 
          # GNOME settings; merged into a checker's configuration via 
          # checkerconfig.update(gnomeconfig). 
          # accelmarkers: characters marking accelerator keys. 
 997      accelmarkers = ["_"], 
          # varmatches: (start, end) delimiter pairs used to find variables. 
 998      varmatches = [("%", 1), ("$(", ")")], 
          # credit_sources: source strings that the credits check reports as 
          # translator-credit messages (it fails when str1 is in this list). 
 999      credit_sources = [u"translator-credits"] 
1000      ) 
1001   
1004          checkerconfig = kwargs.get("checkerconfig", None) 
1005          if checkerconfig is None: 
1006              checkerconfig = CheckerConfig() 
1007              kwargs["checkerconfig"] = checkerconfig 
1008          checkerconfig.update(gnomeconfig) 
1009          StandardChecker.__init__(self, **kwargs) 
1010   
1011  kdeconfig = CheckerConfig( 
          # KDE settings; merged into a checker's configuration via 
          # checkerconfig.update(kdeconfig). 
          # accelmarkers: characters marking accelerator keys. 
1012      accelmarkers = ["&"], 
          # varmatches: (start, end) delimiter pairs used to find variables. 
1013      varmatches = [("%", 1)], 
          # credit_sources: source strings that the credits check reports as 
          # translator-credit messages (it fails when str1 is in this list). 
1014      credit_sources = [u"Your names", u"Your emails", u"ROLES_OF_TRANSLATORS"] 
1015      ) 
1016   
1019          # TODO allow setup of KDE plural and translator comments so that they do 
1020          # not create false positives 
1021          checkerconfig = kwargs.get("checkerconfig", None) 
1022          if checkerconfig is None: 
1023              checkerconfig = CheckerConfig() 
1024              kwargs["checkerconfig"] = checkerconfig 
1025          checkerconfig.update(kdeconfig) 
1026          StandardChecker.__init__(self, **kwargs) 
1027   
1028  cclicenseconfig = CheckerConfig(varmatches = [("@", "@")])  # Creative Commons settings: variables are delimited as @name@; merged via checkerconfig.update(cclicenseconfig) 
1031          checkerconfig = kwargs.get("checkerconfig", None) 
1032          if checkerconfig is None: 
1033              checkerconfig = CheckerConfig() 
1034              kwargs["checkerconfig"] = checkerconfig 
1035          checkerconfig.update(cclicenseconfig) 
1036          StandardChecker.__init__(self, **kwargs) 
1037   
1038  projectcheckers = { 
          # Maps a project name to the checker class that carries that 
          # project's configuration.  "wx" deliberately maps to the same 
          # class as "kde" (KdeChecker). 
1039      "openoffice": OpenOfficeChecker, 
1040      "mozilla": MozillaChecker, 
1041      "kde": KdeChecker, 
1042      "wx": KdeChecker, 
1043      "gnome": GnomeChecker, 
1044      "creativecommons": CCLicenseChecker 
1045      } 
1046   
1047   
1049      """The standard checks for common checks on translation units.""" 
1053   
1057   
1059          """Checks for the correct number of noun forms for plural translations.""" 
1060          if unit.hasplural(): 
1061              # if we don't have a valid nplurals value, don't run the test 
1062              nplurals = self.config.lang.nplurals 
1063              if nplurals > 0: 
1064                  return len(unit.target.strings) == nplurals 
1065          return True 
1066   
1068          """Checks if there is at least one suggested translation for this unit.""" 
1069          self.suggestion_store = getattr(self, 'suggestion_store', None) 
1070          suggestions = [] 
1071          if self.suggestion_store: 
1072              source = unit.source 
1073              suggestions = [unit for unit in self.suggestion_store.units if unit.source == source] 
1074          elif xliff and isinstance(unit, xliff.xliffunit): 
1075              # TODO: we probably want to filter them somehow 
1076              suggestions = unit.getalttrans() 
1077          return not bool(suggestions) 
1078   
1079   
1081      """verifies that the tests pass for a pair of strings""" 
1082      from translate.storage import base 
1083      str1 = data.forceunicode(str1) 
1084      str2 = data.forceunicode(str2) 
1085      unit = base.TranslationUnit(str1) 
1086      unit.target = str2 
1087      checker = StandardChecker(excludefilters=ignorelist) 
1088      failures = checker.run_filters(unit) 
1089      for testname, message in failures: 
1090          print "failure: %s: %s\n  %r\n  %r" % (testname, message, str1, str2) 
1091      return failures 
1092   
1094      """runs test on a batch of string pairs""" 
1095      passed, numpairs = 0, len(pairs) 
1096      for str1, str2 in pairs: 
1097          if runtests(str1, str2): 
1098              passed += 1 
1099      print 
1100      print "total: %d/%d pairs passed" % (passed, numpairs) 
1101   
1102  if __name__ == '__main__': 
          # Ad-hoc smoke run, only when this file is executed as a script: 
          # each tuple is a (source, target) pair that batchruntests feeds 
          # through runtests, printing any failures and a final summary. 
          # Raw strings keep the backslashes in the samples literal. 
1103      testset = [(r"simple", r"somple"), 
1104              (r"\this equals \that", r"does \this equal \that?"), 
1105              (r"this \'equals\' that", r"this 'equals' that"), 
1106              (r" start and end! they must match.", r"start and end! they must match."), 
1107              (r"check for matching %variables marked like %this", r"%this %variable is marked"), 
1108              (r"check for mismatching %variables marked like %this", r"%that %variable is marked"), 
1109              (r"check for mismatching %variables% too", r"how many %variable% are marked"), 
1110              (r"%% %%", r"%%"), 
1111              (r"Row: %1, Column: %2", r"Mothalo: %1, Kholomo: %2"), 
1112              (r"simple lowercase", r"it is all lowercase"), 
1113              (r"simple lowercase", r"It Is All Lowercase"), 
1114              (r"Simple First Letter Capitals", r"First Letters"), 
1115              (r"SIMPLE CAPITALS", r"First Letters"), 
1116              (r"SIMPLE CAPITALS", r"ALL CAPITALS"), 
1117              (r"forgot to translate", r"  ") 
1118              ] 
1119      batchruntests(testset) 
1120   
| Trees | Indices | Help | 
 | 
|---|
| Generated by Epydoc 3.0.1 on Wed Mar 26 12:49:36 2008 | http://epydoc.sourceforge.net |