| Home | Trees | Indices | Help |
|
|---|
|
|
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 #
4 # Copyright 2004-2008 Zuza Software Foundation
5 #
6 # This file is part of translate.
7 #
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
12 #
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
22 """This is a set of validation checks that can be performed on translation
23 units.
24
25 Derivatives of UnitChecker (like StandardUnitChecker) check translation units,
26 and derivatives of TranslationChecker (like StandardChecker) check
27 (source, target) translation pairs.
28
29 When adding a new test here, please document and explain the behaviour on the
30 U{wiki <http://translate.sourceforge.net/wiki/toolkit/pofilter_tests>}.
31 """
32
33 from translate.filters import helpers
34 from translate.filters import decoration
35 from translate.filters import prefilters
36 from translate.filters import spelling
37 from translate.lang import factory
38 from translate.lang import data
39 # The import of xliff could fail if the user doesn't have lxml installed. For
40 # now we try to continue gracefully to help users who aren't interested in
41 # support for XLIFF or other XML formats.
42 try:
43 from translate.storage import xliff
44 except ImportError, e:
45 xliff = None
46 # The import of xliff fails silently in the absence of lxml if another module
47 # already tried to import it unsuccessfully, so let's make 100% sure:
48 if not hasattr(xliff, "xliffunit"):
49 xliff = None
50 import re
51
# These are some regular expressions that are compiled for use in some tests

# printf syntax based on http://en.wikipedia.org/wiki/Printf, which doesn't
# cover everything. We leave \w for the conversion type instead of specifying
# the exact letters, as this should capture printf types defined on other
# platforms. Extended to support positional arguments (%1$s) and Python named
# format specifiers (%(key)s).
# The length modifiers must be separated by a plain "|": an escaped "\|" only
# matches a literal pipe character, so "%ld" used to be read as type "l".
# "hh" and "ll" come first so that the longer modifier wins over "h" and "l".
printf_pat = re.compile(r'%((?:(?P<ord>\d+)\$|\((?P<key>\w+)\))?(?P<fullvar>[+#-]*(?:\d+)?(?:\.\d+)?(hh|ll|h|l)?(?P<type>[\w%])))')

# The name of the XML tag
tagname_re = re.compile(r"<[\s]*([\w\/]*)")

# We allow escaped quotes, probably for old escaping style of OOo helpcontent
#TODO: remove escaped strings once usage is audited
property_re = re.compile(r""" (\w*)=((\\?".*?\\?")|(\\?'.*?\\?'))""")

# The whole tag
tag_re = re.compile("<[^>]+>")

gconf_attribute_re = re.compile('"[a-z_]+?"')
71
72
def tagname(string):
    """Returns the name of the XML/HTML tag in string.

    @type string: String
    @param string: text that is expected to start with "<"
    @rtype: String
    @return: the text captured by tagname_re after the opening "<", or an
    empty string when string does not start with "<"
    """
    match = tagname_re.match(string)
    if match is None:
        # Not a tag at all: report "no name" instead of failing with an
        # AttributeError on None.
        return ""
    return match.group(1)
76
77
def intuplelist(pair, list):
    """Looks up the triple pair == (a, b, c) in list, where None entries in
    list act as wildcards (only honoured in positions "a" and "c").

    Position "c" is only considered once "a" and "b" have matched.

    @param pair: the (a, b, c) tuple to look for
    @param list: sequence of (a, b, c) patterns
    @return: the first pattern in list that matches; pair itself when nothing
    matches or when pair is a bare tag name, i.e. (a, None, None)
    """
    tag, name, value = pair
    if (name, value) == (None, None):
        # This is a tagname: never looked up
        return pair
    for candidate in list:
        x, y, z = candidate
        head_matches = (x, y) == (tag, name) or (x, y) == (None, name)
        if head_matches and (z is None or z == value):
            return candidate
    return pair
92
93
def getproperties(strings, ignore):
    """Returns all the properties in the XML/HTML tag strings as
    (tagname, propertyname, propertyvalue), but ignore those combinations
    specified in ignore.

    @param strings: the tag strings to examine
    @param ignore: list of (tagname, propertyname, propertyvalue) patterns to
    leave out; None entries are wildcards as understood by intuplelist()
    @return: a list that holds, for every tag, a (tagname, None, None) entry
    followed by one entry per attribute that is not ignored
    """
    properties = []
    for string in strings:
        tag = tagname(string)
        properties.append((tag, None, None))
        # Now we isolate the attribute pairs.
        for name, value, _dquoted, _squoted in property_re.findall(string):
            # Strip the quotes:
            value = value[1:-1]
            candidate = (tag, name, value)
            # intuplelist() returns the matching ignore pattern, or the
            # candidate itself when no pattern matches.
            if candidate in ignore or \
                    intuplelist(candidate, ignore) != candidate:
                # Skip just this attribute. Breaking out of the loop here
                # would also discard every later attribute of the same tag.
                continue
            properties.append(candidate)
    return properties
116
117
class FilterFailure(Exception):
    """This exception signals that a Filter didn't pass, and gives an
    explanation or a comment"""

    def __init__(self, messages):
        """
        @param messages: one unicode message, or a list of unicode messages
        which is joined with ", " into a single message
        """
        if not isinstance(messages, list):
            messages = [messages]
        # Assumption: all of same type. An empty list has nothing to check;
        # indexing it would raise an IndexError.
        assert not messages or isinstance(messages[0], unicode)
        joined = u", ".join(messages)
        Exception.__init__(self, joined)
        # Python 2.3 doesn't have .args
        if not hasattr(self, "args"):
            # Keep the tuple shape that Exception uses, so that e.args[0]
            # is the whole message and not its first character.
            self.args = (joined,)
131
132
134 """This exception signals that a Filter didn't pass, and the bad translation
135 might break an application (so the string will be marked fuzzy)"""
136 pass
137
138 #(tag, attribute, value) specifies a certain attribute which can be changed/
139 #ignored if it exists inside tag. In the case where there is a third element
140 #in the tuple, it indicates a property value that can be ignored if present
141 #(like defaults, for example)
142 #If a certain item is None, it indicates that it is relevant for all values of
143 #the property/tag that is specified as None. A non-None value of "value"
144 #indicates that the value of the attribute must be taken into account.
145 common_ignoretags = [(None, "xml-lang", None)]
146 common_canchangetags = [("img", "alt", None), (None, "title", None)]
147 # Actually the title attribute is allowed on many tags in HTML (but probably not all)
148
149
151 """object representing the configuration of a checker"""
152
def __init__(self, targetlanguage=None, accelmarkers=None, varmatches=None,
             notranslatewords=None, musttranslatewords=None,
             validchars=None, punctuation=None, endpunctuation=None,
             ignoretags=None, canchangetags=None, criticaltests=None,
             credit_sources=None):
    """Builds the configuration; every option left as None gets a default.

    List options default to an empty list. punctuation and endpunctuation
    default to the target language's punctuation and sentenceend values,
    and ignoretags/canchangetags default to common_ignoretags and
    common_canchangetags. The source language is always 'en'.
    """
    normalize = data.normalized_unicode
    as_list = self._init_list
    or_default = self._init_default

    # Options that are plain lists
    self.accelmarkers = as_list(accelmarkers)
    self.varmatches = as_list(varmatches)
    self.criticaltests = as_list(criticaltests)
    self.credit_sources = as_list(credit_sources)

    # Language data: updatetargetlanguage() provides self.lang, which the
    # defaults below read
    self.targetlanguage = targetlanguage
    self.updatetargetlanguage(targetlanguage)
    self.sourcelang = factory.getlanguage('en')

    # Options that fall back to a default value
    self.punctuation = or_default(normalize(punctuation),
                                  self.lang.punctuation)
    self.endpunctuation = or_default(normalize(endpunctuation),
                                     self.lang.sentenceend)
    self.ignoretags = or_default(ignoretags, common_ignoretags)
    self.canchangetags = or_default(canchangetags, common_canchangetags)

    # Word lists are kept as dictionaries keyed on the normalized word
    # TODO: allow user configuration of untranslatable words
    self.notranslatewords = dict.fromkeys(
        map(normalize, as_list(notranslatewords)))
    self.musttranslatewords = dict.fromkeys(
        map(normalize, as_list(musttranslatewords)))

    self.validcharsmap = {}
    self.updatevalidchars(normalize(validchars))
181
183 """initialise configuration paramaters that are lists
184
185 @type list: List
186 @param list: None (we'll initialise a blank list) or a list paramater
187 @rtype: List
188 """
189 if list is None:
190 list = []
191 return list
192
194 """initialise parameters that can have default options
195
196 @param param: the user supplied paramater value
197 @param default: default values when param is not specified
198 @return: the paramater as specified by the user of the default settings
199 """
200 if param is None:
201 return default
202 return param
203
205 """combines the info in otherconfig into this config object"""
206 self.targetlanguage = otherconfig.targetlanguage or self.targetlanguage
207 self.updatetargetlanguage(self.targetlanguage)
208 self.accelmarkers.extend([c for c in otherconfig.accelmarkers if not c in self.accelmarkers])
209 self.varmatches.extend(otherconfig.varmatches)
210 self.notranslatewords.update(otherconfig.notranslatewords)
211 self.musttranslatewords.update(otherconfig.musttranslatewords)
212 self.validcharsmap.update(otherconfig.validcharsmap)
213 self.punctuation += otherconfig.punctuation
214 self.endpunctuation += otherconfig.endpunctuation
215 #TODO: consider also updating in the following cases:
216 self.ignoretags = otherconfig.ignoretags
217 self.canchangetags = otherconfig.canchangetags
218 self.criticaltests.extend(otherconfig.criticaltests)
219 self.credit_sources = otherconfig.credit_sources
220
def updatevalidchars(self, validchars):
    """Updates the map that eliminates valid characters.

    Every character of the normalized validchars is added to
    self.validcharsmap, keyed on its code point and mapped to None.

    @param validchars: the characters to add, or None to change nothing
    @return: True when validchars is None, otherwise None
    """
    if validchars is None:
        return True
    for validchar in data.normalized_unicode(validchars):
        self.validcharsmap[ord(validchar)] = None
227
229 """Updates the target language in the config to the given target
230 language"""
231 self.lang = factory.getlanguage(langcode)
232
233
235
def cache_results(f):
    """Wraps the one-argument method f so that its results are remembered in
    the instance's results_cache dictionary.

    @param f: a method taking (self, param1)
    @return: the caching wrapper; cache entries are keyed on
    (f.__name__, param1)
    """

    def cached_f(self, param1):
        cache = self.results_cache
        key = (f.__name__, param1)
        try:
            return cache[key]
        except KeyError:
            # First request for this argument: compute and remember it
            value = f(self, param1)
            cache[key] = value
            return value
    return cached_f
246
247
249 """Parent Checker class which does the checking based on functions available
250 in derived classes."""
251 preconditions = {}
252
def __init__(self, checkerconfig=None, excludefilters=None,
             limitfilters=None, errorhandler=None):
    """Sets up the checker.

    @param checkerconfig: the configuration to use; a default CheckerConfig
    is created when None
    @param excludefilters: names of filters to leave out, see getfilters()
    @param limitfilters: names of the only filters to consider, see
    getfilters()
    @param errorhandler: stored as self.errorhandler
    """
    self.errorhandler = errorhandler
    if checkerconfig is None:
        checkerconfig = CheckerConfig()
    self.setconfig(checkerconfig)
    # exclude functions defined in UnitChecker from being treated as tests.
    self.helperfunctions = {}
    for name in dir(UnitChecker):
        attribute = getattr(self, name)
        if callable(attribute):
            self.helperfunctions[name] = attribute
    self.defaultfilters = self.getfilters(excludefilters, limitfilters)
    self.results_cache = {}
268
def getfilters(self, excludefilters=None, limitfilters=None):
    """Returns a dictionary of the available filters, including/excluding
    those in the given lists.

    @param excludefilters: names that must not be returned
    @param limitfilters: the only names to consider; None means every
    attribute name of this object
    @return: dictionary mapping filter name to the callable attribute of
    that name; helper functions and "errorhandler" are never included
    """
    if limitfilters is None:
        # use everything available unless instructed
        limitfilters = dir(self)
    if excludefilters is None:
        excludefilters = {}
    filters = {}
    for name in limitfilters:
        skip = (name in excludefilters
                or name in self.helperfunctions
                or name == "errorhandler")
        if skip:
            continue
        candidate = getattr(self, name, None)
        if callable(candidate):
            filters[name] = candidate
    return filters
290
292 """sets the accelerator list"""
293 self.config = config
294 self.accfilters = [prefilters.filteraccelerators(accelmarker) for accelmarker in self.config.accelmarkers]
295 self.varfilters = [prefilters.filtervariables(startmatch, endmatch, prefilters.varname)
296 for startmatch, endmatch in self.config.varmatches]
297 self.removevarfilter = [prefilters.filtervariables(startmatch, endmatch,
298 prefilters.varnone)
299 for startmatch, endmatch in self.config.varmatches]
300
302 """Sets the filename that a checker should use for evaluating
303 suggestions."""
304 self.suggestion_store = store
305 if self.suggestion_store:
306 self.suggestion_store.require_index()
307
311 filtervariables = cache_results(filtervariables)
312
316 removevariables = cache_results(removevariables)
317
319 """filter out accelerators from str1"""
320 return helpers.multifilter(str1, self.accfilters, None)
321 filteraccelerators = cache_results(filteraccelerators)
322
324 """filter out accelerators from str1"""
325 return helpers.multifilter(str1, self.accfilters, acceptlist)
326
328 """replaces words with punctuation with their unpunctuated
329 equivalents"""
330 return prefilters.filterwordswithpunctuation(str1)
331 filterwordswithpunctuation = cache_results(filterwordswithpunctuation)
332
336 filterxml = cache_results(filterxml)
337
339 """Runs the given test on the given unit.
340
341 Note that this can raise a FilterFailure as part of normal operation"""
342 return test(unit)
343
345 """run all the tests in this suite, return failures as testname,
346 message_or_exception"""
347 self.results_cache = {}
348 failures = {}
349 ignores = self.config.lang.ignoretests[:]
350 functionnames = self.defaultfilters.keys()
351 priorityfunctionnames = self.preconditions.keys()
352 otherfunctionnames = filter(lambda functionname: functionname not in self.preconditions, functionnames)
353 for functionname in priorityfunctionnames + otherfunctionnames:
354 if functionname in ignores:
355 continue
356 filterfunction = getattr(self, functionname, None)
357 # this filterfunction may only be defined on another checker if
358 # using TeeChecker
359 if filterfunction is None:
360 continue
361 filtermessage = filterfunction.__doc__
362 try:
363 filterresult = self.run_test(filterfunction, unit)
364 except FilterFailure, e:
365 filterresult = False
366 filtermessage = e.args[0]
367 except Exception, e:
368 if self.errorhandler is None:
369 raise ValueError("error in filter %s: %r, %r, %s" % \
370 (functionname, unit.source, unit.target, e))
371 else:
372 filterresult = self.errorhandler(functionname, unit.source,
373 unit.target, e)
374 if not filterresult:
375 # we test some preconditions that aren't actually a cause for
376 # failure
377 if functionname in self.defaultfilters:
378 failures[functionname] = filtermessage
379 if functionname in self.preconditions:
380 for ignoredfunctionname in self.preconditions[functionname]:
381 ignores.append(ignoredfunctionname)
382 self.results_cache = {}
383 return failures
384
385
387 """A checker that passes source and target strings to the checks, not the
388 whole unit.
389
390 This provides some speedup and simplifies testing."""
391
def __init__(self, checkerconfig=None, excludefilters=None,
             limitfilters=None, errorhandler=None):
    """Passes all arguments on, unchanged, to the parent class
    initialiser."""
    parent = super(TranslationChecker, self)
    parent.__init__(checkerconfig, excludefilters, limitfilters,
                    errorhandler)
396
398 """Runs the given test on the given unit.
399
400 Note that this can raise a FilterFailure as part of normal operation."""
401 if self.hasplural:
402 filtermessages = []
403 filterresult = True
404 for pluralform in unit.target.strings:
405 try:
406 if not test(self.str1, unicode(pluralform)):
407 filterresult = False
408 except FilterFailure, e:
409 filterresult = False
410 filtermessages.append(unicode(e.args))
411 if not filterresult and filtermessages:
412 raise FilterFailure(filtermessages)
413 else:
414 return filterresult
415 else:
416 return test(self.str1, self.str2)
417
419 """Do some optimisation by caching some data of the unit for the benefit
420 of run_test()."""
421 self.str1 = data.normalized_unicode(unit.source) or u""
422 self.str2 = data.normalized_unicode(unit.target) or u""
423 self.hasplural = unit.hasplural()
424 self.locations = unit.getlocations()
425 return super(TranslationChecker, self).run_filters(unit)
426
427
429 """A Checker that controls multiple checkers."""
430
431 - def __init__(self, checkerconfig=None, excludefilters=None,
432 limitfilters=None, checkerclasses=None, errorhandler=None,
433 languagecode=None):
434 """construct a TeeChecker from the given checkers"""
435 self.limitfilters = limitfilters
436 if checkerclasses is None:
437 checkerclasses = [StandardChecker]
438 self.checkers = [checkerclass(checkerconfig=checkerconfig, excludefilters=excludefilters, limitfilters=limitfilters, errorhandler=errorhandler) for checkerclass in checkerclasses]
439 if languagecode:
440 for checker in self.checkers:
441 checker.config.updatetargetlanguage(languagecode)
442 # Let's hook up the language specific checker
443 lang_checker = self.checkers[0].config.lang.checker
444 if lang_checker:
445 self.checkers.append(lang_checker)
446
447 self.combinedfilters = self.getfilters(excludefilters, limitfilters)
448 self.config = checkerconfig or self.checkers[0].config
449
def getfilters(self, excludefilters=None, limitfilters=None):
    """Returns a dictionary of the available filters, including/excluding
    those in the given lists.

    The filters of every checker in self.checkers are merged (a later
    checker overrides a same-named filter of an earlier one) and the result
    is also stored in self.combinedfilters. A warning is written to stderr
    for each name in limitfilters that no checker provides.
    """
    if excludefilters is None:
        excludefilters = {}
    merged = {}
    for checker in self.checkers:
        merged.update(checker.getfilters(excludefilters, limitfilters))
    self.combinedfilters = merged
    # TODO: move this somewhere more sensible (a checkfilters method?)
    if limitfilters is not None:
        import sys
        for filtername in limitfilters:
            if filtername not in merged:
                sys.stderr.write("warning: could not find filter %s" % filtername + "\n")
    return self.combinedfilters
466
468 """run all the tests in the checker's suites"""
469 failures = {}
470 for checker in self.checkers:
471 failures.update(checker.run_filters(unit))
472 return failures
473
475 """Sets the filename that a checker should use for evaluating
476 suggestions."""
477 for checker in self.checkers:
478 checker.setsuggestionstore(store)
479
480
482 """The basic test suite for source -> target translations."""
483
485 """checks whether a string has been translated at all"""
486 str2 = prefilters.removekdecomments(str2)
487 return not (len(str1.strip()) > 0 and len(str2) == 0)
488
490 """checks whether a translation is basically identical to the original
491 string"""
492 str1 = self.filteraccelerators(self.removevariables(str1)).strip()
493 str2 = self.filteraccelerators(self.removevariables(str2)).strip()
494 if len(str1) < 2:
495 return True
496 # If the whole string is upperase, or nothing in the string can go
497 # towards uppercase, let's assume there is nothing translatable
498 # TODO: reconsider
499 if (str1.isupper() or str1.upper() == str1) and str1 == str2:
500 return True
501 if self.config.notranslatewords:
502 words1 = str1.split()
503 if len(words1) == 1 and [word for word in words1 if word in self.config.notranslatewords]:
504 #currently equivalent to:
505 # if len(words1) == 1 and words1[0] in self.config.notranslatewords:
506 #why do we only test for one notranslate word?
507 return True
508 # we could also check for things like str1.isnumeric(), but the test
509 # above (str1.upper() == str1) makes this unnecessary
510 if str1.lower() == str2.lower():
511 raise FilterFailure(u"please translate")
512 return True
513
def blank(self, str1, str2):
    """checks whether a translation only contains spaces"""
    if not str1.strip():
        # Nothing of substance in the original: nothing to complain about
        return True
    if len(str2) == 0:
        # An empty translation is not a blank one
        return True
    return len(str2.strip()) != 0
519
def short(self, str1, str2):
    """checks whether a translation is much shorter than the original
    string"""
    source_len = len(str1.strip())
    target_len = len(str2.strip())
    # A single character standing in for a longer original
    if source_len > 1 and target_len == 1:
        return False
    # A non-empty translation of less than a tenth of the original's length
    if source_len > 0 and 0 < target_len < source_len * 0.1:
        return False
    return True
526
def long(self, str1, str2):
    """checks whether a translation is much longer than the original
    string"""
    source_len = len(str1.strip())
    target_len = len(str2.strip())
    # A single character original that grew into something longer
    if source_len == 1 and target_len > 1:
        return False
    # A non-empty original of less than a tenth of the translation's length
    if source_len > 0 and 0 < source_len < target_len * 0.1:
        return False
    return True
533
535 """checks whether escaping is consistent between the two strings"""
536 if not helpers.countsmatch(str1, str2, (u"\\", u"\\\\")):
537 escapes1 = u", ".join([u"'%s'" % word for word in str1.split() if u"\\" in word])
538 escapes2 = u", ".join([u"'%s'" % word for word in str2.split() if u"\\" in word])
539 raise SeriousFilterFailure(u"escapes in original (%s) don't match escapes in translation (%s)" % (escapes1, escapes2))
540 else:
541 return True
542
544 """checks whether newlines are consistent between the two strings"""
545 if not helpers.countsmatch(str1, str2, (u"\n", u"\r")):
546 raise FilterFailure(u"line endings in original don't match line endings in translation")
547 else:
548 return True
549
551 """checks whether tabs are consistent between the two strings"""
552 if not helpers.countmatch(str1, str2, "\t"):
553 raise SeriousFilterFailure(u"tabs in original don't match tabs in translation")
554 else:
555 return True
556
558 """checks whether singlequoting is consistent between the two strings"""
559 str1 = self.filterwordswithpunctuation(self.filteraccelerators(self.filtervariables(str1)))
560 str1 = self.config.lang.punctranslate(str1)
561 str2 = self.filterwordswithpunctuation(self.filteraccelerators(self.filtervariables(str2)))
562 return helpers.countsmatch(str1, str2, (u"'", u"''", u"\\'"))
563
565 """checks whether doublequoting is consistent between the two strings"""
566 str1 = self.filteraccelerators(self.filtervariables(str1))
567 str1 = self.filterxml(str1)
568 str1 = self.config.lang.punctranslate(str1)
569 str2 = self.filteraccelerators(self.filtervariables(str2))
570 str2 = self.filterxml(str2)
571 return helpers.countsmatch(str1, str2, (u'"', u'""', u'\\"', u"«",
572 u"»", u"“", u"”"))
573
575 """checks for bad double-spaces by comparing to original"""
576 str1 = self.filteraccelerators(str1)
577 str2 = self.filteraccelerators(str2)
578 return helpers.countmatch(str1, str2, u" ")
579
581 """checks for bad spacing after punctuation"""
582 # Convert all nbsp to space, and just check spaces. Useful intermediate
583 # step to stricter nbsp checking?
584 str1 = self.filteraccelerators(self.filtervariables(str1))
585 str1 = self.config.lang.punctranslate(str1)
586 str1 = str1.replace(u"\u00a0", u" ")
587 if str1.find(u" ") == -1:
588 return True
589 str2 = self.filteraccelerators(self.filtervariables(str2))
590 str2 = str2.replace(u"\u00a0", u" ")
591 for puncchar in self.config.punctuation:
592 plaincount1 = str1.count(puncchar)
593 plaincount2 = str2.count(puncchar)
594 if not plaincount1 or plaincount1 != plaincount2:
595 continue
596 spacecount1 = str1.count(puncchar + u" ")
597 spacecount2 = str2.count(puncchar + u" ")
598 if spacecount1 != spacecount2:
599 # handle extra spaces that are because of transposed punctuation
600 if str1.endswith(puncchar) != str2.endswith(puncchar) and abs(spacecount1 - spacecount2) == 1:
601 continue
602 return False
603 return True
604
606 """checks whether printf format strings match"""
607 count1 = count2 = plural = None
608 # self.hasplural only set by run_filters, not always available
609 if 'hasplural' in self.__dict__:
610 plural = self.hasplural
611 for var_num2, match2 in enumerate(printf_pat.finditer(str2)):
612 count2 = var_num2 + 1
613 str2key = match2.group('key')
614 if match2.group('ord'):
615 for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
616 count1 = var_num1 + 1
617 if int(match2.group('ord')) == var_num1 + 1:
618 if match2.group('fullvar') != match1.group('fullvar'):
619 return 0
620 elif str2key:
621 str1key = None
622 for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
623 count1 = var_num1 + 1
624 if match1.group('key') and str2key == match1.group('key'):
625 str1key = match1.group('key')
626 # '%.0s' "placeholder" in plural will match anything
627 if plural and match2.group('fullvar') == '.0s':
628 continue
629 if match1.group('fullvar') != match2.group('fullvar'):
630 return 0
631 if str1key == None:
632 return 0
633 else:
634 for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
635 count1 = var_num1 + 1
636 # '%.0s' "placeholder" in plural will match anything
637 if plural and match2.group('fullvar') == '.0s':
638 continue
639 if (var_num1 == var_num2) and (match1.group('fullvar') != match2.group('fullvar')):
640 return 0
641
642 if count2 is None:
643 if list(printf_pat.finditer(str1)):
644 return 0
645
646 if (count1 or count2) and (count1 != count2):
647 return 0
648 return 1
649
651 """checks whether accelerators are consistent between the two strings"""
652 str1 = self.filtervariables(str1)
653 str2 = self.filtervariables(str2)
654 messages = []
655 for accelmarker in self.config.accelmarkers:
656 counter1 = decoration.countaccelerators(accelmarker, self.config.sourcelang.validaccel)
657 counter2 = decoration.countaccelerators(accelmarker, self.config.lang.validaccel)
658 count1, countbad1 = counter1(str1)
659 count2, countbad2 = counter2(str2)
660 getaccel = decoration.getaccelerators(accelmarker, self.config.lang.validaccel)
661 accel2, bad2 = getaccel(str2)
662 if count1 == count2:
663 continue
664 if count1 == 1 and count2 == 0:
665 if countbad2 == 1:
666 messages.append(u"accelerator %s appears before an invalid accelerator character '%s' (eg. space)" % (accelmarker, bad2[0]))
667 else:
668 messages.append(u"accelerator %s is missing from translation" % accelmarker)
669 elif count1 == 0:
670 messages.append(u"accelerator %s does not occur in original and should not be in translation" % accelmarker)
671 elif count1 == 1 and count2 > count1:
672 messages.append(u"accelerator %s is repeated in translation" % accelmarker)
673 else:
674 messages.append(u"accelerator %s occurs %d time(s) in original and %d time(s) in translation" % (accelmarker, count1, count2))
675 if messages:
676 if "accelerators" in self.config.criticaltests:
677 raise SeriousFilterFailure(messages)
678 else:
679 raise FilterFailure(messages)
680 return True
681
682 # def acceleratedvariables(self, str1, str2):
683 # """checks that no variables are accelerated"""
684 # messages = []
685 # for accelerator in self.config.accelmarkers:
686 # for variablestart, variableend in self.config.varmatches:
687 # error = accelerator + variablestart
688 # if str1.find(error) >= 0:
689 # messages.append(u"original has an accelerated variable")
690 # if str2.find(error) >= 0:
691 # messages.append(u"translation has an accelerated variable")
692 # if messages:
693 # raise FilterFailure(messages)
694 # return True
695
697 """checks whether variables of various forms are consistent between the
698 two strings"""
699 messages = []
700 mismatch1, mismatch2 = [], []
701 varnames1, varnames2 = [], []
702 for startmarker, endmarker in self.config.varmatches:
703 varchecker = decoration.getvariables(startmarker, endmarker)
704 if startmarker and endmarker:
705 if isinstance(endmarker, int):
706 redecorate = lambda var: startmarker + var
707 else:
708 redecorate = lambda var: startmarker + var + endmarker
709 elif startmarker:
710 redecorate = lambda var: startmarker + var
711 else:
712 redecorate = lambda var: var
713 vars1 = varchecker(str1)
714 vars2 = varchecker(str2)
715 if vars1 != vars2:
716 # we use counts to compare so we can handle multiple variables
717 vars1, vars2 = [var for var in vars1 if vars1.count(var) > vars2.count(var)], [var for var in vars2 if vars1.count(var) < vars2.count(var)]
718 # filter variable names we've already seen, so they aren't
719 # matched by more than one filter...
720 vars1, vars2 = [var for var in vars1 if var not in varnames1], [var for var in vars2 if var not in varnames2]
721 varnames1.extend(vars1)
722 varnames2.extend(vars2)
723 vars1 = map(redecorate, vars1)
724 vars2 = map(redecorate, vars2)
725 mismatch1.extend(vars1)
726 mismatch2.extend(vars2)
727 if mismatch1:
728 messages.append(u"do not translate: %s" % u", ".join(mismatch1))
729 elif mismatch2:
730 messages.append(u"translation contains variables not in original: %s" % u", ".join(mismatch2))
731 if messages and mismatch1:
732 raise SeriousFilterFailure(messages)
733 elif messages:
734 raise FilterFailure(messages)
735 return True
736
738 """checks that function names are not translated"""
739 return helpers.funcmatch(str1, str2, decoration.getfunctions, self.config.punctuation)
740
742 """checks that emails are not translated"""
743 return helpers.funcmatch(str1, str2, decoration.getemails)
744
746 """checks that URLs are not translated"""
747 return helpers.funcmatch(str1, str2, decoration.geturls)
748
750 """checks whether numbers of various forms are consistent between the
751 two strings"""
752 return helpers.countsmatch(str1, str2, decoration.getnumbers(str1))
753
755 """checks whether whitespace at the beginning of the strings matches"""
756 return helpers.funcmatch(str1, str2, decoration.spacestart)
757
759 """checks whether whitespace at the end of the strings matches"""
760 str1 = self.config.lang.punctranslate(str1)
761 return helpers.funcmatch(str1, str2, decoration.spaceend)
762
764 """checks whether punctuation at the beginning of the strings match"""
765 str1 = self.filterxml(self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str1))))
766 str1 = self.config.lang.punctranslate(str1)
767 str2 = self.filterxml(self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str2))))
768 return helpers.funcmatch(str1, str2, decoration.puncstart, self.config.punctuation)
769
771 """checks whether punctuation at the end of the strings match"""
772 str1 = self.filtervariables(str1)
773 str1 = self.config.lang.punctranslate(str1)
774 str2 = self.filtervariables(str2)
775 str1 = str1.rstrip()
776 str2 = str2.rstrip()
777 return helpers.funcmatch(str1, str2, decoration.puncend, self.config.endpunctuation + u":")
778
780 """checks that strings that are purely punctuation are not changed"""
781 # this test is a subset of startandend
782 if (decoration.ispurepunctuation(str1)):
783 return str1 == str2
784 else:
785 return not decoration.ispurepunctuation(str2)
786
788 """checks that the number of brackets in both strings match"""
789 str1 = self.filtervariables(str1)
790 str2 = self.filtervariables(str2)
791 messages = []
792 missing = []
793 extra = []
794 for bracket in (u"[", u"]", u"{", u"}", u"(", u")"):
795 count1 = str1.count(bracket)
796 count2 = str2.count(bracket)
797 if count2 < count1:
798 missing.append(u"'%s'" % bracket)
799 elif count2 > count1:
800 extra.append(u"'%s'" % bracket)
801 if missing:
802 messages.append(u"translation is missing %s" % u", ".join(missing))
803 if extra:
804 messages.append(u"translation has extra %s" % u", ".join(extra))
805 if messages:
806 raise FilterFailure(messages)
807 return True
808
810 """checks that the number of sentences in both strings match"""
811 str1 = self.filteraccelerators(str1)
812 str2 = self.filteraccelerators(str2)
813 sentences1 = len(self.config.sourcelang.sentences(str1))
814 sentences2 = len(self.config.lang.sentences(str2))
815 if not sentences1 == sentences2:
816 raise FilterFailure(u"The number of sentences differ: %d versus %d" % (sentences1, sentences2))
817 return True
818
820 """checks that options are not translated"""
821 str1 = self.filtervariables(str1)
822 for word1 in str1.split():
823 if word1 != u"--" and word1.startswith(u"--") and word1[-1].isalnum():
824 parts = word1.split(u"=")
825 if not parts[0] in str2:
826 raise FilterFailure(u"The option %s does not occur or is translated in the translation." % parts[0])
827 if len(parts) > 1 and parts[1] in str2:
828 raise FilterFailure(u"The parameter %(param)s in option %(option)s is not translated." % {"param": parts[1], "option": parts[0]})
829 return True
830
832 """checks that the message starts with the correct capitalisation"""
833 str1 = self.filteraccelerators(str1)
834 str2 = self.filteraccelerators(str2)
835 if len(str1) > 1 and len(str2) > 1:
836 return self.config.sourcelang.capsstart(str1) == self.config.lang.capsstart(str2)
837 if len(str1) == 0 and len(str2) == 0:
838 return True
839 if len(str1) == 0 or len(str2) == 0:
840 return False
841 return True
842
def simplecaps(self, str1, str2):
    """Check that the capitalisation of two strings isn't wildly different.

    Variables are removed from both strings, then the number of uppercase
    and alphabetic characters in each is compared using a few heuristics
    (all-caps source, zero or one capital, few capitals, short string,
    mostly capitals, and a general tolerance based on combined length).

    Returns True when the capitalisation style is judged similar.
    """
    str1 = self.removevariables(str1)
    str2 = self.removevariables(str2)
    # TODO: review this. The 'I' is specific to English, so it probably
    # serves no purpose to get sourcelang.sentenceend
    # Lowercase a mid-sentence " I " so the English pronoun is not counted
    # as a capital.  The character before it is captured and written back
    # (previously it was consumed by the match and silently dropped), and
    # sentenceend is escaped so it is safe inside the character class.
    pronoun = u"([^%s]) I " % re.escape(self.config.sourcelang.sentenceend)
    str1 = re.sub(pronoun, u"\\1 i ", str1)
    capitals1 = helpers.filtercount(str1, unicode.isupper)
    capitals2 = helpers.filtercount(str2, unicode.isupper)
    alpha1 = helpers.filtercount(str1, unicode.isalpha)
    alpha2 = helpers.filtercount(str2, unicode.isalpha)
    # Capture the all caps case
    if capitals1 == alpha1:
        return capitals2 == alpha2
    # some heuristic tests to try and see that the style of capitals is
    # vaguely the same ("//" keeps the original integer division explicit)
    if capitals1 == 0 or capitals1 == 1:
        return capitals2 == capitals1
    elif capitals1 < len(str1) // 10:
        return capitals2 <= len(str2) // 8
    elif len(str1) < 10:
        return abs(capitals1 - capitals2) < 3
    elif capitals1 > len(str1) * 6 // 10:
        return capitals2 > len(str2) * 6 // 10
    else:
        return abs(capitals1 - capitals2) < (len(str1) + len(str2)) // 6
869
def acronyms(self, str1, str2):
    """Check that acronyms that appear in the source are unchanged.

    A source word counts as an acronym when it is all uppercase and longer
    than one character.  Variables found in the source (per
    ``self.config.varmatches``) and the keys of
    ``self.config.musttranslatewords`` are exempt.

    Raises FilterFailure listing every acronym that does not occur in the
    translation; returns True otherwise.
    """
    exempt = []
    for startmatch, endmatch in self.config.varmatches:
        exempt += decoration.getvariables(startmatch, endmatch)(str1)
    exempt += self.config.musttranslatewords.keys()
    str1 = self.filteraccelerators(self.filtervariables(str1))
    sourcewords = self.config.lang.word_iter(str1)
    str2 = self.filteraccelerators(self.filtervariables(str2))
    #TODO: strip XML? - should provide better error messsages
    # see mail/chrome/messanger/smime.properties.po
    #TODO: consider limiting the word length for recognising acronyms to
    #something like 5/6 characters
    missing = [word for word in sourcewords
               if word.isupper() and len(word) > 1
               and word not in exempt and word not in str2]
    if missing:
        raise FilterFailure(u"acronyms should not be translated: " + u", ".join(missing))
    return True
891
def doublewords(self, str1, str2):
    """Check for repeated words in the translation.

    Variables and accelerators are removed from ``str2``, full stops are
    dropped and the text is lowercased before it is split on whitespace
    (which already covers newlines).

    Raises FilterFailure on the first word that directly follows itself,
    unless it is listed in ``self.config.lang.validdoublewords``; returns
    True otherwise.  ``str1`` is not used.
    """
    # The former '"\n".join(str2.split("\n"))' step returned str2 unchanged,
    # so the text is used directly.
    cleaned = self.filteraccelerators(self.removevariables(str2))
    words = cleaned.replace(u".", u"").lower().split()
    lastword = ""
    for word in words:
        if word == lastword and word not in self.config.lang.validdoublewords:
            raise FilterFailure(u"The word '%s' is repeated" % word)
        lastword = word
    return True
902
def notranslatewords(self, str1, str2):
    """Check that words configured as untranslatable appear in the
    translation too.

    Passes immediately when ``self.config.notranslatewords`` is empty.
    Otherwise every character in ``self.config.punctuation`` is replaced by
    a space in both (variable-filtered) strings before they are split into
    words.

    Raises FilterFailure listing the configured words present in the source
    but absent from the translation; returns True otherwise.
    """
    protected = self.config.notranslatewords
    if not protected:
        return True
    source = self.filtervariables(str1)
    target = self.filtervariables(str2)
    #The above is full of strange quotes and things in utf-8 encoding.
    #single apostrophe perhaps problematic in words like "doesn't"
    for mark in self.config.punctuation:
        source = source.replace(mark, u" ")
        target = target.replace(mark, u" ")
    sourcewords = self.filteraccelerators(source).split()
    targetwords = self.filteraccelerators(target).split()
    missing = []
    for word in sourcewords:
        if word in protected and word not in targetwords:
            missing.append(word)
    if missing:
        raise FilterFailure(u"do not translate: %s" % (u", ".join(missing)))
    return True
921
def musttranslatewords(self, str1, str2):
    """Check that words configured as definitely translatable don't appear
    in the translation.

    Passes immediately when ``self.config.musttranslatewords`` is empty.
    Otherwise variables are removed and every character in
    ``self.config.punctuation`` is replaced by a space in both strings
    before they are split into words.

    Raises FilterFailure listing the configured words that occur in both
    the source and the translation; returns True otherwise.
    """
    required = self.config.musttranslatewords
    if not required:
        return True
    source = self.removevariables(str1)
    target = self.removevariables(str2)
    # The above is full of strange quotes and things in utf-8 encoding.
    # single apostrophe perhaps problematic in words like "doesn't"
    for mark in self.config.punctuation:
        source = source.replace(mark, u" ")
        target = target.replace(mark, u" ")
    sourcewords = self.filteraccelerators(source).split()
    targetwords = self.filteraccelerators(target).split()
    untranslated = []
    for word in sourcewords:
        if word in required and word in targetwords:
            untranslated.append(word)
    if untranslated:
        raise FilterFailure(u"please translate: %s" % (u", ".join(untranslated)))
    return True
940
def validchars(self, str1, str2):
    """Check that only characters specified as valid appear in the
    translation.

    Passes immediately when ``self.config.validcharsmap`` is empty.  Both
    strings are run through ``translate`` with that map; whatever remains
    is treated as invalid.  Characters left over in the translation that
    are not also left over in the source are reported.

    Raises FilterFailure naming each such character with its code point;
    returns True otherwise.
    """
    charmap = self.config.validcharsmap
    if not charmap:
        return True
    source_leftover = str1.translate(charmap)
    target_leftover = str2.translate(charmap)
    offenders = []
    for char in target_leftover:
        if char not in source_leftover:
            offenders.append(u"'%s' (\\u%04x)" % (char, ord(char)))
    if offenders:
        raise FilterFailure(u"invalid chars: %s" % (u", ".join(offenders)))
    return True
952
def filepaths(self, str1, str2):
    """Check that file paths have not been translated.

    Every whitespace-separated word of the accelerator-filtered source
    that starts with ``/`` is passed to ``helpers.countsmatch`` together
    with both strings; the check fails as soon as one does not match.
    """
    for candidate in self.filteraccelerators(str1).split():
        if not candidate.startswith(u"/"):
            continue
        if not helpers.countsmatch(str1, str2, (candidate,)):
            return False
    return True
960
988
def kdecomments(self, str1, str2):
    """Check that no KDE style comments appear in the translation.

    Fails when ``str2`` starts with ``_:`` or contains ``_:`` at the start
    of any later line.  ``str1`` is not used.
    """
    if str2.startswith(u"_:"):
        return False
    return u"\n_:" not in str2
993
def compendiumconflicts(self, str1, str2):
    """Check for Gettext compendium conflicts (#-#-#-#-#).

    Passes when the conflict marker does not occur anywhere in ``str2``.
    ``str1`` is not used.
    """
    marker = u"#-#-#-#-#"
    return marker not in str2
997
def simpleplurals(self, str1, str2):
    """Check for English style plural(s) for you to review.

    Counts occurrences of the literal ``(s)`` in both strings.  When the
    target language has a single plural form
    (``self.config.lang.nplurals == 1``) the translation must not contain
    any; otherwise the counts in source and translation must be equal.
    """

    def numberofpatterns(string, patterns):
        """Return the total number of matches of all patterns in string."""
        return sum(len(pattern.findall(string)) for pattern in patterns)

    # Raw strings: "\(" is not a valid escape in an ordinary string literal.
    sourcepatterns = [re.compile(r"\(s\)")]
    targetpatterns = [re.compile(r"\(s\)")]
    sourcecount = numberofpatterns(str1, sourcepatterns)
    targetcount = numberofpatterns(str2, targetpatterns)
    if self.config.lang.nplurals == 1:
        return not targetcount
    return sourcecount == targetcount
1014
def spellcheck(self, str1, str2):
    """Check for words that don't pass a spell check.

    Passes without checking when no target language is configured or when
    the ``spelling`` module reports itself unavailable.  Words flagged in
    the source (checked with ``lang="en"``) are ignored in the
    translation, as are configured ``notranslatewords`` and words that
    occur among their own suggestions.

    Raises FilterFailure with a list of messages, one per suspect word;
    returns True otherwise.
    """
    if not self.config.targetlanguage or not spelling.available:
        return True
    # TODO: filterxml?
    str1 = self.filteraccelerators_by_list(self.filtervariables(str1), self.config.sourcelang.validaccel)
    str2 = self.filteraccelerators_by_list(self.filtervariables(str2), self.config.lang.validaccel)
    flagged_in_source = []
    for word, index, suggestions in spelling.check(str1, lang="en"):
        flagged_in_source.append(word)
    messages = []
    for word, index, suggestions in spelling.check(str2, lang=self.config.targetlanguage):
        # The last condition is a hack to ignore hyphenisation rules.
        skip = (word in self.config.notranslatewords
                or word in flagged_in_source
                or word in suggestions)
        if not skip:
            messages.append(u"check spelling of %s (could be %s)" % (word, u" / ".join(suggestions[:5])))
    if messages:
        raise FilterFailure(messages)
    return True
1040
def credits(self, str1, str2):
    """Check for messages containing translation credits instead of normal
    translations.

    Fails when the source string is one of ``self.config.credit_sources``.
    ``str2`` is not used.
    """
    return str1 not in self.config.credit_sources
1045
# If the precondition filter is run and fails then the other tests listed are ignored
# Maps the name of a precondition test to the tuple of test names that are
# skipped when that precondition fails.
preconditions = {"untranslated": ("simplecaps", "variables", "startcaps",
                                  "accelerators", "brackets", "endpunc",
                                  "acronyms", "xmltags", "startpunc",
                                  "endwhitespace", "startwhitespace",
                                  "escapes", "doublequoting", "singlequoting",
                                  "filepaths", "purepunc", "doublespacing",
                                  "sentencecount", "numbers", "isfuzzy",
                                  "isreview", "notranslatewords", "musttranslatewords",
                                  "emails", "simpleplurals", "urls", "printf",
                                  "tabs", "newlines", "functions", "options",
                                  "blank", "nplurals", "gconf"),
                 "blank": ("simplecaps", "variables", "startcaps",
                           "accelerators", "brackets", "endpunc",
                           "acronyms", "xmltags", "startpunc",
                           "endwhitespace", "startwhitespace",
                           "escapes", "doublequoting", "singlequoting",
                           "filepaths", "purepunc", "doublespacing",
                           "sentencecount", "numbers", "isfuzzy",
                           "isreview", "notranslatewords", "musttranslatewords",
                           "emails", "simpleplurals", "urls", "printf",
                           "tabs", "newlines", "functions", "options",
                           "gconf"),
                 "credits": ("simplecaps", "variables", "startcaps",
                             "accelerators", "brackets", "endpunc",
                             "acronyms", "xmltags", "startpunc",
                             "escapes", "doublequoting", "singlequoting",
                             "filepaths", "doublespacing",
                             "sentencecount", "numbers",
                             "emails", "simpleplurals", "urls", "printf",
                             "tabs", "newlines", "functions", "options"),
                 "purepunc": ("startcaps", "options"),
                 # This is causing some problems since Python 2.6, as
                 # startcaps is now seen as an important one to always execute
                 # and could now be done before it is blocked by a failing
                 # "untranslated" or "blank" test. This is probably happening
                 # due to slightly different implementation of the internal
                 # dict handling since Python 2.6. We should never have relied
                 # on this ordering anyway.
                 #"startcaps": ("simplecaps",),
                 "endwhitespace": ("endpunc",),
                 "startwhitespace": ("startpunc",),
                 "unchanged": ("doublewords",),
                 "compendiumconflicts": ("accelerators", "brackets", "escapes",
                                         "numbers", "startpunc", "long", "variables",
                                         "startcaps", "sentencecount", "simplecaps",
                                         "doublespacing", "endpunc", "xmltags",
                                         "startwhitespace", "endwhitespace",
                                         "singlequoting", "doublequoting",
                                         "filepaths", "purepunc", "doublewords", "printf")}
1096
1097 # code to actually run the tests (use unittest?)
1098
# Checker settings for OpenOffice.org projects; merged into the active
# CheckerConfig with checkerconfig.update(openofficeconfig).
# NOTE(review): each varmatches entry is a (start, end) pair handed to
# decoration.getvariables; the meaning of a None or integer end marker is
# defined there - confirm before editing.
openofficeconfig = CheckerConfig(
    accelmarkers=["~"],
    varmatches=[("&", ";"), ("%", "%"), ("%", None), ("%", 0), ("$(", ")"),
                ("$", "$"), ("${", "}"), ("#", "#"), ("#", 1), ("#", 0),
                ("($", ")"), ("$[", "]"), ("[", "]"), ("$", None)],
    ignoretags=[("alt", "xml-lang", None), ("ahelp", "visibility", "visible"),
                ("img", "width", None), ("img", "height", None)],
    canchangetags=[("link", "name", None)],
    )
1108
1110
def __init__(self, **kwargs):
    """Set up a StandardChecker with the OpenOffice.org settings applied.

    Uses the ``checkerconfig`` keyword argument when one is given, or
    creates a fresh CheckerConfig and stores it in ``kwargs``; either way
    ``openofficeconfig`` is merged in before the base initialiser runs.
    """
    config = kwargs.get("checkerconfig", None)
    if config is None:
        config = kwargs["checkerconfig"] = CheckerConfig()
    config.update(openofficeconfig)
    StandardChecker.__init__(self, **kwargs)
1118
# Checker settings for Mozilla projects; merged into the active
# CheckerConfig with checkerconfig.update(mozillaconfig).
# NOTE(review): the exact effect of listing a test in criticaltests is
# defined by CheckerConfig - confirm there.
mozillaconfig = CheckerConfig(
    accelmarkers=["&"],
    varmatches=[("&", ";"), ("%", "%"), ("%", 1), ("$", "$"), ("$", None),
                ("#", 1), ("${", "}"), ("$(^", ")")],
    criticaltests=["accelerators"],
    )
1125
1127
def __init__(self, **kwargs):
    """Set up a StandardChecker with the Mozilla settings applied.

    Uses the ``checkerconfig`` keyword argument when one is given, or
    creates a fresh CheckerConfig and stores it in ``kwargs``; either way
    ``mozillaconfig`` is merged in before the base initialiser runs.
    """
    config = kwargs.get("checkerconfig", None)
    if config is None:
        config = kwargs["checkerconfig"] = CheckerConfig()
    config.update(mozillaconfig)
    StandardChecker.__init__(self, **kwargs)
1135
1143
# Checker settings for Drupal projects; merged into the active
# CheckerConfig with checkerconfig.update(drupalconfig).  Only the variable
# markers differ from the defaults: "%", "@" and "!" prefixes.
drupalconfig = CheckerConfig(
    varmatches=[("%", None), ("@", None), ("!", None)],
    )
1147
1149
def __init__(self, **kwargs):
    """Set up a StandardChecker with the Drupal settings applied.

    Uses the ``checkerconfig`` keyword argument when one is given, or
    creates a fresh CheckerConfig and stores it in ``kwargs``; either way
    ``drupalconfig`` is merged in before the base initialiser runs.
    """
    config = kwargs.get("checkerconfig", None)
    if config is None:
        config = kwargs["checkerconfig"] = CheckerConfig()
    config.update(drupalconfig)
    StandardChecker.__init__(self, **kwargs)
1157
# Checker settings for GNOME projects; merged into the active CheckerConfig
# with checkerconfig.update(gnomeconfig).  credit_sources lists the source
# strings that the "credits" check treats as translator credits.
gnomeconfig = CheckerConfig(
    accelmarkers=["_"],
    varmatches=[("%", 1), ("$(", ")")],
    credit_sources=[u"translator-credits"],
    )
1163
1165
def __init__(self, **kwargs):
    """Set up a StandardChecker with the GNOME settings applied.

    Uses the ``checkerconfig`` keyword argument when one is given, or
    creates a fresh CheckerConfig and stores it in ``kwargs``; either way
    ``gnomeconfig`` is merged in before the base initialiser runs.
    """
    config = kwargs.get("checkerconfig", None)
    if config is None:
        config = kwargs["checkerconfig"] = CheckerConfig()
    config.update(gnomeconfig)
    StandardChecker.__init__(self, **kwargs)
1173
def gconf(self, str1, str2):
    """Check whether any gconf config settings have been translated.

    Only applies when one of ``self.locations`` contains ``schemas.in``.
    The attributes found in the source by ``gconf_attribute_re`` are
    stripped of their first and last character and must occur in the
    translation.

    Raises FilterFailure listing the attributes that do not; returns True
    otherwise.
    """
    for location in self.locations:
        if 'schemas.in' not in location:
            continue
        attributes = gconf_attribute_re.findall(str1)
        translated = []
        for attribute in attributes:
            if attribute[1:-1] not in str2:
                translated.append(attribute)
        if translated:
            raise FilterFailure(u"do not translate gconf attribute: %s" % (u", ".join(translated)))
    return True
1184
# Checker settings for KDE projects; merged into the active CheckerConfig
# with checkerconfig.update(kdeconfig).  credit_sources lists the source
# strings that the "credits" check treats as translator credits.
kdeconfig = CheckerConfig(
    accelmarkers=["&"],
    varmatches=[("%", 1)],
    credit_sources=[u"Your names", u"Your emails", u"ROLES_OF_TRANSLATORS"],
    )
1190
1192
def __init__(self, **kwargs):
    """Set up a StandardChecker with the KDE settings applied.

    Uses the ``checkerconfig`` keyword argument when one is given, or
    creates a fresh CheckerConfig and stores it in ``kwargs``; either way
    ``kdeconfig`` is merged in before the base initialiser runs.
    """
    # TODO allow setup of KDE plural and translator comments so that they do
    # not create false postives
    config = kwargs.get("checkerconfig", None)
    if config is None:
        config = kwargs["checkerconfig"] = CheckerConfig()
    config.update(kdeconfig)
    StandardChecker.__init__(self, **kwargs)
1202
# Checker settings for Creative Commons licence projects; merged into the
# active CheckerConfig with checkerconfig.update(cclicenseconfig).
# Variables are delimited by "@" on both sides.
cclicenseconfig = CheckerConfig(varmatches=[("@", "@")])
1204
1206
def __init__(self, **kwargs):
    """Set up a StandardChecker with the Creative Commons settings applied.

    Uses the ``checkerconfig`` keyword argument when one is given, or
    creates a fresh CheckerConfig and stores it in ``kwargs``; either way
    ``cclicenseconfig`` is merged in before the base initialiser runs.
    """
    config = kwargs.get("checkerconfig", None)
    if config is None:
        config = kwargs["checkerconfig"] = CheckerConfig()
    config.update(cclicenseconfig)
    StandardChecker.__init__(self, **kwargs)
1214
# Maps a project style name to the checker class that implements it.
# Note that "wx" reuses the KDE checker.
projectcheckers = {
    "openoffice": OpenOfficeChecker,
    "mozilla": MozillaChecker,
    "kde": KdeChecker,
    "wx": KdeChecker,
    "gnome": GnomeChecker,
    "creativecommons": CCLicenseChecker,
    "drupal": DrupalChecker,
    }
1224
1225
1227 """The standard checks for common checks on translation units."""
1228
1232
1236
def nplurals(self, unit):
    """Check for the correct number of noun forms for plural translations.

    Units without plurals always pass.  For plural units the number of
    target strings must equal ``self.config.lang.nplurals``; when that
    value is not positive the test is not run and the unit passes.
    """
    if not unit.hasplural():
        return True
    # if we don't have a valid nplurals value, don't run the test
    expected = self.config.lang.nplurals
    if expected > 0:
        return len(unit.target.strings) == expected
    return True
1246
def hassuggestion(self, unit):
    """Check if there is at least one suggested translation for this unit.

    Suggestions come from ``self.suggestion_store`` when one is set (the
    attribute is created as None if missing), otherwise from the unit's
    alternative translations when it is an XLIFF unit.

    Returns False when any suggestion is found, True otherwise.
    """
    store = getattr(self, 'suggestion_store', None)
    self.suggestion_store = store
    if store:
        found = store.findunits(unit.source)
    elif xliff and isinstance(unit, xliff.xliffunit):
        # TODO: we probably want to filter them somehow
        found = unit.getalttrans()
    else:
        found = []
    return not found
1258
1259
def runtests(str1, str2, ignorelist=()):
    """Run the standard checks on a pair of strings and report failures.

    Both strings are normalised to unicode, wrapped in a TranslationUnit
    and run through a StandardChecker that excludes the filters named in
    ``ignorelist``.  Each failure is printed together with both strings.

    Returns the failures reported by ``run_filters`` (empty when every
    check passed).
    """
    from translate.storage import base
    source = data.normalized_unicode(str1)
    target = data.normalized_unicode(str2)
    unit = base.TranslationUnit(source)
    unit.target = target
    failures = StandardChecker(excludefilters=ignorelist).run_filters(unit)
    for test in failures:
        report = "failure: %s: %s\n %r\n %r" % (test, failures[test], source, target)
        print(report)
    return failures
1272
1273
def batchruntests(pairs):
    """Run the checks on a batch of string pairs and print a summary.

    ``pairs`` is a sequence of (source, target) string tuples.  Each pair
    is passed to ``runtests``; a blank line is printed after every pair,
    followed by one final line with the number of pairs that passed.
    """
    passed, numpairs = 0, len(pairs)
    for str1, str2 in pairs:
        # runtests returns the failures it found, so a pair has passed
        # only when that result is empty.
        if not runtests(str1, str2):
            passed += 1
        print("")
    print("total: %d/%d pairs passed" % (passed, numpairs))
1282
# Ad-hoc smoke test: when run as a script, push a handful of sample
# (source, target) pairs through the standard checks and print the results.
if __name__ == '__main__':
    testset = [(r"simple", r"somple"),
               (r"\this equals \that", r"does \this equal \that?"),
               (r"this \'equals\' that", r"this 'equals' that"),
               (r" start and end! they must match.", r"start and end! they must match."),
               (r"check for matching %variables marked like %this", r"%this %variable is marked"),
               (r"check for mismatching %variables marked like %this", r"%that %variable is marked"),
               (r"check for mismatching %variables% too", r"how many %variable% are marked"),
               (r"%% %%", r"%%"),
               (r"Row: %1, Column: %2", r"Mothalo: %1, Kholomo: %2"),
               (r"simple lowercase", r"it is all lowercase"),
               (r"simple lowercase", r"It Is All Lowercase"),
               (r"Simple First Letter Capitals", r"First Letters"),
               (r"SIMPLE CAPITALS", r"First Letters"),
               (r"SIMPLE CAPITALS", r"ALL CAPITALS"),
               (r"forgot to translate", r" "),
               ]
    batchruntests(testset)
1301
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Wed May 12 18:08:28 2010 | http://epydoc.sourceforge.net |