1   
  2   
  3   
  4   
  5   
  6   
  7   
  8   
  9   
 10   
 11   
 12   
 13   
 14   
 15   
 16   
 17   
 18   
 19   
 20   
 21   
 22   
 23  """Parent class for LISA standards (TMX, TBX, XLIFF)""" 
 24   
 25  import re 
 26   
 27  from translate.storage import base 
 28  from translate.lang import data 
 29  try: 
 30      from lxml import etree 
 31  except ImportError, e: 
 32      raise ImportError("lxml is not installed. It might be possible to continue without support for XML formats.") 
 33   
 35      """joins together the text from all the text nodes in the nodelist and their children""" 
 36       
 37       
 38      if node:     
 39          return node.xpath("string()")  
 40      else: 
 41          return data.forceunicode(node.text) or u"" 
  42           
 43   
 45      """generate match objects for all @re_obj matches in @text.""" 
 46      start = 0 
 47      max = len(text) 
 48      while start < max: 
 49          m = re_obj.search(text, start) 
 50          if not m: break 
 51          yield m 
 52          start = m.end() 
  53   
 54  placeholders = ['(%[diouxXeEfFgGcrs])', r'(\\+.?)', '(%[0-9]$lx)', '(%[0-9]\$[a-z])', '(<.+?>)'] 
 55  re_placeholders = [re.compile(ph) for ph in placeholders] 
 65   
 66  XML_NS = 'http://www.w3.org/XML/1998/namespace' 
 67   
 69      """Sets the xml:lang attribute on node""" 
 70      node.set("{%s}lang" % XML_NS, lang) 
  71   
 73      """Sets the xml:space attribute on node""" 
 74      node.set("{%s}space" % XML_NS, value) 
  75   
 77      """Returns name in Clark notation within the given namespace. 
 78   
 79      For example namespaced("source") in an XLIFF document might return 
 80          {urn:oasis:names:tc:xliff:document:1.1}source 
 81      This is needed throughout lxml. 
 82      """ 
 83      if namespace: 
 84          return "{%s}%s" % (namespace, name) 
 85      else: 
 86          return name 
  87   
 89      """A single unit in the file.  
 90  Provisional work is done to make several languages possible.""" 
 91   
 92       
 93      rootNode = "" 
 94       
 95      languageNode = "" 
 96       
 97      textNode = "" 
 98   
 99      namespace = None 
100   
101 -    def __init__(self, source, empty=False): 
 102          """Constructs a unit containing the given source string""" 
103          if empty: 
104              return 
105          self.xmlelement = etree.Element(self.rootNode) 
106           
107   
108          super(LISAunit, self).__init__(source) 
 109   
111          """Compares two units""" 
112          languageNodes = self.getlanguageNodes() 
113          otherlanguageNodes = other.getlanguageNodes() 
114          if len(languageNodes) != len(otherlanguageNodes): 
115              return False 
116          for i in range(len(languageNodes)): 
117              mytext = self.getNodeText(languageNodes[i]) 
118              othertext = other.getNodeText(otherlanguageNodes[i]) 
119              if mytext != othertext: 
120                   
121                  return False 
122          return True 
 123   
125          """Returns name in Clark notation. 
126   
127          For example namespaced("source") in an XLIFF document might return 
128              {urn:oasis:names:tc:xliff:document:1.1}source 
129          This is needed throughout lxml. 
130          """ 
131          return namespaced(self.namespace, name) 
 132   
133 -    def setsource(self, source, sourcelang='en'): 
 141   
144      source = property(getsource, setsource) 
145   
146 -    def settarget(self, text, lang='xx', append=False): 
 163   
172      target = property(gettarget, settarget) 
173   
175          """Returns a xml Element setup with given parameters to represent a  
176          single language entry. Has to be overridden.""" 
177          return None 
 178   
180          """Create the text node in parent containing all the ph tags""" 
181          matches = _getPhMatches(text) 
182          if not matches: 
183              parent.text = text 
184              return 
185   
186           
187          start = matches[0].start() 
188          pretext = text[:start] 
189          if pretext: 
190              parent.text = pretext 
191          lasttag = parent 
192          for i, m in enumerate(matches): 
193               
194              pretext = text[start:m.start()] 
195               
196              if pretext: 
197                  lasttag.tail = pretext 
198               
199              phnode = etree.SubElement(parent, "ph") 
200              phnode.set("id", str(i+1)) 
201              phnode.text = m.group() 
202              lasttag = phnode 
203              start = m.end() 
204           
205          if text[start:]: 
206              lasttag.tail = text[start:] 
 207   
209          """Returns a list of all nodes that contain per language information.""" 
210          return self.xmlelement.findall(self.namespaced(self.languageNode)) 
 211   
213          """Retrieves a languageNode either by language or by index""" 
214          if lang is None and index is None: 
215              raise KeyError("No criterea for languageNode given") 
216          languageNodes = self.getlanguageNodes() 
217          if lang: 
218              for set in languageNodes: 
219                  if set.get("{%s}lang" % XML_NS) == lang: 
220                      return set 
221          else: 
222              if index >= len(languageNodes): 
223                  return None 
224              else: 
225                  return languageNodes[index] 
226          return None 
 227   
228 -    def getNodeText(self, languageNode): 
 229          """Retrieves the term from the given languageNode""" 
230          if languageNode is None: 
231              return None 
232          if self.textNode: 
233              terms = languageNode.findall('.//%s' % self.namespaced(self.textNode)) 
234              if len(terms) == 0: 
235                  return None 
236              return getText(terms[0]) 
237          else: 
238              return getText(languageNode) 
 239   
241          return etree.tostring(self.xmlelement, pretty_print=True, encoding='utf-8') 
 242   
244          term = cls(None, empty=True) 
245          term.xmlelement = element 
246          return term 
 247      createfromxmlElement = classmethod(createfromxmlElement) 
 248   
250      """A class representing a file store for one of the LISA file formats.""" 
251      UnitClass = LISAunit 
252       
253      rootNode = "" 
254       
255      bodyNode = "" 
256       
257      XMLskeleton = "" 
258   
259      namespace = None 
260   
261 -    def __init__(self, inputfile=None, sourcelanguage='en', targetlanguage=None, unitclass=None): 
 273   
275          """Method to be overridden to initialise headers, etc.""" 
276          pass 
 277   
279          """Returns name in Clark notation. 
280   
281          For example namespaced("source") in an XLIFF document might return 
282              {urn:oasis:names:tc:xliff:document:1.1}source 
283          This is needed throughout lxml. 
284          """ 
285          return namespaced(self.namespace, name) 
 286   
287 -    def initbody(self): 
 288          """Initialises self.body so it never needs to be retrieved from the XML again.""" 
289          self.namespace = self.document.getroot().nsmap.get(None, None) 
290          self.body = self.document.find('//%s' % self.namespaced(self.bodyNode)) 
 291   
293          """Sets the source language for this store""" 
294          self.sourcelanguage = sourcelanguage 
 295   
297          """Sets the target language for this store""" 
298          self.targetlanguage = targetlanguage 
 299   
301           
302          """Adds and returns a new unit with the given string as first entry.""" 
303          newunit = self.UnitClass(source) 
304          self.addunit(newunit) 
305          return newunit 
 306   
311   
313          """Converts to a string containing the file's XML""" 
314          return etree.tostring(self.document, pretty_print=True, xml_declaration=True, encoding='utf-8') 
 315   
317          """Populates this object from the given xml string""" 
318          if not hasattr(self, 'filename'): 
319              self.filename = getattr(xml, 'name', '') 
320          if hasattr(xml, "read"): 
321              xml.seek(0) 
322              posrc = xml.read() 
323              xml = posrc 
324          self.document = etree.fromstring(xml).getroottree() 
325          self.encoding = self.document.docinfo.encoding 
326          self.initbody() 
327          assert self.document.getroot().tag == self.namespaced(self.rootNode) 
328          termEntries = self.body.findall('.//%s' % self.namespaced(self.UnitClass.rootNode)) 
329          if termEntries is None: 
330              return 
331          for entry in termEntries: 
332              term = self.UnitClass.createfromxmlElement(entry) 
333              term.namespace = self.namespace 
334              self.units.append(term) 
  335