1   
  2   
  3   
  4   
  5   
  6   
  7   
  8   
  9   
 10   
 11   
 12   
 13   
 14   
 15   
 16   
 17   
 18   
 19   
 20   
 21   
"""Classes that hold units of .dtd files (dtdunit) or entire files (dtdfile).

These are the Mozilla-specific .dtd files used for localisation.
"""
 24   
 25  from translate.storage import base 
 26  from translate.misc import quote 
 27   
 28  import re 
 29  import sys 
 30  import warnings 
 31   
 40   
 52   
 53 -class dtdunit(base.TranslationUnit): 
  54      """this class represents an entity definition from a dtd file (and possibly associated comments)""" 
 56          """construct the dtdunit, prepare it for parsing""" 
 57          super(dtdunit, self).__init__(source) 
 58          self.comments = [] 
 59          self.unparsedlines = [] 
 60          self.incomment = 0 
 61          self.inentity = 0 
 62          self.entity = "FakeEntityOnlyForInitialisationAndTesting"  
 63          self.source = source 
  64   
 65       
 67          """Sets the definition to the quoted value of source""" 
 68          self.definition = quotefordtd(source) 
  69   
 71          """gets the unquoted source string""" 
 72          return unquotefromdtd(self.definition) 
  73      source = property(getsource, setsource) 
 74   
 80   
 82          """gets the unquoted target string""" 
 83          return unquotefromdtd(self.definition) 
  84      target = property(gettarget, settarget) 
 85   
 87          """returns whether this dtdunit doesn't actually have an entity definition""" 
 88           
 89           
 90          return self.entity is None 
  91   
 93          """read the first dtd element from the source code into this object, return linesprocessed""" 
 94          self.comments = [] 
 95           
 96          self.locfilenotes = self.comments 
 97          self.locgroupstarts = self.comments 
 98          self.locgroupends = self.comments 
 99          self.locnotes = self.comments 
100           
101           
102           
103           
104           
105          self.entity = None 
106          self.definition = '' 
107          if not dtdsrc: 
108              return 0 
109          lines = dtdsrc.split("\n") 
110          linesprocessed = 0 
111          comment = "" 
112          for line in lines: 
113              line += "\n" 
114              linesprocessed += 1 
115               
116              if not self.incomment: 
117                  if (line.find('<!--') != -1): 
118                      self.incomment = 1 
119                      self.continuecomment = 0 
120                       
121                      (comment, dummy) = quote.extract(line,"<!--","-->",None,0) 
122                      if comment.find('LOCALIZATION NOTE') != -1: 
123                          l = quote.findend(comment,'LOCALIZATION NOTE') 
124                          while (comment[l] == ' '): l += 1 
125                          if comment.find('FILE',l) == l: 
126                              self.commenttype = "locfile" 
127                          elif comment.find('BEGIN',l) == l: 
128                              self.commenttype = "locgroupstart" 
129                          elif comment.find('END',l) == l: 
130                              self.commenttype = "locgroupend" 
131                          else: 
132                              self.commenttype = "locnote" 
133                      else: 
134                           
135                          self.commenttype = "comment" 
136   
137              if self.incomment: 
138                   
139                  (comment, self.incomment) = quote.extract(line,"<!--","-->",None,self.continuecomment) 
140                   
141                  self.continuecomment = self.incomment 
142                   
143                  line = line.replace(comment, "", 1) 
144                   
145                  if not self.incomment: 
146                      if line.isspace(): 
147                          comment += line 
148                          line = '' 
149                      else: 
150                          comment += '\n' 
151                   
152                   
153                   
154                   
155                   
156                   
157                   
158                  commentpair = (self.commenttype,comment) 
159                  if self.commenttype == "locfile": 
160                      self.locfilenotes.append(commentpair) 
161                  elif self.commenttype == "locgroupstart": 
162                      self.locgroupstarts.append(commentpair) 
163                  elif self.commenttype == "locgroupend": 
164                      self.locgroupends.append(commentpair) 
165                  elif self.commenttype == "locnote": 
166                      self.locnotes.append(commentpair) 
167                  elif self.commenttype == "comment": 
168                      self.comments.append(commentpair) 
169   
170              if not self.inentity and not self.incomment: 
171                  entitypos = line.find('<!ENTITY') 
172                  if entitypos != -1: 
173                      self.inentity = 1 
174                      beforeentity = line[:entitypos].strip() 
175                      if beforeentity.startswith("#"): 
176                          self.hashprefix = beforeentity 
177                      self.entitypart = "start" 
178                  else: 
179                      self.unparsedlines.append(line) 
180   
181              if self.inentity: 
182                  if self.entitypart == "start": 
183                       
184                      e = quote.findend(line,'<!ENTITY') 
185                      line = line[e:] 
186                      self.entitypart = "name" 
187                      self.entitytype = "internal" 
188                  if self.entitypart == "name": 
189                      e = 0 
190                      while (e < len(line) and line[e].isspace()): e += 1 
191                      self.entity = '' 
192                      if (e < len(line) and line[e] == '%'): 
193                          self.entitytype = "external" 
194                          self.entityparameter = "" 
195                          e += 1 
196                          while (e < len(line) and line[e].isspace()): e += 1 
197                      while (e < len(line) and not line[e].isspace()): 
198                          self.entity += line[e] 
199                          e += 1 
200                      while (e < len(line) and line[e].isspace()): e += 1 
201                      if self.entity: 
202                          if self.entitytype == "external": 
203                              self.entitypart = "parameter" 
204                          else: 
205                              self.entitypart = "definition" 
206                           
207                          if e == len(line): 
208                              self.entityhelp = None 
209                              continue 
210                          elif self.entitypart == "definition": 
211                              self.entityhelp = (e,line[e]) 
212                              self.instring = 0 
213                  if self.entitypart == "parameter": 
214                      paramstart = e 
215                      while (e < len(line) and line[e].isalnum()): e += 1 
216                      self.entityparameter += line[paramstart:e] 
217                      while (e < len(line) and line[e].isspace()): e += 1 
218                      line = line[e:] 
219                      e = 0 
220                      if not line: 
221                          continue 
222                      if line[0] in ('"', "'"): 
223                          self.entitypart = "definition" 
224                          self.entityhelp = (e,line[e]) 
225                          self.instring = 0 
226                  if self.entitypart == "definition": 
227                      if self.entityhelp is None: 
228                          e = 0 
229                          while (e < len(line) and line[e].isspace()): e += 1 
230                          if e == len(line): 
231                              continue 
232                          self.entityhelp = (e,line[e]) 
233                          self.instring = 0 
234                       
235                      e = self.entityhelp[0] 
236                      if (self.entityhelp[1] == "'"): 
237                          (defpart,self.instring) = quote.extract(line[e:],"'","'",startinstring=self.instring,allowreentry=False) 
238                      elif (self.entityhelp[1] == '"'): 
239                          (defpart,self.instring) = quote.extract(line[e:],'"','"',startinstring=self.instring,allowreentry=False) 
240                      else: 
241                          raise ValueError("Unexpected quote character... %r" % (self.entityhelp[1])) 
242                       
243                      self.entityhelp = (0,self.entityhelp[1]) 
244                      self.definition += defpart 
245                      if not self.instring: 
246                          self.inentity = 0 
247                          break 
248   
249           
250          if 0: 
251              for attr in dir(self): 
252                  r = repr(getattr(self,attr)) 
253                  if len(r) > 60: r = r[:57]+"..." 
254                  self.comments.append(("comment","self.%s = %s" % (attr,r) )) 
255          return linesprocessed 
 256   
263   
265          """convert the dtd entity back to string form""" 
266          lines = [] 
267          lines.extend([comment for commenttype,comment in self.comments]) 
268          lines.extend(self.unparsedlines) 
269          if self.isnull(): 
270              result = "".join(lines) 
271              return result.rstrip() + "\n" 
272           
273           
274           
275           
276          if len(self.entity) > 0: 
277              if getattr(self, 'entitytype', None) == 'external': 
278                  entityline = '<!ENTITY % '+self.entity+' '+self.entityparameter+' '+self.definition+'>' 
279              else: 
280                  entityline = '<!ENTITY '+self.entity+' '+self.definition+'>' 
281              if getattr(self, 'hashprefix', None): 
282                  entityline = self.hashprefix + " " + entityline 
283              if isinstance(entityline, unicode): 
284                  entityline = entityline.encode('UTF-8') 
285              lines.append(entityline+'\n') 
286          return "".join(lines) 
  287   
288 -class dtdfile(base.TranslationStore): 
 289      """this class represents a .dtd file, made up of dtdunits""" 
290      UnitClass = dtdunit 
300   
301 -    def parse(self, dtdsrc): 
 302          """read the source code of a dtd file in and include them as dtdunits in self.units (any existing units are lost)""" 
303          self.units = [] 
304          start = 0 
305          end = 0 
306          lines = dtdsrc.split("\n") 
307          while end < len(lines): 
308              if (start == end): end += 1 
309              foundentity = 0 
310              while end < len(lines): 
311                  if end >= len(lines): 
312                      break 
313                  if lines[end].find('<!ENTITY') > -1: 
314                      foundentity = 1 
315                  if foundentity and re.match("[\"']\s*>", lines[end]): 
316                      end += 1 
317                      break 
318                  end += 1 
319               
320   
321              linesprocessed = 1  
322              while linesprocessed >= 1: 
323                  newdtd = dtdunit() 
324                  try: 
325                      linesprocessed = newdtd.parse("\n".join(lines[start:end])) 
326                      if linesprocessed >= 1 and (not newdtd.isnull() or newdtd.unparsedlines): 
327                          self.units.append(newdtd) 
328                  except Exception, e: 
329                      warnings.warn("%s\nError occured between lines %d and %d:\n%s" % (e, start+1, end, "\n".join(lines[start:end]))) 
330                  start += linesprocessed 
 331   
338   
340          """convert the units back to source""" 
341          sources = [str(dtd) for dtd in self.units] 
342          return "".join(sources) 
 343   
345          """makes self.index dictionary keyed on entities""" 
346          self.index = {} 
347          for dtd in self.units: 
348              if not dtd.isnull(): 
349                  self.index[dtd.entity] = dtd 
 350   
352          for dtd in self.units: 
353              lines = dtd.definition.split("\n") 
354              if len(lines) > 1: 
355                  definition = lines[0] 
356                  for line in lines[1:]: 
357                      if definition[-1:].isspace() or line[:1].isspace(): 
358                          definition += line 
359                      else: 
360                          definition += " " + line 
361                  dtd.definition = definition 
  362   
if __name__ == "__main__":
    # Filter mode: build a dtdfile from standard input, rewrap its
    # definitions, and write the result to standard output.
    import sys
    store = dtdfile(sys.stdin)
    store.rewrap()
    rendered = str(store)
    sys.stdout.write(rendered)
368