1   
  2   
  3  from translate.convert import html2po 
  4  from translate.convert import po2html 
  5  from translate.convert import test_convert 
  6  from translate.misc import wStringIO 
  7   
 10          """Helper to convert html to po without a file.""" 
 11          inputfile = wStringIO.StringIO(markup) 
 12          convertor = html2po.html2po() 
 13          outputpo = convertor.convertfile(inputfile, "test", False, False) 
 14          return outputpo 
  15   
 16 -    def po2html(self, posource, htmltemplate): 
  23   
 25          """helper to check that we got the expected number of messages""" 
 26          actual = len(pofile.units) 
 27          if actual > 0: 
 28            if pofile.units[0].isheader(): 
 29              actual = actual - 1 
 30          print pofile 
 31          assert actual == expected 
  32   
 34          """helper to validate a PO message""" 
 35          if not pofile.units[0].isheader(): 
 36            unitnumber = unitnumber - 1 
 37          print 'unit source: ' + str(pofile.units[unitnumber].source) + '|' 
 38          print 'expected: ' + expected.encode('utf-8') + '|' 
 39          assert unicode(pofile.units[unitnumber].source) == unicode(expected) 
  40   
 46   
 51   
 53          """test to ensure that we no longer use the lang attribute""" 
 54          markup = '''<html lang="en"><head><title>My title</title></head><body></body></html>''' 
 55          pofile = self.html2po(markup) 
 56          self.countunits(pofile, 1) 
 57           
 58          self.compareunit(pofile, 1, "My title") 
  59   
 61          """test that we can extract the <title> tag""" 
 62          self.check_single("<html><head><title>My title</title></head><body></body></html>", "My title") 
  63   
 65          """Test a linebreak in the <title> tag""" 
 66          htmltext = '''<html> 
 67  <head> 
 68    <title>My 
 69  title</title> 
 70  </head> 
 71  <body> 
 72  </body> 
 73  </html> 
 74  ''' 
 75          self.check_single(htmltext, "My title") 
  76   
 80   
 82          """test that we can extract the <p> tag""" 
 83          self.check_single("<html><head></head><body><p>A paragraph.</p></body></html>", "A paragraph.") 
 84          markup = "<p>First line.<br>Second line.</p>" 
 85          pofile = self.html2po(markup) 
 86          self.compareunit(pofile, 1, "First line.<br>Second line.") 
  87   
 89          """Test newlines within the <p> tag.""" 
 90          htmltext = '''<html> 
 91  <head> 
 92  </head> 
 93  <body> 
 94  <p> 
 95  A paragraph is a section in a piece of writing, usually highlighting a 
 96  particular point or topic. It always begins on a new line and usually 
 97  with indentation, and it consists of at least one sentence. 
 98  </p> 
 99  </body> 
100  </html> 
101  ''' 
102          self.check_single(htmltext, "A paragraph is a section in a piece of writing, usually highlighting a particular point or topic. It always begins on a new line and usually with indentation, and it consists of at least one sentence.") 
103          markup = "<p>First\nline.<br>Second\nline.</p>" 
104          pofile = self.html2po(markup) 
105          self.compareunit(pofile, 1, "First line.<br>Second line.") 
 106   
108          """test that we can extract the <div> tag""" 
109          self.check_single("<html><head></head><body><div>A paragraph.</div></body></html>", "A paragraph.") 
110          markup = "<div>First line.<br>Second line.</div>" 
111          pofile = self.html2po(markup) 
112          self.compareunit(pofile, 1, "First line.<br>Second line.") 
 113   
115          """Test linebreaks within a <div> tag.""" 
116          htmltext = '''<html> 
117  <head> 
118  </head> 
119  <body> 
120  <div> 
121  A paragraph is a section in a piece of writing, usually highlighting a 
122  particular point or topic. It always begins on a new line and usually 
123  with indentation, and it consists of at least one sentence. 
124  </div> 
125  </body> 
126  </html> 
127  ''' 
128          self.check_single(htmltext, "A paragraph is a section in a piece of writing, usually highlighting a particular point or topic. It always begins on a new line and usually with indentation, and it consists of at least one sentence.") 
129          markup = "<div>First\nline.<br>Second\nline.</div>" 
130          pofile = self.html2po(markup) 
131          self.compareunit(pofile, 1, "First line.<br>Second line.") 
 132   
134          """test that we can extract the <a> tag""" 
135          self.check_single('<html><head></head><body><p>A paragraph with <a href="http://translate.org.za/">hyperlink</a>.</p></body></html>', 'A paragraph with <a href="http://translate.org.za/">hyperlink</a>.') 
 136   
138          """Test that we can extract the <a> tag with newlines in it.""" 
139          htmltext = '''<html> 
140  <head> 
141  </head> 
142  <body> 
143  <p>A 
144  paragraph 
145  with <a 
146  href="http://translate.org.za/">hyperlink</a> 
147  and 
148  newlines.</p></body></html> 
149  ''' 
150          self.check_single(htmltext, 'A paragraph with <a href="http://translate.org.za/">hyperlink</a> and newlines.') 
 151   
153          """Test that we can extract the alt attribute from the <img> tag.""" 
154          self.check_single('''<html><head></head><body><img src="picture.png" alt="A picture"></body></html>''', "A picture") 
 155   
157          """Test that an <img> tag without an alt attribute gives nothing to extract.""" 
158          htmlsource = '''<html><head></head><body><img src="images/topbar.jpg" width="750" height="80"></body></html>''' 
159          self.check_null(htmlsource) 
 160   
162          """Test that we can extract the summary attribute.""" 
163          self.check_single( '''<html><head></head><body><table summary="Table summary"></table></body></html>''', "Table summary") 
 164   
174   
176          markup = '''<table summary="This is the summary"><caption>A caption</caption><thead><tr><th abbr="Head 1">Heading One</th><th>Heading Two</th></thead><tfoot><tr><td>Foot One</td><td>Foot Two</td></tr></tfoot><tbody><tr><td>One</td><td>Two</td></tr></tbody></table>''' 
177          pofile = self.html2po(markup) 
178          self.countunits(pofile, 9) 
179          self.compareunit(pofile, 1, "This is the summary") 
180          self.compareunit(pofile, 2, "A caption") 
181          self.compareunit(pofile, 3, "Head 1") 
182          self.compareunit(pofile, 4, "Heading One") 
183          self.compareunit(pofile, 5, "Heading Two") 
184          self.compareunit(pofile, 6, "Foot One") 
185          self.compareunit(pofile, 7, "Foot Two") 
186          self.compareunit(pofile, 8, "One") 
187          self.compareunit(pofile, 9, "Two") 
 188   
190          """Test that we ignore tables that are empty. 
191           
192          A table is deemed empty if it has no translatable content. 
193          """ 
194   
195          self.check_null('''<html><head></head><body><table><tr><td><img src="bob.png"></td></tr></table></body></html>''') 
196          self.check_null('''<html><head></head><body><table><tr><td> </td></tr></table></body></html>''') 
197          self.check_null('''<html><head></head><body><table><tr><td><strong></strong></td></tr></table></body></html>''') 
 198           
200          """Test to see if the address element is extracted""" 
201          self.check_single("<body><address>My address</address></body>", "My address") 
 202            
204          """Test to see if the h* elements are extracted""" 
205          markup = "<html><head></head><body><h1>Heading One</h1><h2>Heading Two</h2><h3>Heading Three</h3><h4>Heading Four</h4><h5>Heading Five</h5><h6>Heading Six</h6></body></html>" 
206          pofile = self.html2po(markup) 
207          self.countunits(pofile, 6) 
208          self.compareunit(pofile, 1, "Heading One") 
209          self.compareunit(pofile, 2, "Heading Two") 
210          self.compareunit(pofile, 3, "Heading Three") 
211          self.compareunit(pofile, 4, "Heading Four") 
212          self.compareunit(pofile, 5, "Heading Five") 
213          self.compareunit(pofile, 6, "Heading Six") 
 214   
216          """Test to see if h* elements with newlines can be extracted""" 
217          markup = "<html><head></head><body><h1>Heading\nOne</h1><h2>Heading\nTwo</h2><h3>Heading\nThree</h3><h4>Heading\nFour</h4><h5>Heading\nFive</h5><h6>Heading\nSix</h6></body></html>" 
218          pofile = self.html2po(markup) 
219          self.countunits(pofile, 6) 
220          self.compareunit(pofile, 1, "Heading One") 
221          self.compareunit(pofile, 2, "Heading Two") 
222          self.compareunit(pofile, 3, "Heading Three") 
223          self.compareunit(pofile, 4, "Heading Four") 
224          self.compareunit(pofile, 5, "Heading Five") 
225          self.compareunit(pofile, 6, "Heading Six") 
 226           
228          """Test to see if the definition list title (dt) element is extracted""" 
229          self.check_single("<html><head></head><body><dl><dt>Definition List Item Title</dt></dl></body></html>", "Definition List Item Title") 
 230           
232          """Test to see if the definition list description (dd) element is extracted""" 
233          self.check_single("<html><head></head><body><dl><dd>Definition List Item Description</dd></dl></body></html>", "Definition List Item Description") 
 234   
236          """test to check that we don't double extract a span item""" 
237          self.check_single("<html><head></head><body><p>You are a <span>Spanish</span> sentence.</p></body></html>", "You are a <span>Spanish</span> sentence.") 
 238   
248   
250          """check that we use the default style of msgid_comments to disambiguate duplicate messages""" 
251          markup = "<html><head></head><body><p>Duplicate</p><p>Duplicate</p></body></html>" 
252          pofile = self.html2po(markup) 
253          self.countunits(pofile, 2) 
254           
255          self.compareunit(pofile, 1, "Duplicate") 
256          self.compareunit(pofile, 2, "Duplicate") 
 257   
259          """check that we reflow multiline content to make it more readable for translators""" 
260          self.check_single('''<td valign="middle" width="96%"><font class="headingwhite">South 
261                    Africa</font></td>''', '''<font class="headingwhite">South Africa</font>''') 
 262   
270   
272          """Remove carriage returns from files in dos format.""" 
273          htmlsource = '''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">\r 
274  <html><!-- InstanceBegin template="/Templates/masterpage.dwt" codeOutsideHTMLIsLocked="false" -->\r 
275  <head>\r 
276  <!-- InstanceBeginEditable name="doctitle" -->\r 
277  <link href="fmfi.css" rel="stylesheet" type="text/css">\r 
278  </head>\r 
279  \r 
280  <body>\r 
281  <p>The rapid expansion of telecommunications infrastructure in recent\r 
282  years has helped to bridge the digital divide to a limited extent.</p> \r 
283  </body>\r 
284  <!-- InstanceEnd --></html>\r 
285  ''' 
286   
287          self.check_single(htmlsource, 'The rapid expansion of telecommunications infrastructure in recent years has helped to bridge the digital divide to a limited extent.') 
 288   
290          """Convert HTML input in iso-8859-1 correctly to unicode.""" 
291          htmlsource = '''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> 
292  <html><!-- InstanceBegin template="/Templates/masterpage.dwt" codeOutsideHTMLIsLocked="false" --> 
293  <head> 
294  <!-- InstanceBeginEditable name="doctitle" --> 
295  <title>FMFI - South Africa - CSIR Openphone - Overview</title> 
296  <!-- InstanceEndEditable --> 
297  <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 
298  <meta name="keywords" content="fmfi, first mile, first inch, wireless, rural development, access devices, mobile devices, wifi, connectivity, rural connectivty, ict, low cost, cheap, digital divide, csir, idrc, community"> 
299   
300  <!-- InstanceBeginEditable name="head" --> 
301  <!-- InstanceEndEditable --> 
302  <link href="../../../fmfi.css" rel="stylesheet" type="text/css"> 
303  </head> 
304   
305  <body> 
306  <p>We aim to please \x96 will you aim too, please?</p> 
307  <p>South Africa\x92s language diversity can be challenging.</p> 
308  </body> 
309  </html> 
310  ''' 
311          pofile = self.html2po(htmlsource) 
312   
313          self.countunits(pofile, 4) 
314          self.compareunit(pofile, 3, u'We aim to please \x96 will you aim too, please?') 
315          self.compareunit(pofile, 4, u'South Africa\x92s language diversity can be challenging.') 
 316   
318          """Ensure that unnecessary html is stripped from the resulting unit.""" 
319   
320          htmlsource = '''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> 
321  <html> 
322  <head> 
323  <title>FMFI - Contact</title> 
324  </head> 
325  <body> 
326  <table width="100%"  border="0" cellpadding="0" cellspacing="0"> 
327    <tr align="left" valign="top"> 
328      <td width="150" height="556">  
329        <table width="157" height="100%" border="0" cellspacing="0" id="leftmenubg-color"> 
330        <tr> 
331            <td align="left" valign="top" height="555">  
332              <table width="100%" border="0" cellspacing="0" cellpadding="2"> 
333                <tr align="left" valign="top" bgcolor="#660000">  
334                  <td width="4%"><strong></strong></td> 
335                  <td width="96%"><strong><font class="headingwhite">Projects</font></strong></td> 
336                </tr> 
337                <tr align="left" valign="top">  
338                  <td valign="middle" width="4%"><img src="images/arrow.gif" width="8" height="8"></td> 
339                  <td width="96%"><a href="index.html">Home Page</a></td> 
340                </tr> 
341              </table> 
342            </td> 
343        </tr> 
344      </table></td> 
345  </table> 
346  </body> 
347  </html> 
348  ''' 
349          pofile = self.html2po(htmlsource) 
350          self.countunits(pofile, 3) 
351          self.compareunit(pofile, 2, u'Projects') 
352          self.compareunit(pofile, 3, u'Home Page') 
353   
354           
355          pofile.units[1].target = 'Projekte' 
356          pofile.units[2].target = 'Tuisblad' 
357          htmlresult = self.po2html(str(pofile), htmlsource).replace('\n', ' ').replace('= "', '="').replace('> <', '><') 
358          snippet = '<td width="96%"><strong><font class="headingwhite">Projekte</font></strong></td>' 
359          assert snippet in htmlresult 
360          snippet = '<td width="96%"><a href="index.html">Tuisblad</a></td>' 
361          assert snippet in htmlresult 
 364      """Tests running actual html2po commands on files""" 
365      convertmodule = html2po 
366      defaultoptions = {"progress": "none"} 
367   
 374