1   
  2  """Text wrapping and filling. 
  3  """ 
  4   
  5   
  6   
  7   
  8   
  9  __revision__ = "$Id: textwrap.py 4103 2006-10-20 07:35:02Z dwaynebailey $" 
 10   
 11  import string, re 
 12   
 13   
 14   
 15   
# Boolean compatibility shim: if the names True and False are not defined
# (NameError), bind them to 1 and 0 so the rest of the module can use them.
# NOTE(review): presumably for interpreters that predate the bool builtins --
# confirm before removing.
try:
    True, False
except NameError:
    (True, False) = (1, 0)
 20   
# Names exported by "from <this module> import *".
# NOTE(review): dedent is defined in this module but is not listed here --
# confirm whether that is intentional.
__all__ = ['TextWrapper', 'wrap', 'fill']

# Hard-coded set of whitespace characters recognised by this module: tab,
# newline, vertical tab, form feed, carriage return and space.  It is used
# to build the translation tables that turn each of them into a plain space.
# NOTE(review): presumably hard-coded rather than taken from
# string.whitespace to stay independent of the locale -- confirm.
_whitespace = '\t\n\x0b\x0c\r '
 32   
 34      """ 
 35      Object for wrapping/filling text.  The public interface consists of 
 36      the wrap() and fill() methods; the other methods are just there for 
 37      subclasses to override in order to tweak the default behaviour. 
 38      If you want to completely replace the main wrapping algorithm, 
 39      you'll probably have to override _wrap_chunks(). 
 40   
 41      Several instance attributes control various aspects of wrapping: 
 42        width (default: 70) 
 43          the maximum width of wrapped lines (unless break_long_words 
 44          is false) 
 45        initial_indent (default: "") 
 46          string that will be prepended to the first line of wrapped 
 47          output.  Counts towards the line's width. 
 48        subsequent_indent (default: "") 
 49          string that will be prepended to all lines save the first 
 50          of wrapped output; also counts towards each line's width. 
 51        expand_tabs (default: true) 
 52          Expand tabs in input text to spaces before further processing. 
 53          Each tab will become 1 .. 8 spaces, depending on its position in 
 54          its line.  If false, each tab is treated as a single character. 
 55        drop_whitespace (default: true) 
 56          Drop leading and trailing whitespace from lines. 
 57        replace_whitespace (default: true) 
 58          Replace all whitespace characters in the input text by spaces 
 59          after tab expansion.  Note that if expand_tabs is false and 
 60          replace_whitespace is true, every tab will be converted to a 
 61          single space! 
 62        fix_sentence_endings (default: false) 
 63          Ensure that sentence-ending punctuation is always followed 
 64          by two spaces.  Off by default because the algorithm is 
 65          (unavoidably) imperfect. 
 66        break_long_words (default: true) 
 67          Break words longer than 'width'.  If false, those words will not 
 68          be broken, and some lines might be longer than 'width'. 
 69      """ 
 70   
    # Translation table for byte strings: maps every character listed in
    # _whitespace to a plain space (consumed by str.translate in
    # _munge_whitespace).
    whitespace_trans = string.maketrans(_whitespace, ' ' * len(_whitespace))

    # Equivalent table for unicode strings, keyed by code point, because
    # unicode.translate takes a mapping of ordinals rather than a
    # 256-character table.  Note that the helper names 'uspace' and 'x'
    # remain behind as class attributes once the loop has run.
    unicode_whitespace_trans = {}
    uspace = ord(u' ')
    for x in map(ord, _whitespace):
        unicode_whitespace_trans[x] = uspace
 77   
 78       
 79       
 80       
 81       
 82       
 83       
    # Regex used by _split() to cut text into word-wrappable chunks.  The
    # whole pattern is one capturing group, so re.split() returns the
    # separators themselves as chunks (along with empty strings, which
    # _split() filters out).  E.g.
    #   "Look, goof-ball -- use the -b option!"
    # splits into
    #   Look,/ /goof-/ball/ /--/ /use/ /the/ /-b/ /option!
    wordsep_re = re.compile(
        r'(\s+|'                                  # any run of whitespace
        r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|'   # hyphenated words
        r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash between words
 88   
 89       
 90       
 91      sentence_end_re = re.compile(r'[%s]'               
 92                                   r'[\.\!\?]'           
 93                                   r'[\"\']?'            
 94                                   % string.lowercase) 
 95   
 96   
 97 -    def __init__(self, 
 98                   width=70, 
 99                   initial_indent="", 
100                   subsequent_indent="", 
101                   expand_tabs=True, 
102                   drop_whitespace=True, 
103                   replace_whitespace=True, 
104                   fix_sentence_endings=False, 
105                   break_long_words=True): 
 106          self.width = width 
107          self.initial_indent = initial_indent 
108          self.subsequent_indent = subsequent_indent 
109          self.expand_tabs = expand_tabs 
110          self.drop_whitespace = drop_whitespace 
111          self.replace_whitespace = replace_whitespace 
112          self.fix_sentence_endings = fix_sentence_endings 
113          self.break_long_words = break_long_words 
 114   
115   
116       
117       
118   
119 -    def _munge_whitespace(self, text): 
 120          """_munge_whitespace(text : string) -> string 
121   
122          Munge whitespace in text: expand tabs and convert all other 
123          whitespace characters to spaces.  Eg. " foo\tbar\n\nbaz" 
124          becomes " foo    bar  baz". 
125          """ 
126          if self.expand_tabs: 
127              text = text.expandtabs() 
128          if self.replace_whitespace: 
129              if isinstance(text, str): 
130                  text = text.translate(self.whitespace_trans) 
131              elif isinstance(text, unicode): 
132                  text = text.translate(self.unicode_whitespace_trans) 
133          return text 
 134   
135   
136 -    def _split(self, text): 
 137          """_split(text : string) -> [string] 
138   
139          Split the text to wrap into indivisible chunks.  Chunks are 
140          not quite the same as words; see wrap_chunks() for full 
141          details.  As an example, the text 
142            Look, goof-ball -- use the -b option! 
143          breaks into the following chunks: 
144            'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ', 
145            'use', ' ', 'the', ' ', '-b', ' ', 'option!' 
146          """ 
147          chunks = self.wordsep_re.split(text) 
148          chunks = filter(None, chunks) 
149          return chunks 
 150   
151 -    def _fix_sentence_endings(self, chunks): 
 152          """_fix_sentence_endings(chunks : [string]) 
153   
154          Correct for sentence endings buried in 'chunks'.  Eg. when the 
155          original text contains "... foo.\nBar ...", munge_whitespace() 
156          and split() will convert that to [..., "foo.", " ", "Bar", ...] 
157          which has one too few spaces; this method simply changes the one 
158          space to two. 
159          """ 
160          i = 0 
161          pat = self.sentence_end_re 
162          while i < len(chunks)-1: 
163              if chunks[i+1] == " " and pat.search(chunks[i]): 
164                  chunks[i+1] = "  " 
165                  i += 2 
166              else: 
167                  i += 1 
 168   
169 -    def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): 
 170          """_handle_long_word(chunks : [string], 
171                               cur_line : [string], 
172                               cur_len : int, width : int) 
173   
174          Handle a chunk of text (most likely a word, not whitespace) that 
175          is too long to fit in any line. 
176          """ 
177          space_left = max(width - cur_len, 1) 
178   
179           
180           
181          if self.break_long_words: 
182              cur_line.append(reversed_chunks[-1][:space_left]) 
183              reversed_chunks[-1] = reversed_chunks[-1][space_left:] 
184   
185           
186           
187           
188          elif not cur_line: 
189              cur_line.append(reversed_chunks.pop()) 
 190   
191           
192           
193           
194           
195           
196   
197 -    def _wrap_chunks(self, chunks): 
 198          """_wrap_chunks(chunks : [string]) -> [string] 
199   
200          Wrap a sequence of text chunks and return a list of lines of 
201          length 'self.width' or less.  (If 'break_long_words' is false, 
202          some lines may be longer than this.)  Chunks correspond roughly 
203          to words and the whitespace between them: each chunk is 
204          indivisible (modulo 'break_long_words'), but a line break can 
205          come between any two chunks.  Chunks should not have internal 
206          whitespace; ie. a chunk is either all whitespace or a "word". 
207          Whitespace chunks will be removed from the beginning and end of 
208          lines, but apart from that whitespace is preserved. 
209          """ 
210          lines = [] 
211          if self.width <= 0: 
212              raise ValueError("invalid width %r (must be > 0)" % self.width) 
213   
214           
215           
216          chunks.reverse() 
217   
218          while chunks: 
219   
220               
221               
222              cur_line = [] 
223              cur_len = 0 
224   
225               
226              if lines: 
227                  indent = self.subsequent_indent 
228              else: 
229                  indent = self.initial_indent 
230   
231               
232              width = self.width - len(indent) 
233   
234               
235               
236              if self.drop_whitespace and chunks[-1].strip() == '' and lines: 
237                  del chunks[-1] 
238   
239              while chunks: 
240                  l = len(chunks[-1]) 
241   
242                   
243                  if cur_len + l <= width: 
244                      cur_line.append(chunks.pop()) 
245                      cur_len += l 
246   
247                   
248                  else: 
249                      break 
250   
251               
252               
253              if chunks and len(chunks[-1]) > width: 
254                  self._handle_long_word(chunks, cur_line, cur_len, width) 
255   
256               
257              if self.drop_whitespace and cur_line and cur_line[-1].strip() == '': 
258                  del cur_line[-1] 
259   
260               
261               
262              if cur_line: 
263                  lines.append(indent + ''.join(cur_line)) 
264   
265          return lines 
 266   
267   
268       
269   
270 -    def wrap(self, text): 
 271          """wrap(text : string) -> [string] 
272   
273          Reformat the single paragraph in 'text' so it fits in lines of 
274          no more than 'self.width' columns, and return a list of wrapped 
275          lines.  Tabs in 'text' are expanded with string.expandtabs(), 
276          and all other whitespace characters (including newline) are 
277          converted to space. 
278          """ 
279          text = self._munge_whitespace(text) 
280          chunks = self._split(text) 
281          if self.fix_sentence_endings: 
282              self._fix_sentence_endings(chunks) 
283          return self._wrap_chunks(chunks) 
 284   
285 -    def fill(self, text): 
 286          """fill(text : string) -> string 
287   
288          Reformat the single paragraph in 'text' to fit in lines of no 
289          more than 'self.width' columns, and return a new string 
290          containing the entire wrapped paragraph. 
291          """ 
292          return "\n".join(self.wrap(text)) 
  293   
294   
295   
296   
def wrap(text, width=70, **kwargs):
    """Wrap a single paragraph of text, returning a list of wrapped lines.

    Convenience wrapper: builds a TextWrapper with the given 'width' and
    any extra keyword arguments, and returns the result of its wrap()
    method for 'text', i.e. the paragraph rewrapped into lines of no
    more than 'width' columns.  See the TextWrapper class for the
    keyword arguments that customise wrapping behaviour.
    """
    return TextWrapper(width=width, **kwargs).wrap(text)
 309   
def fill(text, width=70, **kwargs):
    """Fill a single paragraph of text, returning a new string.

    Convenience wrapper: builds a TextWrapper with the given 'width' and
    any extra keyword arguments, and returns the result of its fill()
    method for 'text', i.e. the whole rewrapped paragraph as a single
    string.  See the TextWrapper class for the keyword arguments that
    customise wrapping behaviour.
    """
    return TextWrapper(width=width, **kwargs).fill(text)
 321   
322   
323   
324   
# Matches lines that consist solely of spaces and/or tabs.
_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
# Captures the leading run of spaces/tabs of every line that contains at
# least one character other than space, tab or newline.
_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)

def dedent(text):
    """Remove any common leading whitespace from every line in `text`.

    This can be used to make triple-quoted strings line up with the left
    edge of the display, while still presenting them in the source code
    in indented form.

    Note that tabs and spaces are both treated as whitespace, but they
    are not equal: the lines "  hello" and "\\thello" are
    considered to have no common leading whitespace.

    Lines that contain only spaces and tabs are emptied before the
    common margin is computed, so they never influence it.  Returns the
    dedented text; the text is returned with only those lines emptied
    when there is no common margin.
    """
    # Look for the longest leading string of spaces and tabs common to
    # all lines.
    margin = None
    text = _whitespace_only_re.sub('', text)
    for indent in _leading_whitespace_re.findall(text):
        if margin is None:
            margin = indent

        # Current line more deeply indented than previous winner:
        # no change (previous winner is still on top).
        elif indent.startswith(margin):
            pass

        # Current line consistent with and no deeper than previous winner:
        # it's the new winner.
        elif margin.startswith(indent):
            margin = indent

        # Current line and previous winner have no common whitespace:
        # there is no margin.
        else:
            margin = ""
            break

    # The margin holds only spaces and tabs, so it is safe to splice it
    # into the pattern unescaped.
    if margin:
        text = re.sub(r'(?m)^' + margin, '', text)
    return text
 375   
if __name__ == "__main__":
    # Manual smoke test: nothing is indented on the first line, so the
    # sample is printed unchanged.
    sample = "Hello there.\n  This is indented."
    print(dedent(sample))
380