1   
  2   
  3   
  4   
  5   
  6   
  7   
  8   
  9   
 10   
 11   
 12   
 13   
 14   
 15   
 16   
 17   
 18   
 19   
 20   
 21   
 22  """functions to get decorative/informative text out of strings...""" 
 23   
 24  import re 
 25  import unicodedata 
 26   
 28      """returns all the whitespace from the start of the string""" 
 29      newstring = "" 
 30      for c in str1: 
 31          if not c.isspace(): return newstring 
 32          else: newstring += c 
 33      return newstring 
  34   
 36      """returns all the whitespace from the end of the string""" 
 37      newstring = "" 
 38      for n in range(len(str1)): 
 39          c = str1[-1-n] 
 40          if not c.isspace(): return newstring 
 41          else: newstring = c + newstring 
 42      return newstring 
  43   
 45      """returns all the punctuation from the start of the string""" 
 46      newstring = "" 
 47      for c in str1: 
 48          if c not in punctuation and not c.isspace(): return newstring 
 49          else: newstring += c 
 50      return newstring 
  51   
 53      """returns all the punctuation from the end of the string""" 
 54      newstring = "" 
 55      for n in range(len(str1)): 
 56          c = str1[-1-n] 
 57          if c not in punctuation and not c.isspace(): return newstring 
 58          else: newstring = c + newstring 
 59      return newstring 
  60   
 62      """checks whether the string is entirely punctuation""" 
 63      for c in str1: 
 64          if c.isalpha(): return 0 
 65      return len(str1) 
  66   
 68      """returns whether the given accelerator string is a valid one...""" 
 69      if len(accelerator) == 0 or accelerator in ignorelist: 
 70          return 0 
 71      accelerator = accelerator.replace("_","") 
 72      if not accelerator.isalnum(): 
 73          return False 
 74       
 75       
 76       
 77      decomposition = unicodedata.decomposition(accelerator) 
 78       
 79      decomposition = re.sub("<[^>]+>", "", decomposition).strip() 
 80      return decomposition.count(" ") == 0 
  81   
 83      """returns all the accelerators and locations in str1 marked with a given marker""" 
 84      accelerators = [] 
 85      badaccelerators = [] 
 86      currentpos = 0 
 87      while currentpos >= 0: 
 88          currentpos = str1.find(accelmarker, currentpos) 
 89          if currentpos >= 0: 
 90              accelstart = currentpos 
 91              currentpos += len(accelmarker) 
 92               
 93              accelend = currentpos + 1 
 94              if accelend > len(str1): break 
 95              accelerator = str1[currentpos:accelend] 
 96              currentpos = accelend 
 97              if isvalidaccelerator(accelerator, ignorelist): 
 98                  accelerators.append((accelstart, accelerator)) 
 99              else: 
100                  badaccelerators.append((accelstart, accelerator)) 
101      return accelerators, badaccelerators 
 102   
def findmarkedvariables(str1, startmarker, endmarker, ignorelist=()):
    """Return all the variables and locations in str1 marked with a given marker.

    ``endmarker`` selects how the end of a variable is found:

    * ``None`` - the variable runs up to the first non-alphanumeric
      character (or to the end of the string);
    * an ``int`` - the variable is exactly that many characters long;
    * anything else - it is searched for as a closing marker string.

    A candidate is kept only if it is not in ``ignorelist`` and is either
    empty or alphanumeric once ``_`` and ``.`` are removed.

    NOTE(review): the ``def`` line for this body is absent from the listing;
    the name and the first three parameters are taken from the call
    ``findmarkedvariables(str1, startmarker, endmarker)`` in this file, which
    also shows that ``ignorelist`` must be optional. Its exact default is an
    assumption - confirm against the original header.

    :param str1: the string to scan
    :param startmarker: the string that introduces a variable
    :param endmarker: ``None``, a fixed length, or a closing marker string
    :param ignorelist: container of variable names to leave out
    :return: list of ``(start_position, variable)`` tuples, where the
        position is that of the start marker
    """
    variables = []
    currentpos = 0
    while currentpos >= 0:
        variable = None
        currentpos = str1.find(startmarker, currentpos)
        if currentpos < 0:
            break
        startmatch = currentpos
        currentpos += len(startmarker)
        if endmarker is None:
            # No end marker: any non-alphanumeric character ends the variable.
            endmatch = next(
                (n for n in range(currentpos, len(str1)) if not str1[n].isalnum()),
                currentpos,
            )
            # Either nothing terminated the run, or the very first character
            # was already non-alphanumeric; in both cases the candidate
            # extends to the end of the string (and is filtered out below if
            # it is not a plausible variable name).
            if endmatch == currentpos:
                endmatch = len(str1)
            if currentpos < endmatch:
                variable = str1[currentpos:endmatch]
            currentpos = endmatch
        elif isinstance(endmarker, int):
            # Fixed-length variable; isinstance() also accepts int subclasses.
            endmatch = currentpos + endmarker
            if endmatch > len(str1):
                break
            variable = str1[currentpos:endmatch]
            currentpos = endmatch
        else:
            endmatch = str1.find(endmarker, currentpos)
            if endmatch == -1:
                break
            # If another start marker sits before the end marker, the
            # variable begins at the last such start marker.
            start2 = str1.rfind(startmarker, currentpos, endmatch)
            if start2 != -1 and start2 + len(startmarker) != currentpos:
                startmatch = start2
                currentpos = start2 + len(startmarker)
            variable = str1[currentpos:endmatch]
            currentpos = endmatch + len(endmarker)
        if variable is not None and variable not in ignorelist:
            if not variable or variable.replace("_", "").replace(".", "").isalnum():
                variables.append((startmatch, variable))
    return variables
 147   
149      """returns a function that gets a list of accelerators marked using accelmarker""" 
150      def getmarkedaccelerators(str1): 
151          """returns all the accelerators in str1 marked with a given marker""" 
152          acclocs, badlocs = findaccelerators(str1, accelmarker, ignorelist) 
153          accelerators = [accelerator for accelstart, accelerator in acclocs] 
154          badaccelerators = [accelerator for accelstart, accelerator in badlocs] 
155          return accelerators, badaccelerators 
 156      return getmarkedaccelerators 
157   
159      """returns a function that gets a list of variables marked using startmarker and endmarker""" 
160      def getmarkedvariables(str1): 
161          """returns all the variables in str1 marked with a given marker""" 
162          varlocs = findmarkedvariables(str1, startmarker, endmarker) 
163          variables = [variable for accelstart, variable in varlocs] 
164          return variables 
 165      return getmarkedvariables 
166   
168      """returns any numbers that are in the string""" 
169       
170      numbers = [] 
171      innumber = False 
172      try: 
173          wasstr = isinstance(str1, str) 
174          if wasstr: 
175              str1 = str1.decode('utf8') 
176          degreesign = u'\xb0' 
177      except: 
178          degreesign = None 
179      lastnumber = "" 
180      carryperiod = "" 
181      for chr1 in str1: 
182          if chr1.isdigit(): 
183              innumber = True 
184          elif innumber: 
185              if not (chr1 == '.' or chr1 == degreesign): 
186                  innumber = False 
187                  if lastnumber: 
188                      numbers.append(lastnumber) 
189                  lastnumber = "" 
190          if innumber: 
191              if chr1 == degreesign: 
192                  lastnumber += chr1 
193              elif chr1 == '.': 
194                  carryperiod += chr1 
195              else: 
196                  lastnumber += carryperiod + chr1 
197                  carryperiod = "" 
198          else: 
199              carryperiod = "" 
200      if innumber: 
201          if lastnumber: 
202              numbers.append(lastnumber) 
203      if wasstr and degreesign: 
204          numbers = [number.encode('utf8') for number in numbers] 
205      return numbers 
 206   
212   
214      """returns the email addresses that are in a string""" 
215      return re.findall('[\w\.\-]+@[\w\.\-]+', str1) 
 216   
218      """returns the URIs in a string""" 
219      URLPAT = 'https?:[\w/\.:;+\-~\%#\$?=&,()]+|www\.[\w/\.:;+\-~\%#\$?=&,()]+|' +\ 
220              'ftp:[\w/\.:;+\-~\%#?=&,]+' 
221      return re.findall(URLPAT, str1) 
 222   
224      """returns a function that counts the number of accelerators marked with the given marker""" 
225      def countmarkedaccelerators(str1): 
226          """returns all the variables in str1 marked with a given marker""" 
227          acclocs, badlocs = findaccelerators(str1, accelmarker, ignorelist) 
228          return len(acclocs), len(badlocs) 
 229      return countmarkedaccelerators 
230