1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """functions to get decorative/informative text out of strings..."""
23
24 import re
25 import unicodedata
26
# NOTE(review): the def line was missing from the reviewed text; the name
# "spacestart" and its single parameter are reconstructed - confirm.
def spacestart(str1):
    """Return all the whitespace from the start of the string.

    The result is the longest prefix of str1 made up only of characters for
    which isspace() is true; it is empty when str1 starts with a
    non-whitespace character and equals str1 when str1 is all whitespace.
    """
    for index, char in enumerate(str1):
        if not char.isspace():
            # Slice once instead of growing a string character by character.
            return str1[:index]
    return str1
34
# NOTE(review): the def line was missing from the reviewed text; the name
# "spaceend" and its single parameter are reconstructed - confirm.
def spaceend(str1):
    """Return all the whitespace from the end of the string.

    The result is the longest suffix of str1 made up only of characters for
    which isspace() is true; it is empty when str1 ends with a
    non-whitespace character and equals str1 when str1 is all whitespace.
    """
    start = len(str1)
    # Walk backwards over the trailing whitespace, then slice once.
    while start > 0 and str1[start - 1].isspace():
        start -= 1
    return str1[start:]
43
# NOTE(review): the def line was missing from the reviewed text; the name
# "puncstart" is reconstructed, the parameters come from the body - confirm.
def puncstart(str1, punctuation):
    """Return all the punctuation from the start of the string.

    punctuation is a container (typically a string) of the characters to
    treat as punctuation. Whitespace is accepted as part of the leading run
    as well, so the result ends just before the first character that is
    neither whitespace nor in punctuation.
    """
    for index, char in enumerate(str1):
        if char not in punctuation and not char.isspace():
            return str1[:index]
    return str1
51
# NOTE(review): the def line was missing from the reviewed text; the name
# "puncend" is reconstructed, the parameters come from the body - confirm.
def puncend(str1, punctuation):
    """Return all the punctuation from the end of the string.

    punctuation is a container (typically a string) of the characters to
    treat as punctuation. Whitespace is accepted as part of the trailing run
    as well, so the result starts just after the last character that is
    neither whitespace nor in punctuation.
    """
    start = len(str1)
    while start > 0:
        char = str1[start - 1]
        if char not in punctuation and not char.isspace():
            break
        start -= 1
    return str1[start:]
60
# NOTE(review): the def line was missing from the reviewed text; the name
# "ispurepunctuation" and its single parameter are reconstructed - confirm.
def ispurepunctuation(str1):
    """Check whether the string contains no alphabetic characters.

    Returns 0 if any character is alphabetic, otherwise the length of str1.
    The result is therefore falsy for the empty string, and digits do not
    stop a string from counting as punctuation.
    """
    if any(char.isalpha() for char in str1):
        return 0
    return len(str1)
66
# NOTE(review): the def line was missing from the reviewed text; the name and
# parameter order come from the call in the accelerator finder. The default
# for ignorelist is an assumption - confirm.
def isvalidaccelerator(accelerator, ignorelist=()):
    """Return whether the given accelerator string is a valid one.

    accelerator is the character that follows an accelerator marker.
    ignorelist is a container of accelerators that are always rejected.

    An accelerator is valid when it is non-empty, is not in ignorelist, is
    alphanumeric once underscores are removed, and does not decompose into
    several code points (such as a base letter plus a combining accent).
    Always returns a bool.
    """
    if not accelerator or accelerator in ignorelist:
        return False
    accelerator = accelerator.replace("_", "")
    if not accelerator.isalnum():
        return False
    # unicodedata.decomposition() only accepts one character; anything longer
    # cannot be a single accelerator key, so reject it instead of raising.
    if len(accelerator) != 1:
        return False
    decomposition = unicodedata.decomposition(accelerator)
    # Drop compatibility tags such as "<super>" so only code points remain.
    decomposition = re.sub("<[^>]+>", "", decomposition).strip()
    # A space left in the mapping means more than one code point.
    return " " not in decomposition
81
# NOTE(review): the def line was missing from the reviewed text; the name and
# parameter order come from the calls made by the closures further down. The
# default for ignorelist is an assumption - confirm.
def findaccelerators(str1, accelmarker, ignorelist=()):
    """Return all the accelerators and locations in str1 marked with accelmarker.

    The accelerator is the single character directly after each occurrence
    of accelmarker. Returns a tuple (accelerators, badaccelerators); each is
    a list of (markerposition, accelerator) pairs, split by whether
    isvalidaccelerator() accepts the character given ignorelist. A marker at
    the very end of the string has no accelerator and ends the search.
    """
    accelerators = []
    badaccelerators = []
    markerlength = len(accelmarker)
    searchpos = 0
    while True:
        accelstart = str1.find(accelmarker, searchpos)
        if accelstart < 0:
            break
        accelpos = accelstart + markerlength
        # Accelerators are taken to be exactly one character long.
        accelend = accelpos + 1
        if accelend > len(str1):
            break
        accelerator = str1[accelpos:accelend]
        # Resume after the accelerator, so a doubled marker is consumed as
        # marker + (invalid) accelerator rather than as two markers.
        searchpos = accelend
        if isvalidaccelerator(accelerator, ignorelist):
            accelerators.append((accelstart, accelerator))
        else:
            badaccelerators.append((accelstart, accelerator))
    return accelerators, badaccelerators
102
# NOTE(review): the def line was missing from the reviewed text; the name and
# first three parameters come from the call in the variable-getter closure,
# which passes no ignorelist, so that parameter must have a default - confirm.
def findmarkedvariables(str1, startmarker, endmarker, ignorelist=()):
    """Return all the variables and locations in str1 marked with a given marker.

    startmarker is the text that introduces a variable. endmarker selects
    how the variable ends:

    - None: the variable is the run of alphanumeric characters after the
      start marker (an empty run is not a variable);
    - an int: the variable is exactly that many characters long;
    - anything else: the variable runs up to the next occurrence of that
      text; if another start marker appears before it, the variable starts
      at the last such marker instead.

    Variables in ignorelist are dropped, as are non-empty variables that
    contain anything other than alphanumerics, "_" and ".". Returns a list
    of (startposition, variable) pairs, where startposition is the index of
    the start marker.
    """
    variables = []
    markerlength = len(startmarker)
    currentpos = 0
    while currentpos >= 0:
        startmatch = str1.find(startmarker, currentpos)
        if startmatch < 0:
            break
        currentpos = startmatch + markerlength
        variable = None
        if endmarker is None:
            # Stop at the first non-alphanumeric character. The for/else
            # distinguishes "ran to the end of the string" from "stopped
            # immediately", so a marker followed by e.g. a space no longer
            # swallows the rest of the string and hides later variables.
            for endmatch in range(currentpos, len(str1)):
                if not str1[endmatch].isalnum():
                    break
            else:
                endmatch = len(str1)
            if currentpos < endmatch:
                variable = str1[currentpos:endmatch]
            currentpos = endmatch
        elif isinstance(endmarker, int):
            # An int end marker means a fixed-length variable.
            endmatch = currentpos + endmarker
            if endmatch > len(str1):
                break
            variable = str1[currentpos:endmatch]
            currentpos = endmatch
        else:
            endmatch = str1.find(endmarker, currentpos)
            if endmatch == -1:
                break
            # Search backwards for an intervening start marker; if there is
            # one, the variable begins after it rather than after the first.
            innerstart = str1.rfind(startmarker, currentpos, endmatch)
            if innerstart != -1:
                innerpos = innerstart + markerlength
                if innerpos != currentpos:
                    currentpos = innerpos
                    startmatch = innerstart
            variable = str1[currentpos:endmatch]
            currentpos = endmatch + len(endmarker)
        if variable is not None and variable not in ignorelist:
            # Empty variables (possible with fixed length 0 or adjacent
            # markers) are kept; otherwise only identifier-like text counts.
            if not variable or variable.replace("_", "").replace(".", "").isalnum():
                variables.append((startmatch, variable))
    return variables
147
# NOTE(review): the def line was missing from the reviewed text; the name
# "getaccelerators" and the ignorelist default are reconstructed - confirm.
def getaccelerators(accelmarker, ignorelist=()):
    """Return a function that gets a list of accelerators marked using accelmarker.

    ignorelist is passed through to findaccelerators() on every call of the
    returned function.
    """
    def getmarkedaccelerators(str1):
        """Return the accelerators in str1 marked with the given marker.

        The result is a tuple (accelerators, badaccelerators) of lists of
        accelerator characters, without their positions.
        """
        acclocs, badlocs = findaccelerators(str1, accelmarker, ignorelist)
        accelerators = [accelerator for _, accelerator in acclocs]
        badaccelerators = [accelerator for _, accelerator in badlocs]
        return accelerators, badaccelerators
    return getmarkedaccelerators
157
# NOTE(review): the def line was missing from the reviewed text; the name
# "getvariables" is reconstructed, the parameters come from the body - confirm.
def getvariables(startmarker, endmarker):
    """Return a function that gets a list of variables marked using startmarker and endmarker.

    Both markers are passed unchanged to findmarkedvariables() on every call
    of the returned function.
    """
    def getmarkedvariables(str1):
        """Return the variables in str1 marked with the given markers, without positions."""
        varlocs = findmarkedvariables(str1, startmarker, endmarker)
        return [variable for _, variable in varlocs]
    return getmarkedvariables
166
# NOTE(review): the def line was missing from the reviewed text; the name
# "getnumbers" and its single parameter are reconstructed - confirm.
def getnumbers(str1):
    """Return any numbers that are in the string, in order of appearance.

    A number starts at a digit and may contain further digits, periods and
    degree signs (U+00B0). Periods are only kept when another digit follows
    them, so a sentence-ending period is not part of the number. Each number
    is returned as a string.

    Byte strings are decoded as UTF-8 for scanning and the numbers are
    encoded back to UTF-8, so the result has the same string type as the
    input. Bytes that are not valid UTF-8 simply act as separators.
    """
    # Only byte strings need decoding. The previous blanket try/except also
    # swallowed the AttributeError raised for text strings on Python 3, which
    # silently disabled degree-sign handling there. ("bytes" is an alias of
    # "str" on Python 2, so this check works on both.)
    wasbytes = isinstance(str1, bytes)
    if wasbytes:
        # "replace" keeps the scan best-effort on malformed input; the
        # replacement character can never end up inside a number.
        str1 = str1.decode('utf8', 'replace')
    degreesign = u'\xb0'
    numbers = []
    current = ""
    # Periods seen since the last digit; only committed if a digit follows.
    pendingperiods = ""
    for char in str1:
        if char.isdigit():
            current += pendingperiods + char
            pendingperiods = ""
        elif current and char == '.':
            pendingperiods += char
        elif current and char == degreesign:
            current += char
        else:
            if current:
                numbers.append(current)
            current = ""
            pendingperiods = ""
    if current:
        numbers.append(current)
    if wasbytes:
        numbers = [number.encode('utf8') for number in numbers]
    return numbers
206
212
# NOTE(review): the def line was missing from the reviewed text; the name
# "getemails" and its single parameter are reconstructed - confirm.
def getemails(str1):
    """Return the email addresses that are in a string.

    An address is any run of word characters, "." and "-" on both sides of
    an "@". The result is a list of the matched strings in order of
    appearance.
    """
    # Raw string: the same pattern as before, without relying on Python
    # passing through invalid string escapes such as "\w".
    return re.findall(r'[\w\.\-]+@[\w\.\-]+', str1)
216
# NOTE(review): the def line was missing from the reviewed text; the name
# "geturls" and its single parameter are reconstructed - confirm.
def geturls(str1):
    """Return the URIs in a string.

    Matches text starting with "http:", "https:", "www." or "ftp:" followed
    by characters commonly found in URLs. The result is a list of the
    matched strings in order of appearance.
    """
    # Raw strings: the same pattern as before, without relying on Python
    # passing through invalid string escapes such as "\w".
    URLPAT = (r'https?:[\w/\.:;+\-~\%#\$?=&,()]+|www\.[\w/\.:;+\-~\%#\$?=&,()]+|'
              r'ftp:[\w/\.:;+\-~\%#?=&,]+')
    return re.findall(URLPAT, str1)
222
# NOTE(review): the def line was missing from the reviewed text; the name
# "countaccelerators" and the ignorelist default are reconstructed - confirm.
def countaccelerators(accelmarker, ignorelist=()):
    """Return a function that counts the number of accelerators marked with the given marker.

    ignorelist is passed through to findaccelerators() on every call of the
    returned function.
    """
    def countmarkedaccelerators(str1):
        """Return (validcount, badcount) for the accelerators marked in str1."""
        acclocs, badlocs = findaccelerators(str1, accelmarker, ignorelist)
        return len(acclocs), len(badlocs)
    return countmarkedaccelerators
230