-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathggnumber.py~
216 lines (189 loc) · 10.9 KB
/
ggnumber.py~
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import re

# Runs of two or more apostrophes or underscores are wiki-style emphasis
# markup (''italic'', '''bold''', __underline__), not part of a word, so they
# are replaced by a blank before matching.
_MARKUP_RUN = re.compile(r"'{2,}|_{2,}")

# Characters that glue to a word: letters/digits/underscore (\w), a single
# apostrophe (O'maley, don't, 'quoted') and a hyphen (skin-care, e-mail).
# A match may not be directly preceded or followed by one of these.
_NOT_AFTER_WORD_CHAR = r"(?<![\w'\-])"
_NOT_BEFORE_WORD_CHAR = r"(?![\w'\-])"


def CountOccurencesInText(word, text):
    """Number of occurences of word (case insensitive) in text.

    ``word`` may be a single word or a phrase of several words.  Only whole
    occurrences are counted:

    * "george" is not found in "Georges";
    * a single apostrophe or a hyphen belongs to the word it touches, so
      "maley" is not found in "O'maley", "don" is not found in "Don't", and
      "reflexion mirror" is not found in "'Reflexion Mirror'" (while
      "'reflexion mirror'" is);
    * any other punctuation (``. , : ? ! ( ) "`` and typographic quotes) and
      whitespace separate words;
    * runs of two or more apostrophes or underscores are treated as markup
      and ignored, so "Floor" is found in "''... Floor''" and "__Floor__";
    * the words of a phrase must be separated by whitespace only in the
      text ("legitimate their" is not found in "legitimate: their"), but any
      amount or kind of whitespace is accepted.

    Args:
        word: the word or phrase to look for.
        text: the text to search.

    Returns:
        The number of non-overlapping occurrences as an int; 0 when ``word``
        is empty or contains only whitespace/markup.
    """
    # Normalise the searched phrase the same way as the text, then split it
    # so that the whitespace between its words can be matched flexibly.
    pieces = _MARKUP_RUN.sub(" ", word).split()
    if not pieces:
        return 0

    # re.escape keeps characters such as the dots of "c.v." literal.
    phrase = r"\s+".join(re.escape(piece) for piece in pieces)
    # The re module caches compiled patterns, so repeated calls with the
    # same word do not pay for recompilation.
    matcher = re.compile(
        _NOT_AFTER_WORD_CHAR + phrase + _NOT_BEFORE_WORD_CHAR,
        re.IGNORECASE | re.UNICODE,
    )

    cleaned = _MARKUP_RUN.sub(" ", text)
    # Count lazily instead of materialising every match in a list.
    return sum(1 for _ in matcher.finditer(cleaned))
def testCountOccurencesInText():
    """Test the CountOccurencesInText function.

    Checks, in order: whole-word, case-insensitive counting on a short
    text; apostrophes counting as part of a word; the same counts on a
    longer concatenated text; multi-word phrases next to various quote and
    punctuation characters; and words wrapped in wiki-style markup.

    Raises:
        AssertionError: on the first expectation that does not hold.
    """
    text = """Georges is my name and I like python. Oh ! your name is georges? And you like Python!
Yes is is true, I like PYTHON
and my name is GEORGES"""
    # test with a little text.
    assert 3 == CountOccurencesInText("Georges", text)
    assert 3 == CountOccurencesInText("GEORGES", text)
    assert 3 == CountOccurencesInText("georges", text)
    assert 0 == CountOccurencesInText("george", text)
    assert 3 == CountOccurencesInText("python", text)
    assert 3 == CountOccurencesInText("PYTHON", text)
    assert 2 == CountOccurencesInText("I", text)
    assert 0 == CountOccurencesInText("n", text)
    assert 1 == CountOccurencesInText("true", text)

    # regard ' as text:
    assert 0 == CountOccurencesInText("maley", "John O'maley is my friend")

    # Longer text. This used to be a BIG text (a memory error was once seen
    # with it); the repeated filler sentences are currently left out, so only
    # the sentences that carry the searched words remain.
    georges_pair = """The quick brown fox jump over the lazy dog.The quick brown Georges jump over the lazy dog."""
    python_pair = """The quick brown fox jump over the lazy dog.The quick brown fox jump over the lazy python."""
    text = (georges_pair + python_pair) * 3
    text += """The quick brown fox jump over the true lazy dog.The quick brown fox jump over the lazy dog."""
    text += """ I vsfgsdfg sfdg sdfg sdgh sgh I sfdgsdf"""

    assert 3 == CountOccurencesInText("Georges", text)
    assert 3 == CountOccurencesInText("GEORGES", text)
    assert 3 == CountOccurencesInText("georges", text)
    assert 0 == CountOccurencesInText("george", text)
    assert 3 == CountOccurencesInText("python", text)
    assert 3 == CountOccurencesInText("PYTHON", text)
    assert 2 == CountOccurencesInText("I", text)
    assert 0 == CountOccurencesInText("n", text)
    assert 1 == CountOccurencesInText("true", text)

    # Phrases next to quotes, brackets and punctuation.
    assert 0 == CountOccurencesInText(
        "reflexion mirror",
        "I am a senior citizen and I live in the Fun-Plex 'Reflexion Mirror' in Sopchoppy, Florida")
    assert 1 == CountOccurencesInText(
        "'reflexion mirror'",
        "I am a senior citizen and I live in the Fun-Plex 'Reflexion Mirror' in Sopchoppy, Florida")
    assert 1 == CountOccurencesInText(
        "reflexion mirror",
        "I am a senior citizen and I live in the Fun-Plex (Reflexion Mirror) in Sopchoppy, Florida")
    assert 1 == CountOccurencesInText(
        "reflexion mirror",
        "Reflexion Mirror\" in Sopchoppy, Florida")
    assert 1 == CountOccurencesInText(
        "reflexion mirror",
        u"I am a senior citizen and I live in the Fun-Plex «Reflexion Mirror» in Sopchoppy, Florida")
    assert 1 == CountOccurencesInText(
        "reflexion mirror",
        u"I am a senior citizen and I live in the Fun-Plex \u201cReflexion Mirror\u201d in Sopchoppy, Florida")
    assert 1 == CountOccurencesInText(
        "legitimate",
        u"who is approved by OILS is completely legitimate: their employees are of legal working age")
    assert 0 == CountOccurencesInText(
        "legitimate their",
        u"who is approved by OILS is completely legitimate: their employees are of legal working age")
    assert 1 == CountOccurencesInText(
        "get back to me",
        u"I hope you will consider this proposal, and get back to me as soon as possible")
    assert 1 == CountOccurencesInText(
        "skin-care",
        u"enable Delavigne and its subsidiaries to create a skin-care monopoly")
    assert 1 == CountOccurencesInText(
        "skin-care monopoly",
        u"enable Delavigne and its subsidiaries to create a skin-care monopoly")
    assert 0 == CountOccurencesInText(
        "skin-care monopoly in the US",
        u"enable Delavigne and its subsidiaries to create a skin-care monopoly")
    assert 1 == CountOccurencesInText(
        "get back to me",
        u"When you know:get back to me")
    assert 1 == CountOccurencesInText("don't be left", """emergency alarm warning.
Don't be left unprotected. Order your SSSS3000 today!""")
    assert 1 == CountOccurencesInText("don", """emergency alarm warning.
Don't be left unprotected. Order your don SSSS3000 today!""")
    assert 1 == CountOccurencesInText(
        "take that as a 'yes'",
        "Do I have to take that as a 'yes'?")
    assert 1 == CountOccurencesInText(
        "don't take that as a 'yes'",
        "I don't take that as a 'yes'?")
    assert 1 == CountOccurencesInText(
        "take that as a 'yes'",
        "I don't take that as a 'yes'?")
    assert 1 == CountOccurencesInText(
        "don't",
        "I don't take that as a 'yes'?")
    assert 1 == CountOccurencesInText(
        "attaching my c.v. to this e-mail",
        "I am attaching my c.v. to this e-mail.")

    # Words wrapped in runs of apostrophes or underscores.
    assert 1 == CountOccurencesInText("Linguist", "'''Linguist Specialist Found Dead on Laboratory Floor'''")
    assert 1 == CountOccurencesInText("Linguist Specialist", "'''Linguist Specialist Found Dead on Laboratory Floor'''")
    assert 1 == CountOccurencesInText("Laboratory Floor", "'''Linguist Specialist Found Dead on Laboratory Floor'''")
    assert 1 == CountOccurencesInText("Floor", "'''Linguist Specialist Found Dead on Laboratory Floor'''")
    assert 1 == CountOccurencesInText("Floor", "''Linguist Specialist Found Dead on Laboratory Floor''")
    assert 1 == CountOccurencesInText("Floor", "__Linguist Specialist Found Dead on Laboratory Floor__")
    assert 1 == CountOccurencesInText("Floor", "'''''Linguist Specialist Found Dead on Laboratory Floor'''''")
    assert 1 == CountOccurencesInText("Linguist", "'''Linguist Specialist Found Dead on Laboratory Floor'''")
    assert 1 == CountOccurencesInText("Linguist", "''Linguist Specialist Found Dead on Laboratory Floor''")
    assert 1 == CountOccurencesInText("Linguist", "__Linguist Specialist Found Dead on Laboratory Floor__")
    assert 1 == CountOccurencesInText("Linguist", "'''''Linguist Specialist Found Dead on Laboratory Floor'''''")
    assert 1 == CountOccurencesInText("Floor", """Look: ''Linguist Specialist Found Dead on Laboratory Floor'' is the headline today.""")
# Sample text searched repeatedly by the doit() benchmark.
# NOTE(review): several words are broken across line ends inside this literal
# (e.g. "su" / "gar", "suga" / "r", "ne" / "arly") -- possibly a hard-wrap
# artifact; confirm before relying on the exact counts it produces.
SampleTextForBench = """
A Suggestion Box Entry from Bob Carter
Dear Anonymous,
I'm not quite sure I understand the concept of this 'Anonymous' Suggestion Box. If no one reads what we write, then how will anything ever
change?
But in the spirit of good will, I've decided to offer my two cents, and hopefully Kevin won't steal it! (ha, ha). I would really like to
see more varieties of coffee in the coffee machine in the break room. 'Milk and sugar', 'black with sugar', 'extra sugar' and 'cream and su
gar' don't offer much diversity. Also, the selection of drinks seems heavily weighted in favor of 'sugar'. What if we don't want any suga
r?
But all this is beside the point because I quite like sugar, to be honest. In fact, that's my second suggestion: more sugar in the office.
Cakes, candy, insulin, aspartame... I'm not picky. I'll take it by mouth or inject it intravenously, if I have to.
Also, if someone could please fix the lock on the men's room stall, that would be helpful. Yesterday I was doing my business when Icarus ne
arly climbed into my lap.
So, have a great day!
Anonymously,
Bob Carter
"""
def doit():
    """Run CountOccurencesInText on a few examples.

    Benchmark driver: counts each of a fixed list of words in
    SampleTextForBench, 400 times over, and prints the sum of all counts.
    Returns nothing.
    """
    # Same words, in the same order, as the original hand-written calls.
    bench_words = (
        "word",
        "sugar",
        "help",
        "heavily",
        "witfull",
        "dog",
        "almost",
        "insulin",
        "attaching",
        "asma",
        "neither",
        "won't",
        "green",
        "parabole",
    )
    total = 0
    # range() and print() with a single argument behave the same on
    # Python 2 and Python 3, unlike xrange and the print statement.
    for _ in range(400):
        for bench_word in bench_words:
            total += CountOccurencesInText(bench_word, SampleTextForBench)
    print(total)
#Start the tests
if __name__ == '__main__':
    # Script-only imports, done before anything runs. `re` is not referenced
    # in this block; it is kept because importing it here binds a
    # module-level name.
    import profile
    import re

    # I need to pass the test:
    try:
        testCountOccurencesInText()
    except Exception:
        # Announce the failure, then let the traceback through unchanged.
        print("Error !")
        raise
    print("Tests passed")

    # I need to be fast as well:
    profile.run('doit()')