# wordpair.py -- forked from memegen/meme-ocr
# Upper-cased dictionary entries, one per line of the word-list file.
# The file is opened in a `with` block so the handle is closed right after
# reading instead of being left for the garbage collector.
with open("dict/linuxwords.txt", "r") as _wordfile:
    wl = _wordfile.read().upper().split("\n")
# find where two diverged words line up again
def _resync_offsets(w1, w2):
    """Return offsets (x1, x2) at which w1 and w2 share a character.

    Only positions whose offsets differ by at most 3 are considered (the
    window width n runs from 1 to 4).  Among candidates, a new one is
    accepted only when abs(j-k)+(j+k) -- i.e. twice the larger offset --
    is strictly smaller than that of the current best, so the earliest
    re-alignment wins.  If no shared character is found, the result is
    (len(w1), len(w2)), meaning "consume both words entirely".
    """
    ws = [w1, w2]
    xs = [len(w1), len(w2)]
    for n in range(1, 5):
        # Try both orientations: w1 lagging behind w2, then w2 behind w1.
        for i1, i2 in [(0, 1), (1, 0)]:
            found = False
            for j in range(len(ws[i1])):
                for k in range(j, min(j + n, len(ws[i2]))):
                    if ws[i1][j] == ws[i2][k]:
                        if abs(j - k) + (j + k) < abs(xs[i1] - xs[i2]) + (xs[i1] + xs[i2]):
                            xs[i1] = j
                            xs[i2] = k
                        found = True
                        break
                if found:
                    break
    return xs[0], xs[1]


# pair letters in words with similar spelling
def wordpair(w1, w2):
    """Align two words into a list of (segment_of_w1, segment_of_w2) pairs.

    While both words are non-empty: if their first characters are equal,
    that character is emitted as a pair of single characters; otherwise
    the differing prefixes up to the next re-alignment point are emitted
    as one pair (either side may be the empty string).  Whatever is left
    over once one word is exhausted is appended as a final pair.

    Returns a list of 2-tuples of strings; empty when both inputs are empty.
    """
    result = []
    while w1 and w2:
        if w1[0] == w2[0]:
            result.append((w1[0], w2[0]))
            w1 = w1[1:]
            w2 = w2[1:]
        else:
            # The first characters differ, so the offsets can never both be
            # zero: every iteration consumes at least one character.
            x1, x2 = _resync_offsets(w1, w2)
            result.append((w1[:x1], w2[:x2]))
            w1 = w1[x1:]
            w2 = w2[x2:]
    if w1 or w2:
        result.append((w1[:], w2[:]))
    return result
# pretty print result of wordpair
def printwordpair(wp):
    """Format a wordpair() result as two tab-separated, newline-ended lines.

    The first line holds the first element of every pair and the second
    line the second element; each element is followed by a tab.  Despite
    the name, nothing is printed: the formatted string is returned.
    """
    l1 = "".join(w[0] + "\t" for w in wp)
    l2 = "".join(w[1] + "\t" for w in wp)
    return l1 + "\n" + l2 + "\n"
# test wordpair with function f
def testwordpair(f):
    """Print f(wordpair(a, b)) for a fixed list of sample word pairs.

    f receives the list returned by wordpair(); whatever it returns is
    printed, one sample per print call.
    """
    samples = [
        ("president", "precedent"),
        ("affection", "affectation"),
        ("eminent", "immanent"),
        ("principal", "principle"),
        ("desert", "dessert"),
        ("deed", "indeed"),
        ("immense", "intense"),
        ("drastic", "dramatic"),
        ("emulsion", "emotion"),
        ("wikipedia", "vvil<ieolix"),
    ]
    for a, b in samples:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(f(wordpair(a, b)))
# similarity of a word pair
def wordsim(wp):
    """Score a wordpair() result as a float similarity.

    The score is the number of pairs whose two segments are equal, divided
    by the sum over all pairs of the mean segment length.  Returns 0.0
    when that sum is zero (empty pairing or only empty segments) rather
    than raising ZeroDivisionError.
    """
    matched = sum(1 for w in wp if w[0] == w[1])
    total = sum((len(w[0]) + len(w[1])) / 2.0 for w in wp)
    if total == 0:
        return 0.0
    return matched * 1.0 / total
def suggest(w0):
    """Return up to 10 entries of wl that are spelled similarly to w0.

    Each entry of the module-level list wl whose length is within 4 of
    len(w0) is aligned against the upper-cased w0 with wordpair() and
    scored with wordsim(); entries scoring above 0.5 are kept.

    Returns a list of (word, score) tuples with duplicates removed, the
    score truncated to three decimals, sorted by descending score.  Equal
    scores are ordered alphabetically so the result is deterministic.
    """
    target = w0.upper()  # loop-invariant, computed once
    results = set()
    for w in wl:
        if abs(len(w) - len(w0)) <= 4:
            s = wordsim(wordpair(target, w))
            if s > 0.5:
                results.add((w, int(s * 1000) / 1000.0))
    return sorted(results, key=lambda x: (-x[1], x[0]))[:10]
# Script entry point: print dictionary suggestions for a sample fragment.
# The commented-out calls are alternative manual checks of wordpair().
if __name__ == "__main__":
    #testwordpair(printwordpair)
    #testwordpair(wordsim)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(suggest("emul"))