forked from memegen/meme-ocr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathocrcomp.py
64 lines (52 loc) · 1.58 KB
/
ocrcomp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import json
# Word list used to score OCR output: the contents of dict/linuxwords.txt,
# upper-cased and split on newlines (a trailing newline in the file leaves an
# empty string as the last entry). Read inside a `with` block so the file
# handle is closed deterministically.
with open("dict/linuxwords.txt", "r") as _wordfile:
    wl = _wordfile.read().upper().split("\n")
# mock ocr functions:
# diy raw ocr from data without auto-correct
def ocr0(path):
    """Guess a caption from cached OCR data with ``simple=True``.

    The cache file is ``data/<stem>.json``, where ``<stem>`` is the last
    ``/``-separated component of *path* cut at its first ``.``. The first
    two elements of the decoded JSON are stored in ``parse.bds`` and
    ``parse.ccr``, and *path* is stored in ``parse.path``, before
    ``parse.guesscaption(simple=True)`` is called.

    Returns whatever ``parse.guesscaption(simple=True)`` returns.
    """
    import parse
    parse.path = path
    stem = path.split("/")[-1].split(".")[0]
    # Context manager so the cache file is closed even if decoding fails.
    with open("data/" + stem + ".json", "r") as fi:
        js = json.load(fi)
    parse.bds = js[0]
    parse.ccr = js[1]
    return parse.guesscaption(simple=True)
# diy ocr from scratch
def ocr(path):
    """Guess a caption by running the raw OCR on *path* directly.

    Calls ``memeocr.rawocr(path)``, stores its two results in ``parse.bds``
    and ``parse.ccr`` respectively, and returns ``parse.guesscaption()``.
    """
    import memeocr
    import parse
    bounds, chars = memeocr.rawocr(path)
    parse.bds = bounds
    parse.ccr = chars
    caption = parse.guesscaption()
    return caption
# diy ocr from data
def ocr1(path):
    """Guess a caption from cached OCR data using the default mode.

    The cache file is ``data/<stem>.json``, where ``<stem>`` is the last
    ``/``-separated component of *path* cut at its first ``.``. The first
    two elements of the decoded JSON are stored in ``parse.bds`` and
    ``parse.ccr``, and *path* is stored in ``parse.path``, before
    ``parse.guesscaption()`` is called with no arguments.

    Returns whatever ``parse.guesscaption()`` returns.
    """
    import parse
    parse.path = path
    stem = path.split("/")[-1].split(".")[0]
    # Context manager so the cache file is closed even if decoding fails.
    with open("data/" + stem + ".json", "r") as fi:
        js = json.load(fi)
    parse.bds = js[0]
    parse.ccr = js[1]
    return parse.guesscaption()
# mock Tesseract result
def ocr2(path):
    """Return a fixed, hard-coded caption string; *path* is ignored.

    Serves as a mock OCR result to compare the other OCR functions against.
    """
    canned_caption = "YO BAWC I HEAR YOU CAN'T USE WIKIDEMIA AS A SOURCE SC | SOURCE THE SOURCES INSIDE OF WIRETAPPER'S SOURCES"
    return canned_caption
# evaluate the quality of an ocr result
def evalresult(t):
    """Score an OCR result as the fraction of its tokens found in ``wl``.

    Newlines are treated as spaces, and each of ``. , ! ?`` is split off
    into its own token (so ``"WORD."`` is checked as ``"WORD"`` and ``"."``).
    Tokens are compared to ``wl`` as-is; no case conversion happens here.

    Empty tokens (produced by runs of spaces or by empty input) are ignored.
    Otherwise they would count as tokens and would match an empty entry in
    ``wl``, letting an empty OCR result score as a perfect match.

    The score is printed and then returned.

    Args:
        t: OCR output text.

    Returns:
        A float in [0.0, 1.0]; 0.0 when *t* contains no tokens.
    """
    text = t.replace("\n", " ")
    for punc in ".,!?":
        text = text.replace(punc, " " + punc)
    tokens = [tok for tok in text.split(" ") if tok]
    if tokens:
        # One set build per call replaces a linear list scan per token.
        known = set(wl)
        hits = sum(1 for tok in tokens if tok in known)
        ratio = float(hits) / len(tokens)
    else:
        ratio = 0.0
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print(ratio)
    return ratio
# sort a list of ocr functions by their quality
def ocrcomp(path, *args):
    """Run every OCR function in *args* on *path* and rank the outputs.

    Each function is called once with *path*. The return value is a list of
    ``(function, result)`` tuples ordered by ``evalresult(result)``, highest
    score first.
    """
    def quality(pair):
        # Rank on the OCR text, which is the second element of each pair.
        return evalresult(pair[1])

    ranked = [(fn, fn(path)) for fn in args]
    ranked.sort(key=quality, reverse=True)
    return ranked
if __name__ == "__main__":
    # Demo: rank ocr0 against the canned ocr2 result for one sample image
    # and print the resulting (function, caption) list.
    # Single-argument parenthesised print is valid on Python 2 and Python 3.
    # print(ocr1("images/img11.jpg"))
    print(ocrcomp("images/img6.jpg", ocr0, ocr2))