-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgoogle_translate_test.py
28 lines (24 loc) · 1.02 KB
/
google_translate_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
from lib.google_trans_new.google_trans_new import google_translator
# Script: translate the first lines of the fr-en test corpus with Google
# Translate (via google_trans_new) and save one translation per line.
translator = google_translator()

# Disabled tokenisation experiment, kept for reference:
# words = re.findall(r"[\w']+|[.,!?;]", p.lower().replace(" '", "'").replace("&", "&"))
# for i, wrd in enumerate(words):
#     words[i] = wrd.replace("'", " '").replace("&", "&")

# NOTE: Google will time out if you do too many requests, 100k is not achievable.

# Source side of the fr-en test corpus; `with` closes the handle even if
# reading fails.
with open("corpuses/fr-en/test/test.fr", "r", encoding="utf-8") as file:
    lines = file.readlines()

out = []
count = 0  # stays 0 when the corpus is empty
# Only the first 1000 lines are attempted.
for count, line in enumerate(lines[:1000], start=1):
    try:  # Google has a cap for requests ~1200, we'll take what we can
        # NOTE(review): as written, both replace() calls map a string to
        # itself and therefore change nothing; presumably they once decoded
        # HTML entities -- confirm against the corpus before relying on them.
        # Only the source language is given; the target language is left to
        # the library default.
        translation = translator.translate(
            line.replace("'", "'").replace("&", "&"), lang_src='fr'
        )
    except KeyboardInterrupt:
        # Manual stop: still write out what has been translated so far.
        print("INTERRUPTED, SAVING.")
        break
    except Exception as err:
        # Any fetch failure ends the run; report the cause so that a request
        # cap can be told apart from other errors, then save what we have.
        print("COULD NOT FETCH, SAVING.")
        print("  cause: {!r}".format(err))
        break
    out.append(translation)
    print("{}) {}".format(count, translation))

# Write whatever was translated, one translation per line.
with open("output/google_translate/output.2.fr-en.txt", "w", encoding="utf-8") as file:
    for translation in out:
        # NOTE(review): this turns every "&" in the translation into an
        # apostrophe; presumably it was meant to target an apostrophe entity
        # only -- confirm the intended pattern.
        file.write(translation.replace("&", "'") + "\n")