-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathadd_words.py
48 lines (38 loc) · 1.36 KB
/
add_words.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import psycopg2
import psycopg2.extras
import functools
import re
# Settings for the PostgreSQL server that hosts the "github" database.
# NOTE(review): host and credentials are hard-coded here; consider loading
# them from the environment or a config file before sharing this script.
_db_settings = {
    "user": "postgres",
    "password": "root",
    "host": "127.0.0.1",
    "port": "5432",
    "database": "github",
}
connection = psycopg2.connect(**_db_settings)

# A DictCursor is requested so the rest of the script can read columns by
# name (e.g. row['id']) instead of by position.
cur = connection.cursor(cursor_factory=psycopg2.extras.DictCursor)
def get_word_vector(word, cursor=None):
    """Return every row of the ``words`` table whose ``word`` matches *word*.

    The lookup is done on the lower-cased form of *word*. The value is sent
    to the database as a bound parameter rather than spliced into the SQL
    text, so tokens containing quotes (e.g. ``don't``) neither break the
    statement nor allow SQL injection.

    Args:
        word: The token to look up.
        cursor: Optional DB-API cursor to run the query on. When omitted,
            the module-level ``cur`` is used.

    Returns:
        The result of ``fetchall()`` for the query: a list of matching rows,
        empty when the word is not present.
    """
    if cursor is None:
        cursor = cur
    # TODO (carried over from the original): check with hyphen.
    cursor.execute('SELECT * from words where word=%s', (word.lower(),))
    return cursor.fetchall()
# Export step: for every issue that has tokens and is not a question, look up
# the stored vector of each token and append one comma-separated line to
# nq_vectors.txt:
#     id,sentence_count,tokens_count,<vectors joined by ','>,<question flag>
# Tokens with no entry in the words table are logged to missing_words.txt as
# "id,token" lines instead.
try:
    cur.execute(
        'SELECT * from issues where tokens is not null and question is false ORDER BY random()')
    rows = cur.fetchall()

    # Both output files are opened in append mode and are closed by the
    # `with` block even if an unexpected error escapes the loop.
    with open("./dataset/all/nq_vectors.txt", "a") as vectors_file, \
            open("./dataset/all/missing_words.txt", "a") as missing_file:
        for row in rows:
            try:
                print(row['id'])
                vectors = []
                for token in row['tokens'].split(','):
                    matches = get_word_vector(token)
                    if matches:
                        # Only the first matching row is used.
                        vectors.append(matches[0]['vector'])
                    else:
                        missing_file.write(f'{row["id"]},{token}\n')
                label = 1 if row["question"] else 0
                vectors_file.write(
                    f'{row["id"]},{row["sentence_count"]},{row["tokens_count"]},{",".join(vectors)},{label}\n')
            except Exception as e:
                # Best effort: report the failing issue and carry on with
                # the next one.
                print(f'error for id: {row["id"]}')
                print(e)
                if isinstance(e, psycopg2.Error):
                    # A failed statement leaves the transaction aborted;
                    # without a rollback every later query would fail too.
                    connection.rollback()
finally:
    connection.close()