-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtopic_utils.py
49 lines (27 loc) · 881 Bytes
/
topic_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from __future__ import division, print_function
import csv
from sklearn.utils import check_random_state
import numpy as np
import match_utils as mu
def read_jslda_doctopic_file(doctopic_file):
    """Load per-paragraph topic weights from a CSV document-topic file.

    Every non-empty row is read as ``<speechname>_<paranum>,w0,w1,...``.
    The first column is split on its *last* underscore, so the speech name
    may itself contain underscores; the part after it must be an integer.

    Args:
        doctopic_file: Path of the CSV file to read.

    Returns:
        A dict keyed by ``(speechname, paranum)`` tuples (one per paragraph).
        Each value is a 1-D float ``numpy`` array built from the remaining
        columns, or ``None`` when the second column is the literal string
        ``'NaN'``.

    Raises:
        ValueError: If the paragraph number is not an integer or a weight
            column cannot be parsed as a float.
        IndexError: If a non-empty row has only a single column.
    """
    import sys

    # The csv module wants a binary file on Python 2, but a text file opened
    # with newline='' on Python 3; 'rb' on Python 3 makes csv.reader fail.
    if sys.version_info[0] < 3:
        f = open(doctopic_file, 'rb')
    else:
        f = open(doctopic_file, 'r', newline='')

    para_to_topics = {}  # a paragraph is a (speechname, paranum) tuple
    with f:
        for row in csv.reader(f):
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing one).
                continue
            speechname, _, paranum_text = row[0].rpartition('_')
            paranum = int(paranum_text)
            if row[1] == 'NaN':
                # Paragraph present in the file but without topic weights.
                topic_weights = None
            else:
                topic_weights = np.array([float(x) for x in row[1:]])
            para_to_topics[(speechname, paranum)] = topic_weights
    return para_to_topics
def read_jslda_topic_file(topicsummary_file):
    """Read the topic summaries from a CSV topic-summary file.

    The first row is treated as a header and discarded. For every following
    non-empty row, the value of the last column is collected.

    Args:
        topicsummary_file: Path of the CSV file to read.

    Returns:
        A list with the last column of each data row, in file order. An
        empty or header-only file yields an empty list.
    """
    import sys

    # The csv module wants a binary file on Python 2, but a text file opened
    # with newline='' on Python 3; 'rb' on Python 3 makes csv.reader fail.
    if sys.version_info[0] < 3:
        f = open(topicsummary_file, 'rb')
    else:
        f = open(topicsummary_file, 'r', newline='')

    with f:
        reader = csv.reader(f)
        # Skip the header row. The builtin next() works on Python 2 and 3
        # (reader.next() is Python 2 only); the default avoids StopIteration
        # on an empty file.
        next(reader, None)
        # csv.reader yields [] for blank lines; those carry no summary.
        return [row[-1] for row in reader if row]