-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathconvert_realdata_syntheform.py
54 lines (48 loc) · 1.89 KB
/
convert_realdata_syntheform.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import pickle
import numpy as np
def convert_task_sub(task, sub):
timestamps_list = []
types_list = []
lengths_list = []
timeintervals_list = []
file_path = '../../data/' + task + '/' + sub +'.pkl'
with open(file_path, 'rb') as f:
file = pickle.load(f, encoding='latin1')
dim_process = file['dim_process']
print('dim_process: {} for task: {}'.format(dim_process,task))
seqs = file[sub]
one_seq_num = 0
for seq in seqs:
timestamps = []
types = []
timeintervals = []
for event in seq:
event_type = event['type_event']
event_timestamp = event['time_since_start']
event_timeinterval = event['time_since_last_event']
timestamps.append(event_timestamp)
types.append(event_type)
timeintervals.append(event_timeinterval)
lengths = len(seq)
if lengths == 1:
one_seq_num += 1
continue
timestamps_list.append(np.asarray(timestamps))
types_list.append(np.asarray(types))
lengths_list.append(np.asarray(lengths))
timeintervals_list.append(np.asarray(timeintervals))
print('one_seq_num: {}'.format(one_seq_num))
save_path = '../../data/' + task + '/' + sub +'_manifold_format.pkl'
with open(save_path, "wb") as f:
save_data_ = {'timestamps': np.asarray(timestamps_list),
'types': np.asarray(types_list),
'lengths': np.asarray(lengths_list),
'timeintervals': np.asarray(timeintervals_list)
}
pickle.dump(save_data_,f)
if __name__ == '__main__':
task_list = ['bookorder']
sub_dataset = ['train', 'dev', 'test']
for task in task_list:
for sub in sub_dataset:
convert_task_sub(task,sub)