-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparse_all_link_data.py
53 lines (39 loc) · 1.33 KB
/
parse_all_link_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# Produces
# - all_link_data_<plot_start>_<plot_end>.csv
from pathlib import Path
from datetime import datetime
import pandas as pd
import helpers.helpers as helper
# Concatenate the per-link CSV files listed in the link metadata into a single
# CSV, keeping only the rows whose timestamp lies strictly inside the
# configured date range.

# Load meta parameters
plot_range_start = helper.plot_start
plot_range_end = helper.plot_end
debug = helper.debug

# Load metadata
data_path = Path('per-link-data')
meta_file = 'link_metadata.csv'
meta_data = pd.read_csv(meta_file)

# Concatenate all link_data
df_list = []
total_files = len(meta_data)

for file_count, link_id in enumerate(meta_data['link'], start=1):
    # .. load the link data (one CSV per link, named after the link id).
    # The id is converted with str() so that ids parsed as numbers by
    # read_csv do not break the path join.
    link_data = pd.read_csv(str(data_path / str(link_id)) + '.csv')
    # The 'timestamp' column is interpreted as seconds since the epoch.
    link_data['timestamp'] = pd.to_datetime(link_data['timestamp'], unit='s')
    link_data.set_index('timestamp', inplace=True)

    # .. filter for the date range of interest (both bounds exclusive)
    in_range = (plot_range_start < link_data.index) & (link_data.index < plot_range_end)
    link_data = link_data.loc[in_range]
    df_list.append(link_data)

    # log progress
    if file_count % 100 == 0:
        print('#file parsed: {} (out of {})'.format(file_count, total_files))

    # In debug mode, stop after the first 100 files to keep runs short.
    if debug and file_count == 100:
        break

# Save final data
file_id = 'all_link_data'
file_name = file_id + '_' + plot_range_start + '_' + plot_range_end + '.csv'
all_link_data = pd.concat(df_list)
# NOTE(review): 'timestamp' was moved into the index above, so index=False
# writes the rows without any time column -- confirm this is intended.
all_link_data.to_csv(file_name, index=False, mode='w')