-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfunctions.py
163 lines (132 loc) · 4.72 KB
/
functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
import matplotlib.pyplot as plt
from matplotlib import colors
import matplotlib.dates as mdates
from matplotlib.colors import LogNorm, Normalize
import numpy as np
import pandas as pd
import seaborn as sn
from scipy.stats import entropy
###########################################################################
# Functions for plotting
def plot_sensor_data(name, df):
    """Plot one column of ``df`` as a time series and display the figure.

    Args:
        name: Label used to select the data from ``df``; it is also the
            legend entry and part of the figure title.
        df: Object indexable by ``name``. Presumably a pandas DataFrame with
            a datetime index, since the x axis is formatted as dates --
            confirm against callers.
    """
    series = df[name]

    # One major tick per day, rendered as month/day/year.
    x_axis = plt.gca().xaxis
    x_axis.set_major_formatter(mdates.DateFormatter('%m/%d/%Y'))
    x_axis.set_major_locator(mdates.DayLocator())

    plt.plot(series, label=name)
    # Let matplotlib rotate/align the date tick labels.
    plt.gcf().autofmt_xdate()
    plt.legend()
    plt.title(f'Time series of sensor: {name}')
    plt.show()
def plot_all_nine_sensors(dataframe, title="Timeseries of 9 sensors"):
    """Plot every entry of ``dataframe`` in its own cell of a 3x3 grid.

    Args:
        dataframe: Mapping of sensor name -> plottable data. Entries are
            drawn in iteration order, one subplot each; the grid has room
            for at most nine entries.
        title: Figure-level title.
    """
    rows, cols = 3, 3

    plt.figure(figsize=(15, 9))
    plt.suptitle(title)

    for position, (name, df) in enumerate(dataframe.items(), start=1):
        ax = plt.subplot(rows, cols, position)
        # The date formatter/locator has to be attached to each subplot's
        # own x axis. Setting it on plt.gca() before any subplot exists only
        # configures an implicit full-figure axes, never the subplots.
        # Fresh instances per axis: tickers must not be shared between axes.
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%d/%m/%Y'))
        ax.xaxis.set_major_locator(mdates.DayLocator())
        ax.plot(df)
        ax.set_title(name)

    # Rotate/align date labels once, after all subplots exist.
    plt.gcf().autofmt_xdate()
    plt.show()
def plot_kl(KL, title="KL divergence"):
    """Show ``KL`` as an annotated seaborn heatmap.

    Args:
        KL: Rectangular data accepted by ``seaborn.heatmap`` (for example
            the nested list returned by the KL helpers in this module).
        title: Title placed above the heatmap.
    """
    # With no ``ax`` argument seaborn draws on the current axes and returns it.
    heatmap_axes = sn.heatmap(KL, annot=True)
    heatmap_axes.set_title(title)
    plt.show()
###########################################################################
# Functions for reshaping
def reshape_df(df, freq='30s'):
    """Resample ``df`` onto a regular time grid and forward-fill the gaps.

    Each bin holds the mean of the samples that fall into it; bins with no
    samples are NaN after the mean and are then replaced by the previous
    bin's value.

    Args:
        df: pandas object with a datetime-like index (required by
            ``resample``).
        freq: Resampling frequency as a pandas offset alias. Defaults to one
            sample per 30 seconds. The lowercase ``'s'`` alias is used
            because the uppercase ``'S'`` spelling is deprecated in recent
            pandas releases.

    Returns:
        The resampled, forward-filled object. Leading bins that have no
        earlier value to copy stay NaN.
    """
    return df.resample(freq).mean().ffill()
def reshape_dfs(dfs):
    """Resample every frame in ``dfs`` with ``reshape_df``.

    Args:
        dfs: Mapping of name -> pandas object accepted by ``reshape_df``.

    Returns:
        A new dict with the same keys whose values are the resampled,
        forward-filled frames. The original and resampled shapes are
        printed for each entry.
    """
    # Named differently from the function so the function is not shadowed.
    resampled = {}
    for name, df in dfs.items():
        # ``.ffill()`` replaces the deprecated ``fillna(method='ffill')``.
        r_df = reshape_df(df).ffill()
        print(f'Original shape: {df.shape} Resampled shape: {r_df.shape}')
        resampled[name] = r_df
    print("Reshaping done")
    return resampled
###########################################################################
#Functions for correlation
def correlate_dfs(standard_df, title = "Correlation between sensors"):
    """Plot the pairwise correlation between sensors as an annotated heatmap.

    Args:
        standard_df: Mapping of sensor name -> pandas data. Each value is
            stored as one column named after its key (assumes one column
            per sensor -- TODO confirm with callers).
        title: Title placed above the heatmap.
    """
    all_dfs = pd.DataFrame()
    for name, df in standard_df.items():
        # Drop the original index so sensors are aligned by position rather
        # than by timestamp.
        all_dfs[name] = df.reset_index(drop=True)

    corr = all_dfs.corr()
    # The former ``corr.style.background_gradient()`` call was removed: it
    # built a Styler that was thrown away, so it had no visible effect.
    sn.heatmap(corr, annot=True)
    plt.title(title)
    plt.show()
###########################################################################
# Functions for standardization
def standardize_df(df):
    """Centre and scale ``df`` using the statistics of its first column.

    Args:
        df: pandas DataFrame; ``mean()``/``std()`` are taken per column and
            only the first entry of each is used.

    Returns:
        A tuple ``(scaled, avg, std)`` where ``scaled`` is
        ``(df - avg) / std`` and ``avg``/``std`` are the first column's mean
        and standard deviation.
    """
    # NOTE(review): every column is scaled with the first column's mean/std;
    # looks intended for single-column frames -- confirm.
    centre = df.mean().iloc[0]
    spread = df.std().iloc[0]
    scaled = df.sub(centre).div(spread)
    return scaled, centre, spread
def standardize_dfs(dfs):
    """Apply ``standardize_df`` to every frame in ``dfs``.

    Args:
        dfs: Mapping of name -> DataFrame.

    Returns:
        A new dict with the same keys holding only the standardized frames;
        the mean and standard deviation reported by ``standardize_df`` are
        discarded.
    """
    # Element 0 of the returned tuple is the scaled frame.
    return {name: standardize_df(df)[0] for name, df in dfs.items()}
###########################################################################
# Functions for KL divergence
def KL_with_params(m1, std1, m2, std2):
    """Closed-form KL divergence between two univariate Gaussians.

    Evaluates ``log(std2/std1) + (std1^2 + (m1 - m2)^2) / (2 std2^2) - 1/2``.

    Args:
        m1: Mean of the first distribution.
        std1: Standard deviation of the first distribution.
        m2: Mean of the second distribution.
        std2: Standard deviation of the second distribution.

    Returns:
        The value of the expression above.
    """
    log_ratio = np.log(std2 / std1)
    mean_gap = m1 - m2
    scaled_spread = (std1 ** 2 + mean_gap ** 2) / (2 * std2 ** 2)
    return log_ratio + scaled_spread - 1 / 2
def kl_dfs(dfs):
    """Build the pairwise univariate-Gaussian KL matrix for ``dfs``.

    Each frame is summarised by the mean and standard deviation of its
    first column, and ``KL_with_params`` is evaluated for every ordered
    pair of frames.

    Args:
        dfs: Mapping of name -> DataFrame.

    Returns:
        A list of rows; entry ``[i][j]`` is
        ``KL_with_params(mean_i, std_i, mean_j, std_j)``, with frames taken
        in the mapping's iteration order.
    """
    # Summarise each frame once instead of once per pair.
    stats = [(df.mean().iloc[0], df.std().iloc[0]) for df in dfs.values()]
    return [
        [KL_with_params(avg1, std1, avg2, std2) for avg2, std2 in stats]
        for avg1, std1 in stats
    ]
def mv_kl_dfs(dfs):
    """Build the pairwise multivariate-Gaussian KL matrix for ``dfs``.

    Each frame has rows containing NaN dropped and is then summarised by
    its column means and its covariance matrix (columns are the variables);
    ``kl_mvn`` is evaluated for every ordered pair of frames.

    Args:
        dfs: Mapping of name -> DataFrame. Presumably all frames have the
            same number of columns, as ``kl_mvn`` combines their means and
            covariances -- confirm against callers.

    Returns:
        A list of rows; entry ``[i][j]`` is
        ``kl_mvn(mean_i, cov_i, mean_j, cov_j)``, with frames taken in the
        mapping's iteration order.
    """
    # Clean and summarise each frame once instead of once per pair.
    params = []
    for df in dfs.values():
        clean = df.dropna()
        params.append((np.mean(clean, axis=0), np.cov(clean, rowvar=False)))

    return [
        [kl_mvn(avg1, cov1, avg2, cov2) for avg2, cov2 in params]
        for avg1, cov1 in params
    ]
def kl_mvn(m0, S0, m1, S1):
    """Kullback-Leibler divergence between two multivariate Gaussians.

    Computes KL( N(m0, S0) || N(m1, S1) ) with the closed form

        0.5 * ( tr(S1^{-1} S0) + log(|S1| / |S0|)
                + (m1 - m0)^T S1^{-1} (m1 - m0) - N )

    where N is the dimension, taken from ``m0.shape[0]``.

    Args:
        m0: Mean vector of the first Gaussian.
        S0: Covariance matrix of the first Gaussian.
        m1: Mean vector of the second Gaussian.
        S1: Covariance matrix of the second Gaussian; must be invertible.

    Returns:
        The divergence as a scalar.

    Raises:
        numpy.linalg.LinAlgError: If ``S1`` is singular.
    """
    N = m0.shape[0]
    # Invert S1 once; both the trace and the quadratic term use it.
    iS1 = np.linalg.inv(S1)
    diff = m1 - m0

    tr_term = np.trace(iS1 @ S0)

    # Use log-determinants: det() itself under/overflows for larger
    # matrices, which turns log(det(S1) / det(S0)) into nan or inf.
    sign0, logdet0 = np.linalg.slogdet(S0)
    sign1, logdet1 = np.linalg.slogdet(S1)
    if sign0 * sign1 < 0:
        # Determinants of opposite sign: the log of their ratio is undefined.
        det_term = np.nan
    else:
        det_term = logdet1 - logdet0

    quad_term = diff.T @ iS1 @ diff

    return .5 * (tr_term + det_term + quad_term - N)