# Acknowledgement: The code is copied and developed from
# https://github.com/hildensia/bayesian_changepoint_detection/tree/master/bayesian_changepoint_detection
from abc import ABC, abstractmethod
from functools import partial
from itertools import islice

import numpy as np
import scipy.stats as ss
from numpy.linalg import inv


class BaseLikelihood(ABC):
    """
    This is an abstract class to serve as a template for future users to mimic
    when adding new models for online Bayesian changepoint detection.
    Make sure to override the abstract methods to do what is desired;
    otherwise you will get an error.
    update_theta takes **kwargs so the timestep iteration (t) can be passed in
    if desired. To use the time step, add this to your update_theta function:
        timestep = kwargs['t']
    """

    @abstractmethod
    def pdf(self, data: np.array):
        raise NotImplementedError(
            "PDF is not defined. Please override it in a subclass."
        )

    @abstractmethod
    def update_theta(self, data: np.array, **kwargs):
        raise NotImplementedError(
            "update_theta is not defined. Please override it in a subclass."
        )
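

# A minimal sketch (not part of the original API): one way to satisfy the
# BaseLikelihood contract. The fixed Gaussian below is a toy assumption;
# practical models grow run-length-indexed parameter vectors, as the
# classes that follow do.
#
# class FixedGaussian(BaseLikelihood):
#     def __init__(self, mu: float = 0.0, sigma: float = 1.0):
#         self.mu, self.sigma = mu, sigma
#
#     def pdf(self, data: np.array):
#         return ss.norm.pdf(data, loc=self.mu, scale=self.sigma)
#
#     def update_theta(self, data: np.array, **kwargs):
#         timestep = kwargs["t"]  # the iteration index, as described above
#         # a fixed model has nothing to update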


class MultivariateT(BaseLikelihood):
def __init__(
self,
dims: int = 1,
dof: int = 0,
kappa: int = 1,
mu: float = -1,
scale: float = -1,
):
"""
Create a new predictor using the multivariate student T distribution as the posterior predictive.
This implies a multivariate Gaussian distribution on the data, a Wishart prior on the precision,
and a Gaussian prior on the mean.
Implementation based on Haines, T.S., Gaussian Conjugate Prior Cheat Sheet.
:param dof: The degrees of freedom on the prior distribution of the precision (inverse covariance)
:param kappa: The number of observations we've already seen
:param mu: The mean of the prior distribution on the mean
:param scale: The mean of the prior distribution on the precision
:param dims: The number of variables
"""
# We default to the minimum possible degrees of freedom, which is 1 greater than the dimensionality
if dof == 0:
dof = dims + 1
# The default mean is all 0s
if mu == -1:
mu = [0] * dims
else:
mu = [mu] * dims
        # The default covariance is the identity matrix. The scale is the inverse of that, which is also the identity
        if scale == -1:
            scale = np.identity(dims)
        else:
            # A scalar scale is interpreted as an isotropic scale matrix
            scale = np.identity(dims) * scale
# Track time
self.t = 0
# The dimensionality of the dataset (number of variables)
self.dims = dims
# Each parameter is a vector of size 1 x t, where t is time. Therefore each vector grows with each update.
self.dof = np.array([dof])
self.kappa = np.array([kappa])
self.mu = np.array([mu])
self.scale = np.array([scale])

    def pdf(self, data: np.array):
        """
        Returns the probability of the observed data under the current and historical parameters
        Parameters:
            data - the datapoints to be evaluated (shape: 1 x D vector)
        """
self.t += 1
t_dof = self.dof - self.dims + 1
expanded = np.expand_dims((self.kappa * t_dof) / (self.kappa + 1), (1, 2))
ret = np.empty(self.t)
try:
# This can't be vectorised due to https://github.com/scipy/scipy/issues/13450
for i, (df, loc, shape) in islice(
enumerate(zip(t_dof, self.mu, inv(expanded * self.scale))), self.t
):
ret[i] = ss.multivariate_t.pdf(x=data, df=df, loc=loc, shape=shape)
except AttributeError:
raise Exception(
"You need scipy 1.6.0 or greater to use the multivariate t distribution"
)
return ret

    def update_theta(self, data: np.array, **kwargs):
        """
        Performs a Bayesian update on the prior parameters, given data
        Parameters:
            data - the datapoints to be evaluated (shape: 1 x D vector)
        """
centered = data - self.mu
# We simultaneously update each parameter in the vector, because following figure 1c of the BOCD paper, each
# parameter for a given t, r is derived from the same parameter for t-1, r-1
# Then, we add the prior back in as the first element
self.scale = np.concatenate(
[
self.scale[:1],
inv(
inv(self.scale)
+ np.expand_dims(self.kappa / (self.kappa + 1), (1, 2))
* (np.expand_dims(centered, 2) @ np.expand_dims(centered, 1))
),
]
)
self.mu = np.concatenate(
[
self.mu[:1],
(np.expand_dims(self.kappa, 1) * self.mu + data)
/ np.expand_dims(self.kappa + 1, 1),
]
)
self.dof = np.concatenate([self.dof[:1], self.dof + 1])
self.kappa = np.concatenate([self.kappa[:1], self.kappa + 1])


class StudentT(BaseLikelihood):
def __init__(
self, alpha: float = 0.1, beta: float = 0.1, kappa: float = 1, mu: float = 0
):
"""
StudentT distribution except normal distribution is replaced with the student T distribution
https://en.wikipedia.org/wiki/Normal-gamma_distribution
Parameters:
alpha - alpha in gamma distribution prior
beta - beta inn gamma distribution prior
mu - mean from normal distribution
kappa - variance from normal distribution
"""
self.alpha0 = self.alpha = np.array([alpha])
self.beta0 = self.beta = np.array([beta])
self.kappa0 = self.kappa = np.array([kappa])
self.mu0 = self.mu = np.array([mu])

    def pdf(self, data: np.array):
        """
        Return the pdf of the Student's t posterior predictive evaluated at the data
        Parameters:
            data - the datapoints to be evaluated (shape: 1 x D vector)
        """
return ss.t.pdf(
x=data,
df=2 * self.alpha,
loc=self.mu,
scale=np.sqrt(self.beta * (self.kappa + 1) / (self.alpha * self.kappa)),
)

    def update_theta(self, data: np.array, **kwargs):
        """
        Performs a Bayesian update on the prior parameters, given data
        Parameters:
            data - the datapoints to be evaluated (shape: 1 x D vector)
        """
muT0 = np.concatenate(
(self.mu0, (self.kappa * self.mu + data) / (self.kappa + 1))
)
kappaT0 = np.concatenate((self.kappa0, self.kappa + 1.0))
alphaT0 = np.concatenate((self.alpha0, self.alpha + 0.5))
betaT0 = np.concatenate(
(
self.beta0,
self.beta
+ (self.kappa * (data - self.mu) ** 2) / (2.0 * (self.kappa + 1.0)),
)
)
self.mu = muT0
self.kappa = kappaT0
self.alpha = alphaT0
self.beta = betaT0


def const_prior(t, p: float = 0.25):
    """
    Constant prior for every datapoint, reported in log form.
    t is unused but kept so all priors share the same interface.
    Arguments:
        p - probability of the event
    """
    return np.log(p)


def geom_prior(t, p: float = 0.25):
    """
    Geometric prior for every datapoint, reported in log form.
    Refer to https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.geom.html
    for more information on the geometric distribution.
    Arguments:
        t - number of trials
        p - probability of success
    """
    return np.log(ss.geom.pmf(t, p=p))


def negative_binomial_prior(t, k: int = 1, p: float = 0.25):
    """
    Negative binomial prior for the datapoints, reported in log form.
    Refer to https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.nbinom.html
    for more information on the negative binomial distribution.
    Parameters:
        k - the number of successes
        p - the probability of success
    """
    return np.log(ss.nbinom.pmf(t, k, p))


def constant_hazard(lam, r):
    """
    Constant hazard function for Bayesian online changepoint detection.
    Arguments:
        lam - expected run length; the hazard is 1/lam at every step
        r - R matrix (used only for its shape)
    """
    return 1 / lam * np.ones(r.shape)


# Example: a constant hazard with an expected run length of 250
hazard_function = partial(constant_hazard, 250)


def online_changepoint_detection(data, hazard_function, log_likelihood_class):
    """
    Parameters:
        data -- the time series data
        hazard_function -- maps run lengths to the changepoint hazard at each step
        log_likelihood_class -- a BaseLikelihood instance providing pdf and update_theta
    Outputs:
        R -- R[s, t] is the probability at time step t that the current run length is s
        maxes -- the argmax over the column axis of R (the most likely run length) for each time step
    """
maxes = np.zeros(len(data) + 1)
R = np.zeros((len(data) + 1, len(data) + 1))
R[0, 0] = 1
for t, x in enumerate(data):
# Evaluate the predictive distribution for the new datum under each of
# the parameters. This is the standard thing from Bayesian inference.
predprobs = log_likelihood_class.pdf(x)
# Evaluate the hazard function for this interval
H = hazard_function(np.array(range(t + 1)))
# Evaluate the growth probabilities - shift the probabilities down and to
# the right, scaled by the hazard function and the predictive
# probabilities.
R[1 : t + 2, t + 1] = R[0 : t + 1, t] * predprobs * (1 - H)
# Evaluate the probability that there *was* a changepoint and we're
# accumulating the mass back down at r = 0.
R[0, t + 1] = np.sum(R[0 : t + 1, t] * predprobs * H)
# Renormalize the run length probabilities for improved numerical
# stability.
R[:, t + 1] = R[:, t + 1] / np.sum(R[:, t + 1])
# Update the parameter sets for each possible run length.
log_likelihood_class.update_theta(x, t=t)
maxes[t] = R[:, t].argmax()
return R, maxes
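

# A minimal usage sketch (assumed, not part of the original module): running the
# detector directly with the univariate StudentT model. Column t of R holds the
# run length distribution after t observations; a sudden drop in maxes (the most
# likely run length) suggests a changepoint.
#
# series = np.concatenate(
#     [np.random.normal(0, 1, 100), np.random.normal(5, 1, 100)]
# )
# R, maxes = online_changepoint_detection(
#     series, partial(constant_hazard, 250), StudentT()
# )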


# Acknowledgement: The code is copied and developed from
# https://github.com/phamquiluan/baro
def multivariate_bocpd(data):
    """Run BOCPD on a multivariate pandas DataFrame, one column per variable."""
    return bocpd(data, likelihood=MultivariateT(dims=data.shape[1]))


def find_cps(maxes):
    """Report indices where the most likely run length jumps by more than 1, i.e. likely changepoints."""
    cps = []
    for i in range(1, len(maxes)):
        if abs(maxes[i] - maxes[i - 1]) > 1:
            cps.append((i, abs(maxes[i] - maxes[i - 1])))
    return cps


def bocpd(data, likelihood, lam=50):
    """Run Bayesian online changepoint detection and return the detected changepoint indices."""
    data = data.to_numpy()
    R, maxes = online_changepoint_detection(
        data,
        partial(constant_hazard, lam),
        likelihood,
    )
    cps = find_cps(maxes)
    return [p[0] for p in cps]
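

# Minimal end-to-end sketch: the demo below is an addition, and assumes pandas
# is installed (this module does not otherwise import it) and scipy >= 1.6 for
# the multivariate t distribution.
if __name__ == "__main__":
    import pandas as pd

    rng = np.random.default_rng(0)
    # Two regimes with different means; a changepoint is planted at index 100.
    shift = np.concatenate([rng.normal(0, 1, 100), rng.normal(5, 1, 100)])
    df = pd.DataFrame({"x": shift, "y": shift + rng.normal(0, 1, 200)})
    print(multivariate_bocpd(df))  # expected to report an index near 100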