-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlcbin.py
90 lines (75 loc) · 3.64 KB
/
lcbin.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import numpy as np
def robust_mean(y, cut):
"""Computes a robust mean estimate in the presence of outliers.
Args:
y: 1D numpy array. Assumed to be normally distributed with outliers.
cut: Points more than this number of standard deviations from the median are
ignored.
Returns:
mean: A robust estimate of the mean of y.
mean_stddev: The standard deviation of the mean.
mask: Boolean array with the same length as y. Values corresponding to
outliers in y are False. All other values are True.
"""
# First, make a robust estimate of the standard deviation of y, assuming y is
# normally distributed. The conversion factor of 1.4826 takes the median
# absolute deviation to the standard deviation of a normal distribution.
# See, e.g. https://www.mathworks.com/help/stats/mad.html.
absdev = np.abs(y - np.median(y))
sigma = 1.4826 * np.median(absdev)
# If the previous estimate of the standard deviation using the median absolute
# deviation is zero, fall back to a robust estimate using the mean absolute
# deviation. This estimator has a different conversion factor of 1.253.
# See, e.g. https://www.mathworks.com/help/stats/mad.html.
if sigma < 1.0e-24:
sigma = 1.253 * np.mean(absdev)
# Identify outliers using our estimate of the standard deviation of y.
mask = absdev <= cut * sigma
# Now, recompute the standard deviation, using the sample standard deviation
# of non-outlier points.
sigma = np.std(y[mask])
# Compensate the estimate of sigma due to trimming away outliers. The
# following formula is an approximation, see
# http://w.astro.berkeley.edu/~johnjohn/idlprocs/robust_mean.pro.
sc = np.max([cut, 1.0])
if sc <= 4.5:
sigma /= (-0.15405 + 0.90723 * sc - 0.23584 * sc**2 + 0.020142 * sc**3)
# Identify outliers using our second estimate of the standard deviation of y.
mask = absdev <= cut * sigma
# Now, recompute the standard deviation, using the sample standard deviation
# with non-outlier points.
sigma = np.std(y[mask])
# Compensate the estimate of sigma due to trimming away outliers.
sc = np.max([cut, 1.0])
if sc <= 4.5:
sigma /= (-0.15405 + 0.90723 * sc - 0.23584 * sc**2 + 0.020142 * sc**3)
# Final estimate is the sample mean with outliers removed.
mean = np.mean(y[mask])
mean_stddev = sigma / np.sqrt(len(y) - 1.0)
return mean, mean_stddev, mask
def lcbin(time2, lc, nbins, method = 'robustmean', sigmacutin = 3, errorin = None):
time = time2 - np.min(time2)
binsize = float(np.max(time) - np.min(time))/float(nbins)
fbin = np.zeros(nbins) + np.nan #replicate(!Values.F_NAN, nbins)
ebin = np.copy(fbin) #replicate(!Values.F_NAN, nbins)
tbin = (np.arange(nbins) + 0.5 ) * binsize
tbin = tbin + np.min(time2)
goodindmask = np.zeros(len(time2)) + np.nan
for i in range(nbins):
w = np.argwhere(np.logical_and(time >= i*binsize, time < (i+1)*binsize))
if len(w) > 0:
if method == 'robustmean':
thisfbin,thissigmafbin,thismask = robust_mean(lc[w],sigmacutin)
thismask = np.ravel(thismask)
fbin[i] = thisfbin
goodindmask[np.ravel(w)] = thismask
goodind = np.argwhere(thismask)
if len(w) > 1:
if np.sum(thismask) > 1: # gt 1 then begin
ebin[i] = np.std(lc[w[goodind]], ddof=1) / np.sqrt(np.sum(thismask))
if errorin is not None:
fbin[i] = np.sum(lc[w[goodind]] / errorin[w[goodind]]**2) / np.sum(1.0 / errorin[w[goodind]]**2)
ebin[i] = np.sqrt(1.0 / np.sum(1.0 / errorin[w[goodind]]**2))
if np.sum(thismask) == 1: #(goodind) eq 1 then begin
ebin[i] = np.inf
return tbin,fbin, ebin, goodindmask