-
Notifications
You must be signed in to change notification settings - Fork 43
/
Copy pathAccelerometerFeatureExtractionScript.py
309 lines (234 loc) · 11.4 KB
/
AccelerometerFeatureExtractionScript.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
import numpy as np
import pandas as pd
import scipy.signal as scisig
import os
import matplotlib.pyplot as plt
from load_files import getInputLoadFile, getOutputPath, get_user_input
DEBUG = True
SAMPLING_RATE = 8
ONE_MINUTE_S = 60
THIRTY_MIN_S = ONE_MINUTE_S*30
SECONDS_IN_DAY = 24*60*60
STILLNESS_MOTION_THRESHOLD = .1
PERCENT_STILLNESS_THRESHOLD = .95
STEP_DIFFERENCE_THRESHOLD = 0.3
def computeAllAccelerometerFeatures(data, time_frames):
if DEBUG: print("\t\tcomputing motion...")
motion = computeMotion(data['AccelX'], data['AccelY'], data['AccelZ'])
if DEBUG: print("\t\tcomputing steps...")
steps = computeSteps(motion)
if DEBUG: print("\t\tcomputing stillness...")
stillness = computeStillness(motion)
features = []
for time_frame in time_frames:
start = time_frame[0]
end = time_frame[1]
start1Hz = int(start / SAMPLING_RATE)
end1Hz = end if end == -1 else int(end / SAMPLING_RATE)
if DEBUG: print("\t\tcomputing features for time frame. Start index: "+ str(start)+ " end index: "+ str(end))
time_frame_feats = computeAccelerometerFeaturesOverOneTimeFrame(motion[start:end],
steps[start:end],
stillness[start1Hz:end1Hz])
features.append(time_frame_feats)
return features, steps, motion
def computeMotion(acc1, acc2, acc3):
'''Aggregates 3-axis accelerometer signal into a single motion signal'''
return np.sqrt(np.array(acc1)**2 + np.array(acc2)**2 + np.array(acc3)**2)
def computeSteps(motion):
'''Determines the location of steps from the aggregated accelerometer signal.
Signal is low-pass filtered, then minimums are located in the signal. For each
min, if the max absolute derivative (first difference) immediately surrounding
it is greater than a threshold, it is counted as a step.
Args:
motion: root mean squared 3 axis acceleration
Returns:
steps: binary array at 8Hz which is 1 everywhere there is a step'''
filtered_signal = filterSignalFIR(motion, 2, 256)
diff = filtered_signal[1:]-filtered_signal[:-1]
mins = scisig.argrelextrema(filtered_signal, np.less)[0]
steps = [0] * len(filtered_signal)
for m in mins:
if m <= 4 or m >= len(diff) - 4:
continue
if max(abs(diff[m-4:m+4])) > STEP_DIFFERENCE_THRESHOLD:
steps[m] = 1.0
return steps
def filterSignalFIR(eda, cutoff=0.4, numtaps=64):
f = cutoff/(SAMPLING_RATE/2.0)
FIR_coeff = scisig.firwin(numtaps,f)
return scisig.lfilter(FIR_coeff,1,eda)
def computeStillness(motion):
'''Locates periods in which the person is still or motionless.
Total acceleration must be less than a threshold for 95 percent of one
minute in order for that minute to count as still
Args:
motion: an array containing the root mean squared acceleration
Returns:
A 1Hz array that is 1 for each second belonging to a still period, 0 otherwise
'''
diff = motion[1:]-motion[:-1]
momentary_stillness = diff < STILLNESS_MOTION_THRESHOLD
np.append(momentary_stillness,0) # to ensure list is the same size as the full day signal
num_minutes_in_day = 24*60
#create array indicating whether person was still or not for each second of the day
#to be still the momentary_stillness signal must be true for more than 95% of the minute
#containing that second
second_stillness = [0]*SECONDS_IN_DAY
for i in range(num_minutes_in_day):
hours_start = int(i / 60)
mins_start = i % 60
hours_end = int((i+1) / 60)
mins_end = (i+1) % 60
start_idx = getIndexFromTimestamp(hours_start, mins_start)
end_idx = getIndexFromTimestamp(hours_end, mins_end)
this_minute = momentary_stillness[start_idx:end_idx]
minute_stillness = sum(this_minute) > PERCENT_STILLNESS_THRESHOLD*(60*SAMPLING_RATE)
second_idx = int(start_idx/8)
for si in range(second_idx,second_idx+60):
second_stillness[si] = float(minute_stillness)
return second_stillness
def computeAccelerometerFeaturesOverOneTimeFrame(motion, steps, stillness):
''' Computes all available features for a time period. Incoming signals are assumed to be from
only that time period.
Args:
motion: 8Hz root mean squared 3 axis acceleration
steps: 8Hz binary signal that is 1 if there is a step
stillness: 1Hz 1 if the person was still during this second, 0 otherwise
Returns:
A list of features containing (in order):
-Step count number of steps detected
-mean step time during movement average number of samples between two steps (aggregated first to 1 minute,
then we take the mean of only the parts of this signal occuring during movement)
-percent stillness percentage of time the person spent nearly motionless
'''
features = []
features.extend(computeStepFeatures(steps,stillness))
features.append(countStillness(stillness))
return features
def computeStepFeatures(steps,stillness):
'''Counts the total number of steps over a given period,
as well as the average time between steps (meant to approximate walking speed)
Args:
steps: an binary array at 8 Hz that is 1 every time there is a step
Returns:
sum: the number of steps in a period
median time: average number of samples between two steps'''
sum_steps = float(sum(steps))
step_indices = np.nonzero(steps)[0]
diff = step_indices[1:]-step_indices[:-1]
#ensure length of step difference array is the same so we can get the actual locations of step differences
timed_step_diff = np.empty(len(steps)) * np.nan
timed_step_diff[step_indices[:len(diff)]] = diff
signal_length_1s = len(stillness)
signal_length_1min = int(signal_length_1s / 60)
# if there aren't enough steps during this period, cannot accurately compute mean step diff
if len(timed_step_diff) < signal_length_1min:
return [sum_steps, np.nan]
agg_stillness = aggregateSignal(stillness, signal_length_1min, 'max')
agg_step_diff = aggregateSignal(timed_step_diff, signal_length_1min, 'mean')
movement_indices = [i for i in range(len(agg_stillness)) if agg_stillness[i] == 0.0]
step_diff_during_movement = agg_step_diff[movement_indices]
return [sum_steps,round(np.nanmean(step_diff_during_movement),10)]
def countStillness(stillness):
'''Counts the total percentage of time spent still over a period
Args:
stillness: an binary array at 1Hz that is 1 if that second is part of a still period
Returns:
the percentage time spent still over a period'''
return float(sum(stillness)) / float(len(stillness))
def aggregateSignal(signal, new_signal_length, agg_method='sum'):
new_signal = np.zeros(new_signal_length)
samples_per_bucket = int(len(signal) / new_signal_length)
#the new signal length must be large enough that there is at least 1 sample per bucket
assert(samples_per_bucket > 0)
for i in range(new_signal_length):
if agg_method == 'sum':
new_signal[i] = np.nansum(signal[i*samples_per_bucket:(i+1)*samples_per_bucket])
elif agg_method == 'percent':
new_signal[i] = np.nansum(signal[i*samples_per_bucket:(i+1)*samples_per_bucket]) / samples_per_bucket
elif agg_method == 'mean':
new_signal[i] = np.nanmean(signal[i*samples_per_bucket:(i+1)*samples_per_bucket])
elif agg_method == 'max':
new_signal[i] = np.nanmax(signal[i*samples_per_bucket:(i+1)*samples_per_bucket])
return new_signal
def getIndexFromTimestamp(hours, mins=0):
return ((hours * 60) + mins) * 60 * SAMPLING_RATE
def inputTimeFrames():
'''Allows user to choose the time frames over which they compute accelerometer features.'''
time_frames = []
print("Accelerometer features can be extracted over different time periods.")
cont = get_user_input("If you would like to enter a time period over which to compute features, enter 'y', or press enter to compute features over the entire file.")
while cont == 'y' or cont == 'Y':
start = int(get_user_input("Enter the starting hour of the time period (hour 0 is when the file starts):"))
end = int(get_user_input("Enter the ending hour of the time period (hour 0 is when the file starts; use -1 for the end of the file):"))
start = getIndexFromTimestamp(int(start))
if end != -1:
end = getIndexFromTimestamp(int(end))
time_frames.append([start,end])
print("Great! Now computing features for the following time periods:"+ str(time_frames))
cont = get_user_input("To add another time period, enter 'y'. To finish, press enter.")
if len(time_frames) == 0:
time_frames = [[0,-1]] # the whole file
return time_frames
def saveFeaturesToFile(features, time_frames, output_file):
of = open(output_file, 'w')
of.write("Time period start hour, Time period end hour, Step count, Mean step time during movement, Percent stillness\n")
tf_i = 0
for tf in time_frames:
output_str = str(tf[0]) + ' , ' + str(tf[1])
for feat in features[tf_i]:
output_str += ' , ' + str(feat)
tf_i += 1
of.write(output_str + '\n')
of.close()
print("Saved features to file"+ output_file)
# draws a graph of the data with the peaks marked on it
# assumes that 'data' dataframe already contains the 'peaks' column
def plotSteps(data, x_seconds, sampleRate = SAMPLING_RATE):
if x_seconds:
time_m = np.arange(0,len(data))/float(sampleRate)
realign = 128/(sampleRate)
else:
time_m = np.arange(0,len(data))/(sampleRate*60.)
realign = 128/(sampleRate*60.)
data_min = data['motion'].min()
data_max = data['motion'].max()
#Plot the data with the Peaks marked
plt.figure(1,figsize=(20, 5))
plt.plot(time_m,data['motion'])
for i in range(len(data)):
if data.iloc[i]["steps"]==1:
x_loc = time_m[i] - realign
plt.plot([x_loc,x_loc],[data_min,data_max],"k")
step_height = data_max * 1.15
#data['steps_plot'] = data['steps'] * step_height
#plt.plot(time_m,data['steps_plot'],'k')
plt.xlim([0,time_m[-1]])
plt.ylim([data_min-.1,data_max+.1])
plt.title('Motion with Detected "Steps" marked')
plt.ylabel('g')
if x_seconds:
plt.xlabel('Time (s)')
else:
plt.xlabel('Time (min)')
plt.show()
if __name__ == "__main__":
print("This script will extract features related to accelerometer data.")
data, filepath_confirm = getInputLoadFile()
output_path = getOutputPath()
time_frames = inputTimeFrames()
features, steps, motion = computeAllAccelerometerFeatures(data, time_frames)
data["steps"] = steps
data["motion"] = motion
saveFeaturesToFile(features, time_frames, output_path)
print("")
plot_ans = get_user_input("Do you want to plot the detected steps? (y/n): ")
if 'y' in plot_ans:
secs_ans = get_user_input("Would you like the x-axis to be in seconds or minutes? (sec/min): ")
if 'sec' in secs_ans:
x_seconds=True
else:
x_seconds=False
plotSteps(data, x_seconds)
else:
print("\tOkay, script will not produce a plot")