forked from servomac/Human-Activity-Recognition
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata.py
55 lines (45 loc) · 1.57 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import pandas as pd
import numpy as np
# Root folder of the extracted dataset; every path below is built relative to it.
DATADIR = 'UCI HAR Dataset'

# Base names of the inertial-signal files: three sensor channels, each with
# x/y/z axes. The order is significant, since it becomes the order of the
# last axis of the array returned by ``load_signals``.
SIGNALS = [
    f"{sensor}_{axis}"
    for sensor in ("body_acc", "body_gyro", "total_acc")
    for axis in "xyz"
]
def _read_csv(filename):
    """Read a header-less, whitespace-separated text file into a DataFrame.

    Args:
        filename: Path (or any object accepted by ``pandas.read_csv``) of the
            file to parse.

    Returns:
        A ``pandas.DataFrame`` with integer column labels (``header=None``),
        one row per line of the file.
    """
    # ``delim_whitespace=True`` is deprecated since pandas 2.2; the regex
    # separator below is the documented equivalent (any run of whitespace
    # separates fields).
    return pd.read_csv(filename, sep=r"\s+", header=None)
def load_signals(subset):
    """Load every inertial signal of one dataset split into a single array.

    Args:
        subset: Name of the split; it is used both as the directory name and
            as the file-name suffix (``load_data`` passes ``'train'`` and
            ``'test'``).

    Returns:
        A NumPy array with axes ordered (sample, timestep, signal), the last
        axis following the order of ``SIGNALS``. Per the original author's
        note this is (7352, 128, 9) for train and (2947, 128, 9) for test.
    """
    per_signal = [
        _read_csv(
            f'{DATADIR}/{subset}/Inertial Signals/{signal}_{subset}.txt'
        ).to_numpy()
        for signal in SIGNALS
    ]
    # The list stacks as (signal, sample, timestep); move the signal axis to
    # the end so that each sample/timestep pair groups its signal readings.
    return np.transpose(per_signal, (1, 2, 0))
def load_y(subset):
    """Load the activity labels of one dataset split, one-hot encoded.

    The target is an integer from 1 to 6 that identifies a human activity.
    Each label is expanded into an indicator vector with
    ``pandas.get_dummies``, so the output has one column per distinct label
    value found in the file (six when every activity is present).

    Args:
        subset: Name of the split; used as directory name and file suffix.

    Returns:
        A 2-D NumPy array with one row per sample. The element dtype is
        whatever ``pandas.get_dummies`` produces by default.
    """
    labels_path = f'{DATADIR}/{subset}/y_{subset}.txt'
    # The label file has a single column; take it as a Series.
    labels = _read_csv(labels_path)[0]
    one_hot = pd.get_dummies(labels)
    return one_hot.to_numpy()
def load_data():
    """Assemble the full dataset from its individual files.

    Returns:
        The tuple ``(X_train, X_test, y_train, y_test)``, where the ``X``
        arrays come from ``load_signals`` and the ``y`` arrays from
        ``load_y``.
    """
    # Signals first, then labels: the same read order as before.
    X_train = load_signals('train')
    X_test = load_signals('test')
    y_train = load_y('train')
    y_test = load_y('test')
    return X_train, X_test, y_train, y_test