-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdataset.py
77 lines (64 loc) · 2.91 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import pandas as pd
import numpy as np
import torch
import os
from sklearn.preprocessing import MinMaxScaler
class CSVDataset(torch.utils.data.Dataset):
    """Overlapping-window time-series dataset read from ``dataset/<name>``.

    The CSV is loaded with pandas and rows containing NaN are dropped.
    Three aligned series are derived from the remaining rows:

    * ``raindrop`` -- every column, min-max scaled per column, with the last
      ``forecast_range`` rows removed (model input);
    * ``runoff_history`` -- column 0, unscaled, same rows as ``raindrop``;
    * ``runoff`` -- column 0, unscaled, shifted ``forecast_range`` rows
      ahead (prediction target).

    The series are split chronologically into a training and a test part and
    served as windows of ``sample_length`` rows whose start positions advance
    by ``sample_length / overlapping_split`` rows per index.

    NOTE(review): the min/max used for scaling are computed over the whole
    file (train and test rows together); confirm this is intended.
    """

    def __init__(self,
                 forecast_range,
                 dataset,
                 mode,
                 train_test_split_ratio=0.7,
                 sample_length=72,
                 training_set_scale=1,
                 training_set_start=0,
                 few_shot_num=10):
        """Load the CSV and keep the slice that belongs to ``mode``.

        Args:
            forecast_range: Number of rows between an input row and its
                target row; must be >= 1.
            dataset: File name of the CSV inside the ``dataset/`` directory.
            mode: ``'train'`` or ``'test'``.
            train_test_split_ratio: Fraction of rows assigned to training.
            sample_length: Number of rows in each returned window.
            training_set_scale: Fraction of the training part actually used
                in ``'train'`` mode.
            training_set_start: Which of the evenly spaced training subsets
                to use (multiplied by the spacing to get the start row).
            few_shot_num: Number of evenly spaced subset start positions.
                ``None`` or ``1`` means the subset starts at row 0.

        Raises:
            AssertionError: If ``mode`` or ``forecast_range`` is invalid.
        """
        assert mode in ['train', 'test']
        assert forecast_range >= 1
        self.mode = mode
        self.sample_length = sample_length
        self.forecast_range = forecast_range
        self.training_set_scale = training_set_scale
        # Number of window start positions per `sample_length` rows.
        self.overlapping_split = 20

        csv_file = f'dataset/{dataset}'
        print('--- reading', csv_file)
        df = pd.read_csv(csv_file)
        df = df.dropna()
        data = np.array(df)

        # Inputs stop `forecast_range` rows early so each has a target row.
        raindrop = data[:-self.forecast_range, :]

        # Per-column min-max normalization.  A constant column has a zero
        # range; divide by 1 instead so it maps to 0 rather than NaN/inf.
        col_min = np.min(raindrop, axis=0)
        col_range = np.max(raindrop, axis=0) - col_min
        col_range = np.where(col_range == 0, 1, col_range)
        raindrop = (raindrop - col_min) / col_range

        runoff_history = data[:-self.forecast_range, 0]
        runoff = data[self.forecast_range:, 0]

        train_test_split = int(train_test_split_ratio * len(raindrop))
        training_subset = int(train_test_split * self.training_set_scale)

        if few_shot_num is None or few_shot_num == 1:
            # A single start position: there is no spacing to compute, and
            # dividing by (few_shot_num - 1) would raise ZeroDivisionError.
            offset = 0
        else:
            start_range = int((train_test_split - training_subset) / (few_shot_num - 1))
            offset = int(training_set_start * start_range)

        if self.mode == 'train':
            stop = training_subset + offset
            self.raindrop = raindrop[offset:stop, :]
            self.runoff_history = runoff_history[offset:stop]
            self.runoff = runoff[offset:stop]
        elif self.mode == 'test':
            self.raindrop = raindrop[train_test_split:, :]
            self.runoff_history = runoff_history[train_test_split:]
            self.runoff = runoff[train_test_split:]

        print(f'--- loaded [{dataset}] [{self.mode}] set: '
              f'input shape: {self.raindrop.shape}, '
              f'target shape: {self.runoff.shape}, len(dataset)={len(self)}')

    def __getitem__(self, index):
        """Return the ``(input, history, target)`` tensors of window ``index``.

        Negative indices count from the end, as for built-in sequences.

        Raises:
            IndexError: If ``index`` is outside ``[-len(self), len(self))``.
                Raising here also lets plain ``for`` iteration terminate.
        """
        length = len(self)
        if index < 0:
            index += length
        if not 0 <= index < length:
            raise IndexError(f'window index out of range for dataset of length {length}')

        # Consecutive windows start sample_length / overlapping_split rows apart.
        start = int(index / self.overlapping_split * self.sample_length)
        stop = start + self.sample_length
        return (torch.from_numpy(self.raindrop[start:stop]),
                torch.from_numpy(self.runoff_history[start:stop]),
                torch.from_numpy(self.runoff[start:stop]))

    def __len__(self):
        """Return the number of full windows available (never negative)."""
        windows = int(len(self.raindrop) / self.sample_length * self.overlapping_split)
        # The last `overlapping_split` start positions would run past the end
        # of the data; clamp at 0 when the data is shorter than one window.
        return max(windows - self.overlapping_split, 0)

    def get_input_size(self):
        """Return the number of input columns per row."""
        return self.raindrop.shape[1]