-
Notifications
You must be signed in to change notification settings - Fork 19
/
Copy pathconfig.py
155 lines (122 loc) · 4.32 KB
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
from typing import List, Optional
from pathlib import Path
from dataclasses import dataclass, field
from dataclasses_json import dataclass_json
from cached_property import cached_property
@dataclass_json
@dataclass
class Config(object):
"""
Attributes
----------
decay_rate:
Decay rate of the momentum
data_paths:
List of paths to input files. The input files can be split into
multiple csv as long as they have same header and format.
When loading the data through `get_train_test_datasets` one can
choose whether the csv have to be merged (when loaded) and then
transformed into windows or merged only after windowing. This
was done since the SML2010 dataset is split into two csvs with the
second starting at a timestep far later than the first one
target_cols:
The name of the columns that are the target values (y) for the
prediction. When the dataset is returned from
`get_train_test_datasets` the X will contain values of the columns in
(header minus (target_cols union drop_cols)) while the y values of the
`target_cols`.
drop_cols:
The name of the columns to be dropped from the csv.
In the case of SML2010 some columns are 0, such as the ones regarding
Exterior_Entalpic_*
n:
Number of driving series
m:
The size of the hidden state of the encoder. In the case of encoder
decoder model this will be also the decoder's hidden size
p:
The size of the hidden state of the decoder. Ignored in encoder
decoder model
sep:
The pattern separating columns in the csvs
T:
The number of past values the predictor can perceive. T values
of the driving series (X) and T-1 values for the target series.
The T-th values of the target series (y) are the ones to be predicted
learning_rate:
Learning rate for optimizing the parameters
decay_frequency:
max_gradient_norm:
Used in gradient clipping
optimizer:
Optimizer name
batch_size:
Number of windows to be processed at the same time
num_epochs:
Number of epochs to train the network
log_dir:
Directory for logging
train_ratio:
Portion of the data to be used as training set. The remainder of
the portion is equally split into test and validation.
report_frequency:
Print loss and train speed each [this param] batches
plot_frequency:
Plot true and predicted curves each [this param] epochs
seed:
Seed used by frameworks to ensure reproducibility
inp_att_enabled:
Whether the input attention is enabled or not
temporal_att_enabled:
Whether the temporal attention is enabled or not
"""
decay_rate: float
data_paths: List[str]
target_cols: List[str]
drop_cols: Optional[List[str]] = field(default_factory=list)
m: int = 64
p: int = 64
sep: str = ","
T: int = 10
learning_rate: float = 0.001
decay_frequency: int = 1000
max_gradient_norm: float = 5
optimizer: str = "adam"
batch_size: int = 128
num_epochs: int = 10
log_dir: str = "log/"
train_ratio: float = 0.8
report_frequency: int = 50
plot_frequency: int = 5
seed: int = 42
inp_att_enabled: bool = True
temporal_att_enabled: bool = True
@cached_property
def log_path(self):
return Path(self.log_dir)
@cached_property
def usecols(self):
path = self.data_paths[0]
with open(path) as f:
header = f.readline().strip().split(self.sep)
return [col for col in header if col not in self.drop_cols]
@cached_property
def driving_series(self):
return [col for col in self.usecols if col not in self.target_cols]
@cached_property
def n(self):
return len(self.driving_series)
@classmethod
def from_file(cls, path):
with open(path) as f:
c = cls.from_json(f.read())
c.log_dir = Path(c.log_dir) / Path(path).with_suffix('').name
return c
def to_file(self, path):
with open(path, "w") as f:
f.write(self.to_json(indent=4))
# Test
if __name__ == "__main__":
with open("conf/SML2010.json") as f:
c = Config.from_json(f.read())
print(c.to_json())