-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathconfig.yml
81 lines (76 loc) · 3.21 KB
/
config.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# Command used to launch training (executable followed by its script)
TRAIN_COMMAND:
  - python
  - upstream.py
# Path to the training dataset
TRAIN_DATA: /path/to/upstream_dataset/
# Fraction of the training data used for validation (between 0 and 1)
# VALID_SPLIT: 0.1
SEED: 256
# Number of workers for data loading
NUM_WORKERS: 2
# GPUs to use.
# NOTE(review): the value is a list, so it presumably holds device indices
# rather than a count -- confirm against the training script.
NUM_GPUS: [0]
WANDB: false
# Project name, used only for logging
PROJ_NAME: LTA2VUpstream
# -- Dataset class
DATASET: HUNT4Masked
# Arguments for the dataset class. They must be nested under DATASET_ARGS:
# at column 0 they parse as unrelated top-level keys and DATASET_ARGS
# itself becomes null.
DATASET_ARGS:
  # Which columns to use: 0=Bx, 1=By, 2=Bz, 3=Tx, 4=Ty, 5=Tz
  x_columns: [[0, 1, 2, 3, 4, 5]]
  y_columns: [[0, 1, 2, 3, 4, 5]]
  # Probability of randomly switching x and y columns
  switch_sensor_prob: [0.0]
  # Original sampling frequency in Hz. Do not change
  sample_freq: [50]
  # Window size (in samples) fed into the model, here 30 hours at 50 Hz
  sequence_length: [5_400_000]
  phase: [false]
  # Whether to normalize the training data
  normalize: [true]
  # Consider hour-based weighted normalization (default=True)
  hour_wise_norm: [false]
  # Compute STFT before feeding into model
  STFT: [true]
  # Set if the dataset is already stored as spectrograms
  stored_as_STFT: [true]
  # Size of Fourier transform (in samples), here 1 min at 50 Hz
  stft_n_fft: [3_000]
  # Hop length for STFT computation. If null, n_fft//2 is used
  stft_hop_len: [null]
  # Amount of HUNT4 hours per split (80% / 10% / 10% of dataset)
  num_train_hours: [657_360]
  num_valid_hours: [82_170]
  num_test_hours: [82_170]
  # Returns timestamps of x, i.e.: (x, x_ts), (y, mask)
  # Timestamps are 5-dim int vectors of the form:
  # [month, day, weekday, hour, minute]
  return_timestamps: true
  # Whether to extract day time only (6am-12am)
  skip_night_time: false
  ##### Alteration parameters #####
  # Percentage of time frames to mask in spectrogram
  time_alter_percentage: 0.10
  # Number of consecutive time frames to mask in spectrogram
  time_alter_width: 5
  # Swap time windows (like in Liu et al. 2021) (default=True)
  time_swap: false
  # Probability of flipping time mask (default is 0.0)
  time_mask_flip_prob: 0.5
  # Percentage of frequency bins to mask in spectrogram
  freq_alter_percentage: 0.05
  # Number of consecutive frequency bins to mask in spectrogram
  # (around 5% of one spectrogram's frequency bins)
  freq_alter_width: 75
  #################################
# -- Model
# Which classifier to use
ALGORITHM: TransformerEncoderNetwork
# Arguments for the classifier
# (all given as lists so that a grid search can be performed).
# They must be nested under ALGORITHM_ARGS: at column 0 they parse as
# unrelated top-level keys and ALGORITHM_ARGS itself becomes null.
ALGORITHM_ARGS:
  epochs: [15]
  batch_size: [4]
  loss: [maskedL1]
  # null=identity
  output_activation: [null]
  # Which metrics to log
  metrics: [null]
  optimizer: [AdamW]
  # Floats in exponent form are written with an explicit decimal point:
  # YAML 1.1 loaders such as PyYAML resolve `1e-5` as a string, not a float.
  weight_decay: [1.0e-5]
  lr: [1.0e-4]
  lr_scheduler: [LinearWUSchedule]
  # Architecture params
  n_encoder_layers: [4]
  nhead: [6]
  d_model: [1500]
  # Dim of feed-forward layer inside encoder layer
  dim_feedforward: [2048]
  # Local and global positional encoding
  positional_encoding:
    - ['TemporalPositionalEncoding', 'AbsolutePositionalEncoding']
  # Dropout probability
  dropout: [0.1]
  # Number of layers of the MLP at the end of the model
  n_prediction_head_layers: [1]
  # Dim of hidden layers of the MLP at the end of the model
  dim_prediction_head: [null]
  # What operation to perform on the sequence output?
  # null="seq2seq model", mean="seq2single by averaging output sequences"
  seq_operation: [null]