Commit

trend_without_fft
Liphos committed Sep 9, 2022
1 parent 849f708 commit 7e1959a
Showing 3 changed files with 72 additions and 45 deletions.
55 changes: 32 additions & 23 deletions dataset.py
@@ -2,55 +2,64 @@
from datasets import load_dataset
from binreader import open_binary_file
from pathlib import Path
from typing import Dict


def import_dataset(name:str, split:float=0.2):
def import_dataset(name:str, split:float=0.2, shuffle=True, extra_args:Dict[str, bool]={}):
datasets = {"minds14":import_minds_hugging_face,
"trend":import_data_TREND,
}
if name in datasets:
return datasets[name](split)
return datasets[name](split, shuffle, extra_args)
else:
raise ValueError("This key is not associated with a dataset")


def import_minds_hugging_face(split:float):
def import_minds_hugging_face(split:float, shuffle:bool, extra_args:Dict[str, bool]):
minds = load_dataset("PolyAI/minds14", "fr-FR") # for French
audio_input = np.array([minds["train"][i]["audio"]["array"] for i in range(len(minds["train"]))])
intent_class = np.array([minds["train"][i]["intent_class"] for i in range(len(minds["train"]))])

return (minds["train"], minds["test"])

def import_data_TREND(split:float):
def import_data_TREND(split:float, shuffle:bool, extra_args:Dict[str, bool]):
#Data for signal analysis
data_selected = open_binary_file(Path("./MLP6_selected.bin"))/255
data_anthropique = open_binary_file(Path("./MLP6_transient.bin"))/255
data_anthropique2 = open_binary_file(Path("./MLP6_transient_2.bin"))/255
if "use_fourier_transform" in extra_args:
use_fourier_transform = extra_args["use_fourier_transform"]

data_selected = open_binary_file(Path("./data/MLP6_selected.bin"))/255
data_anthropique = open_binary_file(Path("./data/MLP6_transient.bin"))/255
data_anthropique2 = open_binary_file(Path("./data/MLP6_transient_2.bin"))/255

data_selected = data_selected[:, 256:] #We remove the beginning where there is nothing
data_anthropique = data_anthropique[:, 256:]

data_size = len(data_selected)
print(data_selected.shape)
print(data_anthropique.shape)

data_train = np.concatenate([data_selected[:int(data_size*(1-split))], data_anthropique[:int(data_size*(1-split))]])
data_test = np.concatenate([data_selected[int(data_size*(1-split)):], data_anthropique[int(data_size*(1-split)):]])

labels_train = np.concatenate([np.ones((int(data_size*(1-split)),)), np.zeros((int(data_size*(1-split)),))])
labels_test = np.concatenate([np.ones((int(data_size*(split)),)), np.zeros((int(data_size*(split)),))])

indices = np.arange(len(data_train))
indices = np.arange(len(data_selected))
np.random.shuffle(indices)

data_train, labels_train = data_train[indices], labels_train[indices]
data_selected = data_selected[indices]


indices = np.arange(len(data_test))
indices = np.arange(len(data_anthropique))
np.random.shuffle(indices)
data_test, labels_test = data_test[indices], labels_test[indices]
data_anthropique = data_anthropique[indices]

data_train = data_train[:, 256:] #We remove the beginning where there is nothing
data_test = data_test[:, 256:]
if use_fourier_transform:
data_selected = np.stack([np.fft.fft(data_selected).real, np.fft.fft(data_selected).imag], axis=1)
data_anthropique = np.stack([np.fft.fft(data_anthropique).real, np.fft.fft(data_anthropique).imag], axis=1)
data_train = np.concatenate([data_selected[:int(data_size*(1-split))], data_anthropique[:int(data_size*(1-split))]], axis=0)
data_test = np.concatenate([data_selected[int(data_size*(1-split)):], data_anthropique[int(data_size*(1-split)):]], axis=0)

else:
data_train = np.expand_dims(np.concatenate([data_selected[:int(data_size*(1-split))], data_anthropique[:int(data_size*(1-split))]]), axis=1)
data_test = np.expand_dims(np.concatenate([data_selected[int(data_size*(1-split)):], data_anthropique[int(data_size*(1-split)):]]), axis=1)
data_train = data_train - np.expand_dims(np.mean(data_train, axis=-1), axis=-1) #We normalize the input
data_test = data_test - np.expand_dims(np.mean(data_test, axis=-1), axis=-1)

data_train = data_train - np.expand_dims(np.mean(data_train, axis=-1), axis=-1) #We normalize the input
data_test = data_test - np.expand_dims(np.mean(data_test, axis=-1), axis=-1)
labels_train = np.expand_dims(np.concatenate([np.ones((int(data_size*(1-split)),)), np.zeros((int(data_size*(1-split)),))]), axis=1)
labels_test = np.expand_dims(np.concatenate([np.ones((int(data_size*(split)),)), np.zeros((int(data_size*(split)),))]), axis=1)


return (data_train, labels_train), (data_test, labels_test)
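
For reference, a minimal usage sketch of the updated loader with the new extra_args dictionary; the "trend" key matches the mapping registered above, while the variable names and printed shapes are illustrative assumptions, not part of the commit:

# Minimal sketch: calling the updated import_dataset (raw-trace branch assumed)
from dataset import import_dataset

dataset_args = {"use_fourier_transform": False}  # True would stack the FFT real/imag parts as two channels
(train_data, train_labels), (test_data, test_labels) = import_dataset(
    "trend", split=0.2, shuffle=True, extra_args=dataset_args
)
print(train_data.shape, train_labels.shape)  # roughly (2 * N_train, 1, trace_length) and (2 * N_train, 1)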
17 changes: 7 additions & 10 deletions main.py
@@ -31,7 +31,7 @@
print("Unknown exception")


comment = "exp_scheduler"
comment = "trend_model"
tensorboard_log_dir = "./Models/"+ dataset_name + "/" + comment + "-" + datetime.now().strftime("%Y-%m-%d-%H-%M-%S") # + str(len(os.listdir("./Models/"+ dataset_name))) To count the experiments


@@ -44,12 +44,13 @@

#Define loss funct and optimizer
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)
optimizer = torch.optim.Adam(model.parameters(), lr=5e-4, weight_decay=1e-4)
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=1)
batch_size = 10

#Import dataset
data_train, data_test = import_dataset(dataset_name)
dataset_args = {"use_fourier_transform":False}
data_train, data_test = import_dataset(dataset_name, split=0.2, shuffle=True, extra_args=dataset_args)

print(data_train[0].shape, data_train[1].shape)
for k in range(1):
@@ -77,11 +78,7 @@ def train_epoch(epoch:int, data, data_labels, is_testing:bool=False):
inputs, labels = data[i*batch_size: np.minimum((i+1)*batch_size, size)], data_labels[i*batch_size: np.minimum((i+1)*batch_size, size)] # Slice out the current mini-batch
# Every data instance is an input + label pair
inputs = torch.as_tensor(inputs, dtype=torch.float32, device=device)
inputs = torch.unsqueeze(inputs, dim=1)

labels = torch.as_tensor(labels, dtype=torch.float32, device=device)
labels = torch.unsqueeze(labels, dim=1)

# Zero your gradients for every batch!
optimizer.zero_grad()
if is_testing:
Expand Down Expand Up @@ -125,14 +122,14 @@ def train_epoch(epoch:int, data, data_labels, is_testing:bool=False):
mean_counter = 0

#training
nb_epoch = 40
nb_epoch = 30
for i in range(nb_epoch):
print(f"epoch: {i}, lr: {lr_scheduler.get_last_lr()}")
train_epoch(i, data_train[0], data_train[1])
train_epoch(i, data_test[0], data_test[1], is_testing=True)
torch.save(model.state_dict, tensorboard_log_dir + "/checkpoint" + str(nb_epoch) + ".pth")
if i % 5 == 0:
torch.save(model.state_dict(), tensorboard_log_dir + "/checkpoint" + str(i) + ".pth")

model.save_txt(tensorboard_log_dir + "/architecture.txt")

writer.flush()
writer.close()
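
As an aside, a small sketch of saving and restoring the periodic checkpoint that this loop writes, assuming the model, device, and tensorboard_log_dir objects defined earlier in main.py; the file name pattern and epoch index are illustrative assumptions:

# Sketch: persist and reload the model weights (assumes model/device/tensorboard_log_dir from main.py)
checkpoint_path = tensorboard_log_dir + "/checkpoint" + str(i) + ".pth"
torch.save(model.state_dict(), checkpoint_path)  # save only the learnable parameters

# Later, to evaluate from the saved checkpoint:
model.load_state_dict(torch.load(checkpoint_path, map_location=device))
model.eval()  # disable dropout and freeze batch-norm statistics for evaluation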
45 changes: 33 additions & 12 deletions model.py
@@ -5,39 +5,60 @@ def __init__(self):
super(SimpleModel, self).__init__()
self.layers = []

self.conv1 = F.Conv1d(1, 16, kernel_size=15, padding=7)
self.conv1 = F.Conv1d(1, 128, kernel_size=15, padding=7)
self.layers.append(self.conv1)

self.batch_norm1 = F.BatchNorm1d(128)
self.layers.append(self.batch_norm1)

self.conv2 = F.Conv1d(16, 32, kernel_size=8, padding=3)
self.conv2 = F.Conv1d(128, 128, kernel_size=7, padding=3)
self.layers.append(self.conv2)

self.conv3 = F.Conv1d(32, 64, kernel_size=8, padding=3)
self.batch_norm2 = F.BatchNorm1d(128)
self.layers.append(self.batch_norm2)

self.conv3 = F.Conv1d(128, 128, kernel_size=7, padding=3)
self.layers.append(self.conv3)


self.dense1 = F.Linear(383*32, 4096)
self.batch_norm3 = F.BatchNorm1d(128)
self.layers.append(self.batch_norm3)

self.conv4 = F.Conv1d(128, 128, kernel_size=7, padding=3)
self.layers.append(self.conv4)

self.batch_norm4 = F.BatchNorm1d(128)
self.layers.append(self.batch_norm4)

self.dense1 = F.Linear(12*128, 1024)
self.layers.append(self.dense1)
self.dense2 = F.Linear(4096, 1)
self.dense2 = F.Linear(1024, 1)
self.layers.append(self.dense2)

self.dropout = F.Dropout(0.5)
self.dropout = F.Dropout(0.1)
self.layers.append(self.dropout)

self.activation = F.ReLU()
self.layers.append(self.activation)

self.maxpool = F.MaxPool1d(2)
self.maxpool = F.MaxPool1d(7, stride=4, padding=3)
self.layers.append(self.maxpool)

self.flatten = F.Flatten()
self.sigmoid = F.Sigmoid()

def forward(self, x):
x = self.activation(self.dropout(self.conv1(x)))
x = self.maxpool(self.conv1(x))
x = self.dropout(self.activation(self.batch_norm1(x)))

x = self.dropout(self.activation(self.activation(self.batch_norm2(self.conv2(x))) + x))
x = self.maxpool(x)

x = self.dropout(self.activation(self.activation(self.batch_norm3(self.conv3(x))) + x))
x = self.maxpool(x)
x = self.activation(self.dropout(self.conv2(x)))
#x = self.maxpool(x)
#x = self.activation(self.dropout(self.conv3(x)))

x = self.dropout(self.activation(self.activation(self.batch_norm4(self.conv4(x))) + x))
# x = self.maxpool(x)

x = self.flatten(x)
x = self.activation(self.dense1(x))
x = self.sigmoid(self.dense2(x))
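
To see where the 12*128 input size of dense1 comes from: assuming a post-trim trace length of 768 samples, each MaxPool1d(7, stride=4, padding=3) roughly quarters the sequence length, and forward() applies it three times. A quick sketch of that arithmetic, under that length assumption:

# Sketch: length bookkeeping for the three max-pool stages (768-sample input is an assumption)
def pooled_len(length: int, kernel: int = 7, stride: int = 4, padding: int = 3) -> int:
    # PyTorch MaxPool1d output length: floor((L + 2*padding - kernel) / stride) + 1
    return (length + 2 * padding - kernel) // stride + 1

length = 768
for stage in range(3):  # pools after the conv1, conv2 and conv3 blocks
    length = pooled_len(length)
    print(f"after pool {stage + 1}: {length}")  # 192, 48, 12 -> 12 * 128 features into dense1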
