-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodel.py
73 lines (60 loc) · 2.86 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import os
class Linear_QNet(nn.Module):
    """Feed-forward network with one hidden layer.

    The input is passed through a linear layer, a ReLU, and a second linear
    layer. The output of the second layer is returned as-is (no activation).
    """

    def __init__(self, input_size, hidden_size, output_size) -> None:
        """Create the two linear layers.

        Args:
            input_size: number of input features.
            hidden_size: number of units in the hidden layer.
            output_size: number of output values.
        """
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return ``linear2(relu(linear1(x)))``."""
        hidden = F.relu(self.linear1(x))
        # No activation on the output layer: the raw linear output is returned.
        return self.linear2(hidden)

    def save(self, file_name='model_obstacles4.pth'):
        """Write the model's ``state_dict`` to ``./model/<file_name>``.

        The ``./model`` directory (relative to the current working directory)
        is created if it does not exist yet.

        Args:
            file_name: name of the file to create inside ``./model``.
        """
        model_folder_path = './model'
        # exist_ok avoids the check-then-create race of
        # `if not os.path.exists(...): os.makedirs(...)`.
        os.makedirs(model_folder_path, exist_ok=True)
        torch.save(self.state_dict(), os.path.join(model_folder_path, file_name))
class QTrainer:
    """Runs Q-learning update steps on a model using Adam and an MSE loss."""

    def __init__(self, model, learning_rate, gamma) -> None:
        """Store the model and hyper-parameters and build the optimizer.

        Args:
            model: module whose parameters are optimized; called on a batch
                of states, it must return one value per action.
            learning_rate: step size passed to the Adam optimizer.
            gamma: discount factor. In the update rule
                ``Q_new = reward + gamma * max(Q(next_state))`` it scales how
                much the estimated value of the next state contributes to
                the target relative to the immediate reward.
        """
        self.learning_rate = learning_rate
        self.gamma = gamma
        self.model = model
        self.optim = optim.Adam(model.parameters(), lr=self.learning_rate)
        self.criterion = nn.MSELoss()  # (Q_new - Q)^2

    def train_step(self, old_state, action, reward, new_state, game_over):
        '''
        Perform one optimization step on a single transition or a batch.

        Inputs:
        - old_state: previous state before action was performed
        - action: straight, left, or right (the index of the largest entry
          in each action vector is taken as the chosen action)
        - reward: -10 if snake dies, 0 if nothing happens, and 10 if snake gets food
        - new_state: state of snake after action is performed
        - game_over: is the game over? (has the snake died?)

        If ``old_state`` is one-dimensional, all inputs are treated as a
        single transition and wrapped into a batch of size one; otherwise
        every input is expected to be a sequence with one entry per sample.
        Returns None.
        '''
        state = torch.tensor(old_state, dtype=torch.float)
        next_state = torch.tensor(new_state, dtype=torch.float)
        action = torch.tensor(action, dtype=torch.long)
        reward = torch.tensor(reward, dtype=torch.float)

        if state.dim() == 1:
            # Single transition: add a leading batch dimension everywhere.
            state = state.unsqueeze(0)
            next_state = next_state.unsqueeze(0)
            action = action.unsqueeze(0)
            reward = reward.unsqueeze(0)
            game_over = (game_over, )

        # BELLMAN EQUATION and Q UPDATE RULE
        pred = self.model(state)

        # The target is a fixed regression goal: build it without tracking
        # gradients so the update only flows through `pred`.
        with torch.no_grad():
            # One batched forward pass instead of one pass per sample.
            next_q_max = self.model(next_state).max(dim=1).values
        target = pred.detach().clone()

        for i, done in enumerate(game_over):
            q_new = reward[i]
            if not done:
                q_new = reward[i] + self.gamma * next_q_max[i]
            # Use this sample's own action row; argmax over the whole batch
            # would return a flattened index shared by every row.
            target[i, torch.argmax(action[i]).item()] = q_new

        self.optim.zero_grad()
        loss = self.criterion(pred, target)
        loss.backward()  # back propagate
        self.optim.step()