# train_godzilla.py
import os
import sys

import numpy as np
import stable_baselines3
import torch
from sympy import Array, Matrix, cos, lambdify, sin, symbols, tan

from quadcopter_env import Quadcopter3DGates
print("python version:", sys.version)
print("stable_baselines3 version:", stable_baselines3.__version__)
print("torch version:", torch.__version__)
print("cuda available:", torch.cuda.is_available())
print("cuda version:", torch.version.cuda)
print("cudnn version:", torch.backends.cudnn.version())
# set device
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
print("device:", device)
# set torch default device
torch.set_default_device(device)
# Equations of motion 3D quadcopter + INDI inner loop
state = symbols('x y z v_x v_y v_z phi theta psi p q r T_norm')
x,y,z,vx,vy,vz,phi,theta,psi,p,q,r,T_norm = state
# commands normalized to [-1,1]
control = symbols('p_cmd,q_cmd,r_cmd,T_cmd')
p_cmd,q_cmd,r_cmd,T_cmd = control
# Physics
g = 9.81
# INDI params
taup = 0.38205684394448125
tauq = 0.38205684394448125
taur = 10.98805347
tauT = 0.5654755665147349
#KT = 2.0392824564266964e-05
p_min = -1.0
p_max = 1.0
q_min = -1.0
q_max = 1.0
r_min = -.3
r_max = .3
T_min = 0.0
T_max = 9.81*1.06
# Drag model
k_x = 0.33915248
k_y = 0.4314916
# Rotation matrix
Rx = Matrix([[1, 0, 0], [0, cos(phi), -sin(phi)], [0, sin(phi), cos(phi)]])
Ry = Matrix([[cos(theta), 0, sin(theta)], [0, 1, 0], [-sin(theta), 0, cos(theta)]])
Rz = Matrix([[cos(psi), -sin(psi), 0], [sin(psi), cos(psi), 0], [0, 0, 1]])
R = Rz*Ry*Rx
# Body velocity
vbx, vby, vbz = R.T@Matrix([vx,vy,vz])
# Drag
Dx = -k_x*vbx
Dy = -k_y*vby
# Dynamics
d_x = vx
d_y = vy
d_z = vz
T = (T_norm+1)/2*(T_max-T_min)+T_min
d_vx, d_vy, d_vz = Matrix([0,0,g]) + R@Matrix([Dx, Dy,-T])
d_phi = p + q*sin(phi)*tan(theta) + r*cos(phi)*tan(theta)
d_theta = q*cos(phi) - r*sin(phi)
d_psi = q*sin(phi)/cos(theta) + r*cos(phi)/cos(theta)
d_p = ((p_cmd+1)/2*(p_max-p_min)+p_min - p)/taup
d_q = ((q_cmd+1)/2*(q_max-q_min)+q_min - q)/tauq
d_r = ((r_cmd+1)/2*(r_max-r_min)+r_min - r)/taur
d_T_norm = (T_cmd - T_norm)/tauT
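# Note (added): the inner loop is modeled as a first-order lag. The commanded
# body rates in [-1, 1] are mapped to their physical ranges, e.g.
#   p_ref = (p_cmd + 1)/2 * (p_max - p_min) + p_min,
# while T_norm tracks T_cmd directly in normalized units; each channel then
# converges to its reference with time constant taup/tauq/taur/tauT.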
# State space model
f = [d_x, d_y, d_z, d_vx, d_vy, d_vz, d_phi, d_theta, d_psi, d_p, d_q, d_r, d_T_norm]
# lambdify
f_func = lambdify((Array(state), Array(control)), Array(f), 'numpy')
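# Quick sanity check of the lambdified dynamics (added, illustrative): evaluate
# the state derivative at the origin with zero commands; the arguments are a
# (13,) state vector and a (4,) command vector.
_x0 = np.zeros(len(state))
_u0 = np.zeros(len(control))
print("f(x0, u0):", f_func(_x0, _u0))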
# NOTE: this expression is not used anywhere below
axs, ays, azs = R*Matrix([0,0,x])
import importlib
from quadcopter_animation import animation
importlib.reload(animation)
# Define the race track
# gate_pos = np.array([
# [-1.5,-2,-1.5],
# [1.5,2,-1.5],
# [1.5,-2,-2.5],
# [-1.5,2,-1.5]
# ]*2)
gate_pos = np.array([
[ 2,-1.5,-1.5],
[ 2, 1.5,-1.5],
[-2, 1.5,-1.5],
[-2,-1.5,-1.5]
]*2)
# gate_yaw = np.array([
# 0,
# 0,
# np.pi,
# np.pi
# ]*2)
gate_yaw = np.array([
np.pi/4,
3*np.pi/4,
5*np.pi/4,
7*np.pi/4
]*2)
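# Note (added): z is down in this model (gravity enters d_vz as +g), so
# z = -1.5 places the gates 1.5 m above the origin. The four gates form a
# 4 m x 3 m rectangle; repeating the list makes the track pass each gate twice.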
# start_pos = gate_pos[0] - np.array([2,0,0])
start_pos = gate_pos[3] #- np.array([2,0,0])
# Run a random agent (demo; it needs `env`, which is only created further
# down, so the eager reset call is commented out here and the viewer call
# below stays disabled)
# env.reset()
done = False
def run():
    global done
    # the original used an undefined `num`; assumed to be the number of parallel envs
    action = np.random.uniform(-1, 1, size=(env.num_envs, 4))
    # action[:,3] = 1
    state, reward, done, _ = env.step(action)
    # print(state[0][-4:])
    # print(env.disturbances[0])
    if reward[0] > 1:
        print("reward:", reward)
    return env.render()
#animation.view(run, gate_pos=gate_pos, gate_yaw=gate_yaw) #, record_steps=1000, show_window=True)
from datetime import datetime

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import VecMonitor
models_dir = 'results/models/PPO_INDI'
log_dir = 'results/logs/PPO_INDI'
video_log_dir = 'results/videos/PPO_INDI'
if not os.path.exists(models_dir):
os.makedirs(models_dir)
if not os.path.exists(log_dir):
os.makedirs(log_dir)
if not os.path.exists(video_log_dir):
os.makedirs(video_log_dir)
# Date and time string for unique folder names
datetime_str = datetime.now().strftime("%Y%m%d-%H%M%S")
# Create the environment
env = Quadcopter3DGates(num_envs=100, gates_pos=gate_pos, gate_yaw=gate_yaw, start_pos=start_pos, gates_ahead=1, f_func=f_func)
test_env = Quadcopter3DGates(num_envs=10, gates_pos=gate_pos, gate_yaw=gate_yaw, start_pos=start_pos, gates_ahead=1, pause_if_collision=True, f_func=f_func)
# Wrap the environment in a Monitor wrapper
env = VecMonitor(env)
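# Note (added): VecMonitor records per-episode returns and lengths for the
# vectorized training env, which feeds the rollout/ep_rew_mean statistics in
# tensorboard. test_env is only used for rendering below, so it stays unwrapped.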
# MODEL DEFINITION
policy_kwargs = dict(activation_fn=torch.nn.ReLU, net_arch=[dict(pi=[100,100,100], vf=[100,100,100])], log_std_init = 0)
model = PPO(
"MlpPolicy",
env,
policy_kwargs=policy_kwargs,
verbose=0,
tensorboard_log=log_dir,
n_steps=1000,
batch_size=5000,
n_epochs=10,
gamma=0.999
)
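# Rollout sizing (added, illustrative): with n_steps=1000 and 100 parallel
# envs, each PPO rollout collects 1000 * 100 = 100_000 transitions, split into
# minibatches of batch_size=5000 for n_epochs=10 optimization passes.
# batch_size should divide n_steps * num_envs evenly.
assert (model.n_steps * env.num_envs) % model.batch_size == 0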
print(model.policy)
print(model.num_timesteps)
def animate_policy(model, env, deterministic=False, **kwargs):
    env.reset()
    def run():
        actions, _ = model.predict(env.states, deterministic=deterministic)
        states, rewards, dones, infos = env.step(actions)
        return env.render()
    #animation.view(run, gate_pos=env.gate_pos, gate_yaw=env.gate_yaw, **kwargs)
# animate untrained policy (use this to set the recording camera position)
animate_policy(model, test_env)
def train(model, test_env, log_name, n=1000):
    # save every 10 policy rollouts
    TIMESTEPS = model.n_steps*env.num_envs*10
    for i in range(0, n):
        model.learn(total_timesteps=TIMESTEPS, reset_num_timesteps=False, tb_log_name=log_name)
        time_steps = model.num_timesteps
        # save model
        model.save(models_dir + '/' + log_name + '/' + str(time_steps))
        # save policy animation
        animate_policy(
            model,
            test_env,
            record_steps=1000,
            record_file=video_log_dir + '/' + log_name + '/' + str(time_steps) + '.mp4',
            show_window=False
        )
        # list the checkpoints saved so far (unused; indentation assumed, since
        # `log_name` only exists inside this function)
        all_items = os.listdir(models_dir + '/' + log_name)
train(model, test_env, 'INDI_TEST2_with_rate_penalty')
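# To resume training from a saved checkpoint later (added, illustrative;
# '<timesteps>' is a placeholder for an actual saved step count):
# model = PPO.load(models_dir + '/INDI_TEST2_with_rate_penalty/<timesteps>', env=env)
# train(model, test_env, 'INDI_TEST2_with_rate_penalty')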