-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathevaluate_models.py
37 lines (32 loc) · 1.09 KB
/
evaluate_models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
def test_episodes(model, env, num_episodes):
    """Run ``model`` in ``env`` for ``num_episodes`` episodes and collect statistics.

    Each episode starts with ``env.reset()`` and continues until ``env.step``
    reports ``done``. Actions are obtained from
    ``model.predict(obs, deterministic=True)``.

    Args:
        model: Object exposing ``predict(obs, deterministic=True)`` that
            returns an ``(action, state)`` pair.
        env: Object exposing ``reset()`` returning an observation and
            ``step(action)`` returning ``(obs, reward, done, info)``.
        num_episodes: Number of episodes to run.

    Returns:
        A tuple ``(episode_rewards, num_steps, successes)`` of three lists
        with one entry per episode: the summed reward, the number of steps
        taken, and whether the reward of the final step equalled ``1``.
    """
    episode_rewards = []
    num_steps = []
    successes = []
    for _ in range(num_episodes):
        obs = env.reset()
        total_reward = 0.0
        steps = 0
        done = False
        while not done:
            action, _state = model.predict(obs, deterministic=True)
            obs, reward, done, _info = env.step(action)
            total_reward += reward
            steps += 1
        episode_rewards.append(total_reward)
        num_steps.append(steps)
        # Success is judged on the reward of the last step only.
        successes.append(reward == 1)
    return episode_rewards, num_steps, successes


# The ``test_`` prefix matches pytest's default discovery pattern; opt out so
# this helper is not collected (and failed for missing fixtures) when it is
# imported into a test module.
test_episodes.__test__ = False
def look_at_an_episode(model, env):
    """Play one episode of ``model`` in ``env`` and print a step-by-step trace.

    For every step this prints the chosen action and its entry in the table
    returned by ``structured.generate_actions.generate_actions()``, followed
    by the step reward and the ``"msg"`` entry of the info returned by
    ``env.step``. After the episode ends it prints the step count and the
    summed reward.

    Args:
        model: Object exposing ``predict(obs, deterministic=True)`` that
            returns an ``(action, state)`` pair.
        env: Object exposing ``reset()`` returning an observation and
            ``step(action)`` returning ``(obs, reward, done, info)`` where
            ``info`` supports ``info["msg"]``.
    """
    done = False
    steps = 0
    obs = env.reset()
    final_reward = 0
    # Function-scope import: only this helper needs the action table.
    import structured.generate_actions

    actions = structured.generate_actions.generate_actions()
    while not done:
        action, _state = model.predict(obs, deterministic=True)
        print(action, actions[action], end=":")
        # Rebind ``obs`` so the next prediction sees the current state;
        # discarding it would make every prediction use the reset observation.
        obs, reward, done, info = env.step(action)
        print(reward, info["msg"])
        final_reward += reward
        steps += 1
    print("steps", steps, "reward", final_reward)