-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathenvironments.py
53 lines (43 loc) · 1.73 KB
/
environments.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import numpy as np
from osim.env import RunEnv
from gym.spaces import Box, MultiBinary
class RunEnv2(RunEnv):
    """Wrapper around ``osim.env.RunEnv``.

    Adds three behaviors on top of the base environment:
      * observations are passed through a ``state_transform`` object
        (which exposes ``reset()``, ``process()`` and ``state_size``);
      * each agent action is repeated for ``skip_frame`` simulator steps,
        with the per-step rewards summed;
      * the summed reward is scaled by ``reward_mult``.
    """

    def __init__(self, state_transform, visualize=False, max_obstacles=3,
                 skip_frame=5, reward_mult=10.):
        super(RunEnv2, self).__init__(visualize, max_obstacles)
        self.state_transform = state_transform
        # Bounds are a loose box; the transform defines the state dimension.
        self.observation_space = Box(-1000, 1000, state_transform.state_size)
        self.action_space = MultiBinary(18)
        self.skip_frame = skip_frame
        self.reward_mult = reward_mult

    def reset(self, difficulty=2, seed=None):
        """Reset the underlying env and the state transform; return the
        transformed initial observation."""
        raw_state = super(RunEnv2, self).reset(difficulty=difficulty, seed=seed)
        self.state_transform.reset()
        transformed, _ = self.state_transform.process(raw_state)
        return transformed

    def _step(self, action):
        """Apply ``action`` for up to ``skip_frame`` frames.

        Returns ``(obs, reward, done, info)`` where ``reward`` is the
        scaled sum of base rewards plus the transform's extra reward, and
        ``info['original_reward']`` accumulates the unscaled base reward.
        """
        action = np.clip(action, 0, 1)
        info = {'original_reward': 0}
        total_reward = 0.
        for _ in range(self.skip_frame):
            obs, step_reward, done, _ = super(RunEnv2, self)._step(action)
            info['original_reward'] += step_reward
            obs, extra_reward = self.state_transform.process(obs)
            total_reward += step_reward + extra_reward
            if done:
                break
        return obs, total_reward * self.reward_mult, done, info
class JumpEnv(RunEnv):
    """``RunEnv`` variant driven by 9 binary controls.

    Each 9-dim action is tiled to the base env's 18 muscles — presumably
    mirroring the same activations onto both legs (verify against RunEnv's
    muscle ordering) — and the base reward is scaled by 10.
    """

    noutput = 9   # number of controls exposed to the agent
    ninput = 38   # observation size after trimming (see get_observation)

    def __init__(self, visualize=False, max_obstacles=0):
        super(JumpEnv, self).__init__(visualize, max_obstacles)
        self.action_space = MultiBinary(9)

    def get_observation(self):
        """Return the base observation with its last three entries dropped."""
        full_obs = super(JumpEnv, self).get_observation()
        return full_obs[:-3]

    def _step(self, action):
        """Tile the 9-dim action to 18 muscles, step the base env, and
        return its result with the reward multiplied by 10."""
        expanded = np.tile(action, 2)
        obs, reward, done, info = super(JumpEnv, self)._step(expanded)
        return obs, 10 * reward, done, info