-
Notifications
You must be signed in to change notification settings - Fork 19
/
Copy pathenvironment.py
79 lines (55 loc) · 2.21 KB
/
environment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import collections
import cv2
import numpy as np
class ALEEnvironment:
    """Wrapper around an Arcade Learning Environment (ALE) emulator.

    The emulator itself is configured with ``frame_skip`` 1; this class
    repeats each chosen action ``self.frame_skip`` times on its own and
    returns the element-wise maximum of the last two emulator frames as the
    observation. In training mode, losing a life is reported as the end of
    an episode.
    """

    def __init__(self, rom_file, args=None):
        """Create the emulator, configure it and load ``rom_file``.

        Args:
            rom_file: ROM path handed to ``ALEInterface.loadROM``.
            args: Currently unused.
        """
        # Function-scope import: ALE is only needed once an environment is
        # actually constructed.
        from ale_python_interface import ALEInterface

        self.ale = ALEInterface()

        # Emulator settings. Frame skipping is done in `step`, so the
        # emulator-level skip is set to 1.
        self.ale.setInt('frame_skip', 1)
        self.ale.setFloat('repeat_action_probability', 0.0)
        self.ale.setBool('color_averaging', False)
        self.ale.setInt('random_seed', 123)
        self.ale.setBool('sound', False)
        self.ale.setBool('display_screen', False)

        self.frame_skip = 4            # emulator frames per call to `step`
        self.initial_skip_actions = 5  # steps of action 0 taken in `reset`
        self.screen_width = 160
        self.screen_height = 210
        self.channels = 3
        # Previous frame, used by `_get_screen` for the two-frame maximum.
        self.last_screen = np.zeros(
            (self.screen_height, self.screen_width, self.channels))

        self.ale.loadROM(rom_file)
        self.actions = self.ale.getMinimalActionSet()

        self.life_lost = False
        self.training = True

    def reset(self, train=True):
        """Start a new episode and return its first observation.

        The emulator is reset when the game is over, or whenever the
        previous episode did not end through a lost life in training mode.
        If ``train`` is true, the last step lost a life and the game is not
        over, play simply continues from the current emulator state.

        Args:
            train: Whether life loss should be treated as terminal by
                subsequent calls to `step`.

        Returns:
            The screen observation produced by `_get_screen` after
            ``initial_skip_actions`` steps of action index 0.
        """
        self.training = train
        if self.ale.game_over() or not (train and self.life_lost):
            self.ale.reset_game()
            self.last_screen.fill(0.0)
        for _ in range(self.initial_skip_actions):
            self.step(0)
        return self._get_screen()

    def step(self, action):
        """Repeat ``action`` for ``frame_skip`` emulator frames.

        Args:
            action: Index into ``self.actions`` (the minimal action set).

        Returns:
            A tuple ``(state, reward, terminal, info)`` where ``state`` is
            the element-wise maximum of the last two emulator frames,
            ``reward`` is the sum of the rewards returned by the emulator
            over the repeated frames, ``terminal`` is true when the game is
            over or (in training mode) a life was lost during this step, and
            ``info`` is an empty list.
        """
        reward = 0
        lives_before = self.ale.lives()
        # The frame *before* the final one must be stored as `last_screen`
        # so that the `_get_screen` call after the loop combines two distinct
        # frames. Capturing after the final action instead would compare the
        # last frame with itself and make the maximum a no-op.
        penultimate = self.frame_skip - 2
        for i in range(self.frame_skip):
            reward += self.ale.act(self.actions[action])
            if i == penultimate:
                self._get_screen()
        # When frame_skip == 1 nothing is captured in the loop and
        # `last_screen` still holds the frame from the previous step.
        state = self._get_screen()

        self.life_lost = lives_before != self.ale.lives()
        terminal = self.ale.game_over() or (self.life_lost and self.training)
        info = []
        return state, reward, terminal, info

    def numActions(self):
        """Return the number of actions in the minimal action set."""
        return len(self.actions)

    def _get_screen(self):
        """Grab the current RGB screen and max it with the previous one.

        Side effect: the raw screen just read becomes ``self.last_screen``.

        Returns:
            ``np.maximum`` of the current screen and the previously stored
            screen.
        """
        screen = self.ale.getScreenRGB()
        out_screen = np.maximum(screen, self.last_screen)
        self.last_screen = screen
        return out_screen