-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfuncs.py
175 lines (141 loc) · 6.88 KB
/
funcs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# -*- coding: utf-8 -*-
import random
import numpy as np
import config
from agent import Agent, User
from game import Game, GameState
from model import Residual_CNN
def playMatchesBetweenVersions(env,
                               run_version,
                               player1version,
                               player2version,
                               EPISODES,
                               logger,
                               turns_until_tau0,
                               goes_first=0):
    """Play EPISODES matches between two player versions and return the results.

    Args:
        env: game environment providing `state_size`, `action_size`,
            `input_shape` and `name`.
        run_version: training run whose saved networks should be loaded.
        player1version / player2version: -1 for a human (`User`); otherwise an
            `Agent` backed by a Residual_CNN. A version > 0 loads the saved
            weights for that version; version 0 plays with a freshly
            initialised network.
        EPISODES: number of games to play.
        logger: logger passed through to `playMatches`.
        turns_until_tau0: number of opening turns with stochastic move choice.
        goes_first: 1 / -1 to force who starts, 0 to pick at random each game.

    Returns:
        The `(scores, memory, points, sp_scores)` tuple from `playMatches`
        (memory is always None here since no memory object is supplied).
    """
    player1 = _build_player('player1', player1version, env, run_version)
    player2 = _build_player('player2', player2version, env, run_version)

    scores, memory, points, sp_scores = playMatches(player1,
                                                    player2,
                                                    EPISODES,
                                                    logger,
                                                    turns_until_tau0,
                                                    None,
                                                    goes_first)
    return (scores, memory, points, sp_scores)


def _build_player(name, version, env, run_version):
    """Build one participant: a human `User` if version == -1, else an MCTS `Agent`.

    For an Agent, a Residual_CNN is constructed from the global config; if
    version > 0 the stored weights for that version are loaded into it.
    """
    if version == -1:
        # Human player: `act` will prompt for input instead of running MCTS.
        return User(name, env.state_size, env.action_size)

    player_NN = Residual_CNN(config.REG_CONST,
                             config.LEARNING_RATE,
                             env.input_shape,
                             env.action_size,
                             config.HIDDEN_CNN_LAYERS)
    if version > 0:
        # Load the trained weights for this version into the fresh network.
        trained_network = player_NN.read(env.name, run_version, version)
        player_NN.model.set_weights(trained_network.get_weights())
    return Agent(name, env.state_size, env.action_size,
                 config.MCTS_SIMS, config.CPUCT, player_NN)
def playMatches(player1,
                player2,
                EPISODES,
                logger,
                turns_until_tau0,
                memory=None,
                goes_first=0):
    """Play EPISODES games between two players and tally the results.

    Args:
        player1 / player2: objects exposing `.name`, `.mcts` and
            `.act(state, tau) -> (action, pi, MCTS_value, NN_value)`.
        EPISODES: number of games to play.
        logger: logger used for per-move diagnostics.
        turns_until_tau0: turns during which moves are sampled stochastically
            (tau=1); afterwards the best move is played deterministically.
        memory: optional replay memory; when given, every move is committed to
            short-term memory and value-labelled at game end.
        goes_first: 1 / -1 to force the starting player, 0 for a coin flip.

    Returns:
        (scores, memory, points, sp_scores) where `scores` counts wins per
        player name plus draws, `points` collects per-game score pairs, and
        `sp_scores` tallies results for the starting player ('sp') vs the
        non-starting player ('nsp').
    """
    env = Game()
    scores = {player1.name: 0, "drawn": 0, player2.name: 0}
    # 'sp' = starting player, 'nsp' = non-starting player.
    sp_scores = {'sp': 0, "drawn": 0, 'nsp': 0}
    points = {player1.name: [], player2.name: []}

    for e in range(EPISODES):
        logger.info('====================')
        logger.info('EPISODE %d OF %d', e + 1, EPISODES)
        logger.info('====================')
        print(str(e + 1) + ' ')

        state = env.reset()
        done = 0
        turn = 0
        # Reset search trees so no knowledge leaks between games.
        player1.mcts = None
        player2.mcts = None

        if goes_first == 0:
            # Coin flip: -1 or 1 with equal probability.
            player1Starts = random.randint(0, 1) * 2 - 1
        else:
            player1Starts = goes_first

        if player1Starts == 1:
            players = {1: {'agent': player1, 'name': player1.name},
                       -1: {'agent': player2, 'name': player2.name}}
            logger.info(player1.name + ' plays as X')
        else:
            players = {1: {'agent': player2, 'name': player2.name},
                       -1: {'agent': player1, 'name': player1.name}}
            logger.info(player2.name + ' plays as X')

        logger.info('--------------')
        env.gameState.render(logger)

        while done == 0:
            turn += 1

            # Run MCTS: stochastic (tau=1) during the opening, greedy after.
            tau = 1 if turn < turns_until_tau0 else 0
            action, pi, MCTS_value, NN_value = players[state.playerTurn]['agent'].act(state, tau)

            if memory is not None:
                # Commit the move to short-term memory for later value labelling.
                memory.commit_stmemory(env.identities, state, pi)

            logger.info('action: %d', action)
            for r in range(env.grid_shape[0]):
                logger.info(['----' if x==0 else '{0:.2f}'.format(np.round(x, 2)) for x in pi[env.grid_shape[1]*r : (env.grid_shape[1]*r + env.grid_shape[1])]])

            # Human players return None for these values; the original code
            # wrapped the logging in a bare `except: continue`, which skipped
            # env.step() whenever formatting raised and left the game stuck on
            # the same state. Guard explicitly instead so the step always runs.
            if MCTS_value is not None and NN_value is not None:
                logger.info('MCTS perceived value for %s: %f', state.pieces[str(state.playerTurn)], np.round(MCTS_value, 2))
                logger.info('NN perceived value for %s: %f', state.pieces[str(state.playerTurn)], np.round(NN_value, 2))
                logger.info('====================')

            # Apply the chosen action to the environment.
            state, value, done, _ = env.step(action)
            env.gameState.render(logger)

            if done == 1:
                if memory is not None:
                    # Label every stored move with the outcome from the mover's
                    # perspective, then flush to long-term memory.
                    for move in memory.stmemory:
                        if move['playerTurn'] == state.playerTurn:
                            move['value'] = value
                        else:
                            move['value'] = -value
                    memory.commit_ltmemory()

                # Decide the winner. `state.playerTurn` is the player to move
                # in the terminal state: value == 1 means they won, value == -1
                # means their opponent won, otherwise it is a draw.
                if value == 1:
                    logger.info('%s WINS!', players[state.playerTurn]['name'])
                    scores[players[state.playerTurn]['name']] += 1
                    if state.playerTurn == 1:
                        sp_scores['sp'] += 1
                    else:
                        sp_scores['nsp'] += 1
                elif value == -1:
                    logger.info('%s WINS!', players[-state.playerTurn]['name'])
                    scores[players[-state.playerTurn]['name']] += 1
                    if state.playerTurn == 1:
                        sp_scores['nsp'] += 1
                    else:
                        sp_scores['sp'] += 1
                else:
                    logger.info('DRAW...')
                    scores['drawn'] += 1
                    sp_scores['drawn'] += 1

                # Record the raw score pair for each player this game.
                pts = state.score
                points[players[state.playerTurn]['name']].append(pts[0])
                points[players[-state.playerTurn]['name']].append(pts[1])

    return (scores, memory, points, sp_scores)