-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgame.py
404 lines (318 loc) · 14.4 KB
/
game.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
from infrastructure import *
import constraints
import effects
import numpy as np
import time
import copy
from agents import *
from qLearner import *
from qOutput import *
class Game(object):
    """
    Engine for the Mao card game: owns the rule objects, the deck, the
    discard pile and the turn order, and drives rounds between players.

    Player objects are expected to provide the methods this class calls on
    them: takeAction, getFeedback, takeCard, won, notify, modifyRule, plus
    the attributes name and hand.
    """

    def __init__(self, players, autogame=True, TESTCARD=None, handsize=5):
        """
        players  -- list of player objects, in seating order
        autogame -- when False, notifyAll prints a commentary line per event
        TESTCARD -- optional card; when truthy, heuristic mode is switched on
                    and the card is forced into the hand of the player
                    named "HeuristicTests" at the start of every round
        handsize -- number of cards dealt to each player at round start
        """
        # constraints
        self.basicValueConstraint = constraints.BasicValueConstraint(True)
        self.basicSuitConstraint = constraints.BasicSuitConstraint()
        self.wildValueConstraint = constraints.WildValueConstraint()
        self.wildSuitEffect = constraints.WildSuitEffect()
        self.poisonDistanceConstraint = constraints.PoisonDistanceConstraint()
        # effects
        self.poisonCardEffect = effects.PoisonCardEffect()
        self.skipPlayerEffect = effects.SkipPlayerEffect()
        self.screwOpponentEffect = effects.ScrewOpponentEffect()

        # player stuff
        self.players = players
        self.activePlayer = 0  # index into self.players of whoever is up
        self.autogame = autogame

        # deck stuff
        self.startingHandSize = handsize
        self.changeRuleRate = 1  # invariant -- DO NOT CHANGE
        self.deck = Deck()  # pre-shuffled deck
        self.pile = []  # discarded cards. DIFFERENT FROM DECK OBJECT.
        self.lastCard = None

        # round stuff
        self.round = 0  # number of rounds completed so far
        self.gameHistory = GameHistory()  # history of the game for training data
        self.roundHistory = RoundHistory()

        # Always define testCard so later reads never hit a missing
        # attribute; it stays None unless a truthy TESTCARD was supplied.
        self.heuristicMode = bool(TESTCARD)
        self.testCard = TESTCARD if TESTCARD else None

    @staticmethod
    def _announce(*parts):
        """
        Write parts to stdout separated by single spaces, then a newline.
        Produces the same text as a Python 2 `print a, b, c` statement while
        remaining valid syntax on both Python 2 and Python 3.
        """
        print(" ".join("%s" % (part,) for part in parts))

    def deliverCombostate(self):
        """
        Packages the current setting of every rule into a CombinedState
        (a State of constraint rules plus an EffectState of effect rules).
        """
        bVal = Rule(BASICVALUE, self.basicValueConstraint.greater)
        wVal = Rule(WILDVALUE, self.wildValueConstraint.wildValue)
        wSuit = Rule(WILDSUIT, self.wildSuitEffect.wildSuit)
        pDist = Rule(POISONDIST, self.poisonDistanceConstraint.dist)
        pCard = Rule(POISONCARD, self.poisonCardEffect.value)
        skip = Rule(SKIPPLAYER, self.skipPlayerEffect.activatingValue)
        screw = Rule(SCREWOPPONENT, self.screwOpponentEffect.activatingValue)
        return CombinedState(State(bVal, wVal, wSuit, pDist),
                             EffectState(pCard, screw, skip))

    def makeModification(self, ruleTuple):
        """
        Applies ruleTuple.setting to the rule object named by ruleTuple.rule.
        BASICSUIT is accepted but has nothing to modify; an unrecognised
        rule is silently ignored.
        """
        rule = ruleTuple.rule
        setting = ruleTuple.setting
        if rule == BASICVALUE:
            self.basicValueConstraint.modify(setting)
        elif rule == BASICSUIT:
            pass
        elif rule == WILDSUIT:
            self.wildSuitEffect.modify(setting)
        elif rule == WILDVALUE:
            self.wildValueConstraint.modify(setting)
        elif rule == POISONDIST:
            self.poisonDistanceConstraint.modify(setting)
        elif rule == SCREWOPPONENT:
            self.screwOpponentEffect.modify(setting)
        elif rule == SKIPPLAYER:
            self.skipPlayerEffect.modify(setting)
        elif rule == POISONCARD:
            self.poisonCardEffect.modify(setting)

    def isLegal(self, attemptedCard):
        """
        Evaluates the card against the current constraints to see whether it is viable or not
        Returns True or False
        """
        previous = self.lastCard

        # poison distance is the most powerful effect: when it is active
        # and rejects the card, nothing else can rescue it
        poison = self.poisonDistanceConstraint
        if poison.isActive(attemptedCard) and not poison.isLegal(attemptedCard, previous):
            return False

        # wild rules: needs only ONE active rule to accept the card
        for wild in (self.wildValueConstraint, self.wildSuitEffect):
            if wild.isActive(attemptedCard) and wild.isLegal(attemptedCard, previous):
                return True

        # the basics (ie, ordering and suit). These are always active;
        # need only ONE to pass
        for basic in (self.basicValueConstraint, self.basicSuitConstraint):
            if basic.isLegal(attemptedCard, previous):
                return True

        return False  # no wild or basic rule accepted the card

    def notifyAll(self, notification):
        """
        Notifies all players of a change in the gamestate.
        When autogame is off, a commentary line is printed first.
        """
        kind = notification.type  # local name avoids shadowing builtin `type`
        if not self.autogame:
            # every event except a skip is prefixed by the active player's name
            if not kind == SKIPPLAYER:
                self._announce(self.players[self.activePlayer].name)
            if kind == LEGAL:
                self._announce("LEGAL CARD PLAYED:", notification.attemptedCard, "\n")
            elif kind == PENALTY:
                self._announce("ILLEGAL CARD PLAYED:", notification.attemptedCard, "\n")
            elif kind == WON:
                self._announce("Player", self.players[self.activePlayer].name, "won!")
            elif kind == POISONCARD:
                self._announce("PENALTY CARD from:", notification.attemptedCard, "\n")
            elif kind == SKIPPLAYER:
                self._announce("SKIPPING PLAYER", self.players[self.activePlayer].name,
                               "using", notification.attemptedCard, "\n")
            elif kind == SCREWOPPONENT:
                self._announce("SCREWING PLAYER using:", notification.attemptedCard, "\n")
        for player in self.players:
            player.notify(notification, self)

    def getCardFromDeck(self):
        """
        Draws a card from the deck. If the deck is empty, the discard pile is
        shuffled back in as the new deck (players are sent a DECKRESET
        notification) and the draw is retried.
        Returns None if both the deck and the pile are empty, i.e. all the
        cards are in players' hands.
        """
        card = self.deck.drawCard()
        if card is not None:
            return card

        # drawCard gave nothing, so the deck must be exhausted
        assert len(self.deck.cards) == 0
        if not self.pile:
            # sheesh. literally all the cards are held by players
            return None

        # NOTE: flagged as UNTESTED by the original author
        # make a new deck using the pile
        recycled = len(self.pile)
        self.deck.cards = copy.copy(self.pile)  # shallow copy: same card objects
        self.pile = []
        assert recycled == len(self.deck.cards)
        self.deck.shuffle()
        self.notifyAll(Notification(DECKRESET, None, None))
        return self.getCardFromDeck()  # recurse, try to get another card

    def enactEffects(self, attemptedCard):
        """
        Runs every effect that attemptedCard activates, in the order
        poison card, screw opponent, skip player. Each effect sends its own
        notification.
        Returns True if the skip effect ran, False if not. The caller needs
        this to tune activePlayer correctly.
        """
        if self.poisonCardEffect.isActive(attemptedCard):
            self.poisonCardEffect.enactEffect(self, attemptedCard)
        if self.screwOpponentEffect.isActive(attemptedCard):
            self.screwOpponentEffect.enactEffect(self, attemptedCard)
        if self.skipPlayerEffect.isActive(attemptedCard):
            self.skipPlayerEffect.enactEffect(self, attemptedCard)
            return True
        return False

    def playerTurn(self, player):
        """
        Describes and handles logic for a player attempting to place a card
        Returns WON if the player won, and 0 if not
        """
        attemptedCard = player.takeAction(self.lastCard)  # the player tries to play a card
        previousCard = self.lastCard

        # NOTE(review): isLegal returns a bool while this compares against
        # the LEGAL constant, so it relies on LEGAL == True -- confirm.
        if self.isLegal(attemptedCard) == LEGAL:
            # game state bookkeeping -- last card, and the pile
            self.pile.append(attemptedCard)
            self.lastCard = attemptedCard
            # tell the player their move worked, then tell everyone
            player.getFeedback(True)
            self.notifyAll(Notification(LEGAL, attemptedCard, previousCard))

            if player.won():
                return WON

            skipEnacted = self.enactEffects(attemptedCard)
            # an effect may have handed off the player's last card
            if player.won():
                if skipEnacted:
                    # go back a player to handle skip special case
                    # (ie, don't screw with player order)
                    seats = len(self.players)
                    self.activePlayer = (self.activePlayer + seats - 1) % seats
                return WON
            return 0

        # illegal move: return the card to the player, and penalize them
        # with a new card when one is available
        player.takeCard(attemptedCard)
        penaltyCard = self.getCardFromDeck()
        if penaltyCard is not None:
            player.takeCard(penaltyCard)
        player.getFeedback(False)
        self.notifyAll(Notification(PENALTY, attemptedCard, previousCard))
        return 0  # explicit, to match the documented contract

    def _startRound(self, prevWinner):
        """Resets deck, pile and round history, flips the first card and deals hands."""
        self.activePlayer = prevWinner
        self.deck = Deck()  # maybe not the best; ideally we fetch cards from every player
        self.pile = []
        self.roundHistory = RoundHistory()

        # the first card of the round goes straight onto the pile
        initialCard = self.getCardFromDeck()
        self.pile.append(initialCard)
        self.lastCard = initialCard
        self.notifyAll(Notification(NEWROUND, None, None))

        for player in self.players:
            if player.name == "HeuristicTests" and self.testCard is not None:
                # one card fewer from the deck; the test card fills the last slot
                hand = [self.getCardFromDeck() for _ in range(self.startingHandSize - 1)]
                hand.append(self.testCard)
            else:
                hand = [self.getCardFromDeck() for _ in range(self.startingHandSize)]
            player.hand = hand

    def playRound(self, prevWinner=0):
        """
        Plays a single round of the Mao card game.
        Initialized with whoever the previous winner was
        Returns the player number of the winner of the round.
        """
        self._startRound(prevWinner)

        while True:
            player = self.players[self.activePlayer]
            if self.playerTurn(player) == WON:
                self.notifyAll(Notification(WON, player, None))  # hacky notification
                break
            self.activePlayer = (1 + self.activePlayer) % len(self.players)

        # closing the round off
        self.gameHistory.addRound(self.roundHistory)
        self.round += 1

        # the winner modifies a rule every changeRuleRate rounds
        if self.round % self.changeRuleRate == 0:
            self.players[self.activePlayer].modifyRule(self.makeModification)  # pass the method as an argument
        return self.activePlayer

    def playGame(self, numRounds=10):
        """
        Plays numRounds rounds, each started by the previous round's winner
        (player 0 starts the first). Whenever the total round count reaches a
        multiple of 16, prints "round", the count, and the seconds elapsed
        since the timer was last restarted.
        """
        winner = 0
        roundPrint = 16
        # Start the timer up front so the report below always has a start
        # time, even when self.round does not begin on a multiple of
        # roundPrint (e.g. on a second call to playGame).
        t0 = time.time()
        for _ in range(numRounds):
            if self.round % roundPrint == 0:
                t0 = time.time()
            winner = self.playRound(winner)
            if self.round % roundPrint == 0:
                t1 = time.time()
                self._announce("round", self.round, t1 - t0)
# /////////
#
# Commenting out for use in tests.py
#
# \\\\\\\\\
##
# Q-Learning Agent
def playTest():
    """
    Plays a 500-round game between a Q-learning agent and a random agent,
    then prints each player's name and win count, followed by the average
    valid-move percentage (for the listed agent types) or the learned
    weights (for the Q-learner).
    """
    features = [SizeofHand(), HighCount(), LowCount(), Illegality()]
    qBot = QLearner('qBot', features)

    # extra agents are constructed but not seated in the game below
    pHuman = HmmAgent("Learner")
    pBot0 = RandomAgent("A1")
    pBot2 = RandomAgent("A2")
    pBot1 = RandomAgent("NaiveTests")

    g = Game([qBot, pBot1], True)
    g.playGame(500)

    # agent classes that expose validPercentByRound
    validityReporters = (LearningAgent, RandomAgent, HmmAgent, HeuristicAgent)
    for player in g.players:
        print(player.name)
        print(player.wins)
        agentType = type(player)
        if agentType in validityReporters:
            print(np.average(player.validPercentByRound))
        if agentType == QLearner:
            print(player.weights)
# # tests
# pHuman = HmmAgent("J")
# # pBotw = RandomAgent("A1")
# # pBot2 = RandomAgent("A2")
# # pBot = LearningAgent("Learner2")
# pBot1 = HmmAgent("Learner")
# # # # tests
# pHuman = HmmAgent("J")
# # pBotw = RandomAgent("A1")
# # pBot2 = RandomAgent("A2")
# # pBot = LearningAgent("Learner2")
# pBot1 = CardCounter("Learner")
# g = Game([pHuman, pBot1], True)
# g.playGame(200)
# #print stats
# for player in g.players:
# print player.name
# print player.wins
# if type(player) == CardCounter or type(player) == RandomAgent or type(player) == HmmAgent:
# try:
# print np.average(player.validPercentByRound)
# except:
# print 'div by zero'
# player_names = ['J', 'lerner']
# player_wins = [[],[]]
# player_valid = [[],[]]
# for game in range(500):
# pHuman = HmmAgent("J")
# # pBotw = RandomAgent("A1")
# # pBot2 = RandomAgent("A2")
# # pBot = LearningAgent("Learner2")
# pBot1 = cardCounter("Learner")
# print "game:", game
# g = Game([pHuman, pBot1], True)
# g.playGame(20)
# for i in range(len(g.players)):
# player_wins[i].append(g.players[i].wins)
# player_valid[i].append(np.average(g.players[i].validPercentByRound))
# #print stats
# for i in range(2):
# print player_names[i]
# print np.sum(player_wins[i])
# print np.mean(player_valid[i])
#