216 lines
6.4 KiB
Python
216 lines
6.4 KiB
Python
|
import numpy as np
|
||
|
|
||
|
from Player import Player
|
||
|
|
||
|
'''
|
||
|
Defines the board state, the rules for winning, and the two game modes: computer vs. computer and computer vs. human.
|
||
|
'''
|
||
|
|
||
|
|
||
|
class State:
    """Board state, win rules and game loops for a two-player grid game.

    Cells of ``self.board`` hold ``1`` for player 1, ``-1`` for player 2 and
    ``0`` when empty. ``self.player_symbol`` is the symbol that will be
    placed by the next call to ``update_state``.
    """

    # A line is won when its cell values sum to +/- this total.
    _WIN_TOTAL = 3
    # Cell value -> character printed by ``show_board`` (P1: x, P2: o).
    _TOKENS = {1: 'x', -1: 'o', 0: ' '}

    def __init__(self, p1: "Player", p2: "Player", board_rows: int, board_cols: int):
        """Create an empty board of ``board_rows`` x ``board_cols`` cells.

        Args:
            p1: Player that moves first (symbol ``1``).
            p2: Player that moves second (symbol ``-1``).
            board_rows: Number of board rows.
            board_cols: Number of board columns.
        """
        self.board = np.zeros((board_rows, board_cols))
        self.p1 = p1
        self.p2 = p2
        self.isEnd = False
        self.board_hash = None
        self.player_symbol = 1
        self.board_cols = board_cols
        self.board_rows = board_rows

    def get_hash(self) -> str:
        """Return a string key for the current board and cache it.

        The key is the ``str`` of the flattened board and is also stored in
        ``self.board_hash``.
        """
        self.board_hash = str(self.board.reshape(self.board_cols * self.board_rows))
        return self.board_hash

    def winner(self):
        """Evaluate the board and update ``self.isEnd``.

        Rows are checked first, then columns, then the two diagonals, then
        the tie condition.

        Returns:
            ``1`` if player 1 has a winning line, ``-1`` if player 2 has one,
            ``0`` if no empty cell is left (tie), or ``None`` if the game is
            still running.
        """
        # Rows first, then columns, in index order.
        lines = [self.board[r, :] for r in range(self.board_rows)]
        lines.extend(self.board[:, c] for c in range(self.board_cols))
        for line in lines:
            total = line.sum()
            if total == self._WIN_TOTAL:
                self.isEnd = True
                return 1
            if total == -self._WIN_TOTAL:
                self.isEnd = True
                return -1

        # Both diagonals are indexed with board_cols on each axis, so this
        # raises IndexError when the board has fewer rows than columns.
        size = self.board_cols
        diag_sum1 = sum(self.board[i, i] for i in range(size))
        diag_sum2 = sum(self.board[i, size - i - 1] for i in range(size))
        if max(abs(diag_sum1), abs(diag_sum2)) == self._WIN_TOTAL:
            self.isEnd = True
            if diag_sum1 == self._WIN_TOTAL or diag_sum2 == self._WIN_TOTAL:
                return 1
            return -1

        # Tie: nobody won and no empty cell is left.
        if len(self.available_positions()) == 0:
            self.isEnd = True
            return 0

        # Game still running.
        self.isEnd = False
        return None

    def available_positions(self) -> list:
        """Return the ``(row, col)`` tuples of all empty cells, row by row."""
        return [
            (row, col)  # tuples, so a position can index the board directly
            for row in range(self.board_rows)
            for col in range(self.board_cols)
            if self.board[row, col] == 0
        ]

    def update_state(self, position) -> None:
        """Place the current player's symbol and hand the turn over.

        Args:
            position: Index into ``self.board`` (a ``(row, col)`` tuple as
                produced by ``available_positions``).
        """
        self.board[position] = self.player_symbol
        self.player_symbol = -1 if self.player_symbol == 1 else 1

    def give_reward(self) -> None:
        """Feed the end-of-game reward to both players.

        The winner receives ``1`` and the loser ``0``. Any other result of
        ``winner()`` (a tie, but also ``None`` if called mid-game) feeds
        ``0.1`` to player 1 and ``0.5`` to player 2.
        """
        result = self.winner()
        if result == 1:
            # P1 won
            self.p1.feed_reward(1)
            self.p2.feed_reward(0)
        elif result == -1:
            # P2 won
            self.p1.feed_reward(0)
            self.p2.feed_reward(1)
        else:
            # Tie. NOTE(review): the asymmetric values look deliberate
            # (first mover rewarded less for a draw) - confirm.
            self.p1.feed_reward(0.1)
            self.p2.feed_reward(0.5)

    def reset(self) -> None:
        """Reset the board to the start position with player 1 to move."""
        self.board = np.zeros((self.board_rows, self.board_cols))
        self.board_hash = None
        self.isEnd = False
        self.player_symbol = 1

    def _computer_turn(self, player) -> bool:
        """Play one self-play move for ``player``; return True if the game ended.

        When the move ends the game, rewards are fed and both players and
        the board are reset.
        """
        positions = self.available_positions()
        action = player.choose_action(positions, self.board, self.player_symbol)
        self.update_state(action)
        player.add_state(self.get_hash())

        if self.winner() is None:
            return False

        self.give_reward()
        self.p1.reset()
        self.p2.reset()
        self.reset()
        return True

    def play(self, rounds=100) -> None:
        """Let the two computer players play ``rounds`` games against each other.

        Progress is printed every 1000 rounds (including round 0).

        Args:
            rounds: Number of games to play.
        """
        for i in range(rounds):
            if i % 1000 == 0:
                print("Rounds {}".format(i))
            while not self.isEnd:
                # Player 1 moves first; player 2 only moves if the game goes on.
                if self._computer_turn(self.p1) or self._computer_turn(self.p2):
                    break

    def _report_result(self, result, mover, mover_symbol) -> None:
        """Print the outcome of a finished human game and reset the board."""
        if result == mover_symbol:
            print(mover.name, "wins!")
        else:
            print("tie!")
        self.reset()

    def play2(self) -> None:
        """Play one game of computer (player 1) against a human (player 2).

        The board is printed after every move. The human player's
        ``choose_action`` is called with the available positions only.
        """
        while not self.isEnd:
            # Player 1
            positions = self.available_positions()
            p1_action = self.p1.choose_action(positions, self.board, self.player_symbol)
            self.update_state(p1_action)
            self.show_board()

            # Did P1 win, or is it a tie?
            result = self.winner()
            if result is not None:
                self._report_result(result, self.p1, 1)
                break

            # Player 2 (human)
            positions = self.available_positions()
            p2_action = self.p2.choose_action(positions)
            self.update_state(p2_action)
            self.show_board()

            # Did P2 win, or is it a tie?
            result = self.winner()
            if result is not None:
                self._report_result(result, self.p2, -1)
                break

    def show_board(self):
        """Print the current board (P1: ``x``, P2: ``o``, empty: blank).

        The separator line is sized to the number of columns; cells holding
        an unexpected value are shown as ``?``.
        """
        # Each cell prints as "x | " after a leading "| ", so the last "|"
        # sits at column 4 * board_cols + 1.
        separator = '-' * (4 * self.board_cols + 1)
        for row in range(self.board_rows):
            print(separator)
            cells = (
                self._TOKENS.get(self.board[row, col], '?')
                for col in range(self.board_cols)
            )
            print('| ' + ''.join(token + ' | ' for token in cells))
        print(separator)
|