tictactoe/State.py

216 lines
6.4 KiB
Python
Raw Normal View History

2022-07-22 13:08:35 +02:00
import numpy as np
from Player import Player
'''
Defines the board state, the rules for winning, and two game loops: computer vs. computer (training) and computer vs. human.
'''
class State:
    """Board state, win rules and game loops for a tic-tac-toe style game.

    Player 1 places the token ``1`` and player 2 places ``-1``; empty cells
    hold ``0``.  A player wins by placing ``WIN_LENGTH`` of their tokens in an
    unbroken horizontal, vertical or diagonal line.

    Attributes set by the constructor:
        board: ``board_rows x board_cols`` numpy array of cell values.
        p1, p2: the two player objects.
        isEnd: True once ``winner()`` has detected a win or a tie.
        board_hash: last value computed by ``get_hash()`` (None before that).
        player_symbol: token of the player whose turn it is (1 or -1).
        board_rows, board_cols: board dimensions.
    """

    # Number of identical tokens in an unbroken line required to win.
    WIN_LENGTH = 3

    def __init__(self, p1: "Player", p2: "Player", board_rows: int, board_cols: int):
        """Create an empty board for the two given players.

        Args:
            p1: player who moves first (token 1).
            p2: player who moves second (token -1).
            board_rows: number of board rows.
            board_cols: number of board columns.
        """
        self.board_rows = board_rows
        self.board_cols = board_cols
        self.board = np.zeros((board_rows, board_cols))
        self.p1 = p1
        self.p2 = p2
        self.isEnd = False
        self.board_hash = None
        self.player_symbol = 1

    def get_hash(self) -> str:
        """Return (and cache in ``board_hash``) a string key for the board.

        The key is the ``str()`` of the flattened board array; its exact
        format is kept unchanged because callers store it as a state key.
        """
        self.board_hash = str(self.board.reshape(self.board_cols * self.board_rows))
        return self.board_hash

    def _lines(self):
        """Yield every row, then every column, then every diagonal."""
        for r in range(self.board_rows):
            yield self.board[r, :]
        for c in range(self.board_cols):
            yield self.board[:, c]
        # Offsets cover every diagonal of a (possibly non-square) board.
        offsets = range(1 - self.board_rows, self.board_cols)
        for k in offsets:
            yield self.board.diagonal(k)
        # Anti-diagonals are the diagonals of the left-right mirrored board.
        mirrored = np.fliplr(self.board)
        for k in offsets:
            yield mirrored.diagonal(k)

    def _line_owner(self, cells):
        """Return 1 or -1 if ``cells`` holds WIN_LENGTH equal tokens in a row."""
        need = self.WIN_LENGTH
        for start in range(len(cells) - need + 1):
            # Cells are 1, -1 or 0, so a window sums to +/-need only when
            # every cell in it carries the same player's token.
            total = sum(cells[start:start + need])
            if total == need:
                return 1
            if total == -need:
                return -1
        return None

    def winner(self):
        """Evaluate the board and update ``isEnd``.

        Returns:
            1 if player 1 has a winning line, -1 if player 2 has one,
            0 if there is no winning line and no empty cell is left (tie),
            None if the game is still running.
        """
        for line in self._lines():
            owner = self._line_owner(line)
            if owner is not None:
                self.isEnd = True
                return owner
        # No winning line: a full board is a tie.
        if not self.available_positions():
            self.isEnd = True
            return 0
        self.isEnd = False
        return None

    def available_positions(self) -> list:
        """Return the ``(row, col)`` tuples of all empty cells, row by row."""
        # Tuples are required so a position can directly index the board.
        return [
            (i, j)
            for i in range(self.board_rows)
            for j in range(self.board_cols)
            if self.board[i, j] == 0
        ]

    def update_state(self, position) -> None:
        """Place the current player's token at ``position`` and switch turns.

        Args:
            position: ``(row, col)`` index of the cell to fill.
        """
        self.board[position] = self.player_symbol
        self.player_symbol = -1 if self.player_symbol == 1 else 1

    def give_reward(self) -> None:
        """Feed the final reward of the game to both players.

        The winner receives 1 and the loser 0.  Any other result of
        ``winner()`` (a tie, but also an unfinished game) gives 0.1 to
        player 1 and 0.5 to player 2, so call this only once the game is over.
        """
        result = self.winner()
        if result == 1:
            self.p1.feed_reward(1)
            self.p2.feed_reward(0)
        elif result == -1:
            self.p1.feed_reward(0)
            self.p2.feed_reward(1)
        else:
            # NOTE(review): the asymmetric tie rewards look deliberate
            # (presumably because player 1 moves first) - confirm.
            self.p1.feed_reward(0.1)
            self.p2.feed_reward(0.5)

    def reset(self) -> None:
        """Clear the board and restore the start-of-game state."""
        self.board = np.zeros((self.board_rows, self.board_cols))
        self.board_hash = None
        self.isEnd = False
        self.player_symbol = 1

    def _train_one_game(self) -> None:
        """Let p1 and p2 alternate until the game ends, then reward and reset."""
        while not self.isEnd:
            for player in (self.p1, self.p2):
                positions = self.available_positions()
                action = player.choose_action(positions, self.board, self.player_symbol)
                self.update_state(action)
                player.add_state(self.get_hash())
                # Did this move win the game or fill the board?
                if self.winner() is not None:
                    self.give_reward()
                    self.p1.reset()
                    self.p2.reset()
                    self.reset()
                    return

    def play(self, rounds=100) -> None:
        """Play ``rounds`` computer-vs-computer games for training.

        After every game both players are rewarded and reset, and the board
        is cleared.  Progress is printed every 1000 rounds.
        """
        for i in range(rounds):
            if i % 1000 == 0:
                print("Rounds {}".format(i))
            self._train_one_game()

    def _announce(self, result) -> None:
        """Print the outcome of a finished game (1, -1, or anything else = tie)."""
        if result == 1:
            print(self.p1.name, "wins!")
        elif result == -1:
            print(self.p2.name, "wins!")
        else:
            print("tie!")

    def play2(self) -> None:
        """Play one game of computer (p1) against a human (p2).

        The board is printed after every move.  When the game ends the result
        is printed and the board is reset.
        """
        while not self.isEnd:
            # Player 1 (computer) also receives the board and its own token.
            positions = self.available_positions()
            p1_action = self.p1.choose_action(positions, self.board, self.player_symbol)
            self.update_state(p1_action)
            self.show_board()
            win = self.winner()
            if win is not None:
                self._announce(win)
                self.reset()
                break

            # Player 2 (human) only receives the list of free positions.
            positions = self.available_positions()
            p2_action = self.p2.choose_action(positions)
            self.update_state(p2_action)
            self.show_board()
            win = self.winner()
            if win is not None:
                self._announce(win)
                self.reset()
                break

    def show_board(self):
        """Print the board; player 1 is shown as 'x' and player 2 as 'o'."""
        symbols = {1.0: 'x', -1.0: 'o', 0.0: ' '}
        # Each cell prints as "x | " after the leading "| ", so the separator
        # needs 4 characters per column plus one (13 for a 3-column board).
        separator = '-' * (4 * self.board_cols + 1)
        for i in range(self.board_rows):
            print(separator)
            cells = (symbols.get(self.board[i, j], '?') for j in range(self.board_cols))
            print('| ' + ''.join(token + ' | ' for token in cells))
        print(separator)