from typing import Optional

import numpy as np

from Player import Player


class State:
    """Board state, winning rules and game loops for a two-player grid game.

    The board is a ``board_rows x board_cols`` numpy array holding ``1`` for
    player 1, ``-1`` for player 2 and ``0`` for an empty cell.  Two game loops
    are provided: :meth:`play` (both players are driven through the same
    ``choose_action(positions, board, symbol)`` call and receive rewards) and
    :meth:`play2` (player 2 is asked for a move with ``choose_action(positions)``
    only, and the board is printed after every move).

    NOTE(review): a line is considered won when its cell values sum to
    ``+/- WIN_SUM`` (3).  That is an exact "three in a row" test only when a
    line has three cells; confirm before using other board sizes.
    """

    # A row, column or diagonal is won when its values sum to +/- this number.
    WIN_SUM = 3

    # Printable token for each cell value (P1: x, P2: o).
    _TOKENS = {1: 'x', -1: 'o', 0: ' '}

    def __init__(self, p1: Player, p2: Player, board_rows: int, board_cols: int):
        """Create an empty board for players ``p1`` (symbol 1) and ``p2`` (symbol -1)."""
        self.board = np.zeros((board_rows, board_cols))
        self.p1 = p1
        self.p2 = p2
        self.isEnd = False
        self.board_hash = None
        self.player_symbol = 1  # symbol of the player who moves next
        self.board_cols = board_cols
        self.board_rows = board_rows

    def get_hash(self) -> str:
        """Return (and cache in ``board_hash``) the string form of the flattened board."""
        self.board_hash = str(self.board.reshape(self.board_cols * self.board_rows))
        return self.board_hash

    def winner(self) -> Optional[int]:
        """Evaluate the board and update ``isEnd``.

        Returns:
            ``1`` if player 1 has a winning line, ``-1`` if player 2 has one,
            ``0`` for a tie (no empty cell left), ``None`` if the game goes on.
        """
        # Rows first, then columns, each in index order (same order as before).
        for line_sums in (self.board.sum(axis=1), self.board.sum(axis=0)):
            for total in line_sums:
                if total == self.WIN_SUM:
                    self.isEnd = True
                    return 1
                if total == -self.WIN_SUM:
                    self.isEnd = True
                    return -1

        # Diagonals.  np.trace only walks min(rows, cols) cells, so a board
        # with fewer rows than columns no longer raises IndexError here.
        main_diag = np.trace(self.board)
        anti_diag = np.trace(np.fliplr(self.board))
        if max(abs(main_diag), abs(anti_diag)) == self.WIN_SUM:
            self.isEnd = True
            if main_diag == self.WIN_SUM or anti_diag == self.WIN_SUM:
                return 1
            return -1

        # Tie: nobody won and there is no empty cell left.
        if not self.available_positions():
            self.isEnd = True
            return 0

        # Game still running.
        self.isEnd = False
        return None

    def available_positions(self) -> list:
        """Return all empty cells as ``(row, col)`` tuples, in row-major order."""
        return [
            (row, col)  # tuples, so they can index the board directly
            for row in range(self.board_rows)
            for col in range(self.board_cols)
            if self.board[row, col] == 0
        ]

    def update_state(self, position) -> None:
        """Place the current player's symbol on ``position`` and switch players."""
        self.board[position] = self.player_symbol
        self.player_symbol = -1 if self.player_symbol == 1 else 1

    def give_reward(self) -> None:
        """Evaluate the board and feed the resulting reward to both players."""
        result = self.winner()
        if result == 1:
            # P1 won
            self.p1.feed_reward(1)
            self.p2.feed_reward(0)
        elif result == -1:
            # P2 won
            self.p1.feed_reward(0)
            self.p2.feed_reward(1)
        else:
            # Tie.  NOTE(review): the tie rewards are asymmetric (0.1 for P1,
            # 0.5 for P2); kept exactly as before - confirm this is intended.
            self.p1.feed_reward(0.1)
            self.p2.feed_reward(0.5)

    def reset(self) -> None:
        """Reset the board to the start position with player 1 to move."""
        self.board = np.zeros((self.board_rows, self.board_cols))
        self.board_hash = None
        self.isEnd = False
        self.player_symbol = 1

    def play(self, rounds=100) -> None:
        """Let ``p1`` and ``p2`` play ``rounds`` games against each other.

        After every finished game both players are rewarded and reset, and
        the board is reset.  Progress is printed every 1000 rounds.
        """
        for i in range(rounds):
            if i % 1000 == 0:
                print("Rounds {}".format(i))
            while not self.isEnd:
                # P1 moves first; P2 only moves if P1's move did not end the game.
                if self._training_turn(self.p1) or self._training_turn(self.p2):
                    break

    def _training_turn(self, player) -> bool:
        """Play one move for ``player`` in :meth:`play`; return True if the game ended."""
        positions = self.available_positions()
        action = player.choose_action(positions, self.board, self.player_symbol)
        self.update_state(action)
        player.add_state(self.get_hash())

        if self.winner() is None:
            return False

        # Win or tie: hand out rewards, then prepare everything for the next game.
        self.give_reward()
        self.p1.reset()
        self.p2.reset()
        self.reset()
        return True

    def play2(self) -> None:
        """Play one game in which ``p2`` picks moves via ``choose_action(positions)``.

        The board is printed after every move and the result is announced
        when the game ends; the board is reset afterwards.
        """
        while not self.isEnd:
            # Player 1
            positions = self.available_positions()
            p1_action = self.p1.choose_action(positions, self.board, self.player_symbol)
            self.update_state(p1_action)
            self.show_board()
            win = self.winner()
            if win is not None:
                self._announce_and_reset(win, self.p1, 1)
                break

            # Player 2 (human)
            positions = self.available_positions()
            p2_action = self.p2.choose_action(positions)
            self.update_state(p2_action)
            self.show_board()
            win = self.winner()
            if win is not None:
                self._announce_and_reset(win, self.p2, -1)
                break

    def _announce_and_reset(self, result, player, symbol) -> None:
        """Print the outcome after ``player`` (playing ``symbol``) moved, then reset."""
        if result == symbol:
            print(player.name, "wins!")
        else:
            print("tie!")
        self.reset()

    def show_board(self):
        """Print the current board (P1: x, P2: o)."""
        # Each cell is printed as 'x | ' after a leading '| ', i.e. 4 characters
        # per column plus one; this yields the original 13 dashes for 3 columns.
        separator = '-' * (4 * self.board_cols + 1)
        for i in range(self.board_rows):
            print(separator)
            out = '| '
            for j in range(self.board_cols):
                # '?' marks an unexpected cell value instead of reusing a stale token.
                out += self._TOKENS.get(self.board[i, j], '?') + ' | '
            print(out)
        print(separator)