tictactoe/State.py

import numpy as np

from Player import Player

'''
    Defines the board state, the rules for winning, and the two game loops: computer vs. computer and computer vs. human
'''


class State:
    """Tic-tac-toe board together with the win/tie rules and the game loops.

    Empty cells hold 0. The player who moves first (``p1``) writes 1 into the
    board and the second player (``p2``) writes -1.

    The player objects are used through these members only:
    ``choose_action(positions, board, symbol)`` (``p2`` is called with just
    ``positions`` in :meth:`play2`), ``add_state(board_hash)``,
    ``feed_reward(value)``, ``reset()`` and ``name``.
    """

    # A line is won when the sum over a complete row, column or diagonal
    # equals +WIN_SUM (player 1) or -WIN_SUM (player 2).
    # NOTE(review): this is "three in a row" on a 3x3 board; the intended
    # rule for other board sizes is not defined here -- confirm before use.
    WIN_SUM = 3

    # The Player annotations are quoted so they are not evaluated when the
    # class body is executed.
    def __init__(self, p1: "Player", p2: "Player", board_rows: int, board_cols: int):
        """Create an empty board of ``board_rows`` x ``board_cols`` cells.

        Args:
            p1: Player that moves first (symbol 1).
            p2: Player that moves second (symbol -1).
            board_rows: Number of board rows.
            board_cols: Number of board columns.
        """
        self.board = np.zeros((board_rows, board_cols))
        self.p1 = p1
        self.p2 = p2
        self.isEnd = False
        self.board_hash = None
        # Symbol that the next call to update_state() will place.
        self.player_symbol = 1
        self.board_cols = board_cols
        self.board_rows = board_rows

    def get_hash(self) -> str:
        """Return a string key for the current board and cache it in ``board_hash``.

        The key is the ``str()`` of the flattened board. The exact format is
        kept unchanged because it is handed to ``add_state`` of the players.
        """
        self.board_hash = str(self.board.reshape(self.board_cols * self.board_rows))
        return self.board_hash

    def winner(self):
        """Evaluate the board and update ``isEnd``.

        Returns:
            1 if player 1 has a winning line, -1 if player 2 has one,
            0 if the board is full without a winner (tie),
            None if the game is still running.
        """
        target = self.WIN_SUM

        # Rows are checked before columns, each in index order.
        for line_sums in (self.board.sum(axis=1), self.board.sum(axis=0)):
            for total in line_sums:
                if total == target:
                    self.isEnd = True
                    return 1
                if total == -target:
                    self.isEnd = True
                    return -1

        # Both diagonals. np.trace stops at the shorter board dimension, so
        # a board with more columns than rows no longer raises IndexError.
        main_diag = np.trace(self.board)
        anti_diag = np.trace(np.fliplr(self.board))
        if max(abs(main_diag), abs(anti_diag)) == target:
            self.isEnd = True
            return 1 if target in (main_diag, anti_diag) else -1

        # Tie: nobody won and there is no free cell left.
        if not self.available_positions():
            self.isEnd = True
            return 0

        # Game still running.
        self.isEnd = False
        return None

    def available_positions(self) -> list:
        """Return all empty cells as ``(row, col)`` tuples in row-major order."""
        return [
            (row, col)  # tuples, so a position can index the board directly
            for row in range(self.board_rows)
            for col in range(self.board_cols)
            if self.board[row, col] == 0
        ]

    def update_state(self, position) -> None:
        """Place the current player's symbol at ``position`` and switch players.

        Args:
            position: ``(row, col)`` index into the board. The cell is
                overwritten without checking that it is empty.
        """
        self.board[position] = self.player_symbol
        self.player_symbol = -1 if self.player_symbol == 1 else 1

    def give_reward(self) -> None:
        """Hand the end-of-game reward to both players via ``feed_reward``.

        The winner receives 1 and the loser 0. On a tie ``p1`` receives 0.1
        and ``p2`` receives 0.5. If the game is not finished yet
        (``winner()`` returns None) no reward is given.
        """
        result = self.winner()
        if result is None:
            # Unfinished game: previously this was rewarded like a tie.
            return
        if result == 1:
            # P1 won
            self.p1.feed_reward(1)
            self.p2.feed_reward(0)
        elif result == -1:
            # P2 won
            self.p1.feed_reward(0)
            self.p2.feed_reward(1)
        else:
            # Tie. NOTE(review): the asymmetric values are kept as found;
            # presumably deliberate -- confirm.
            self.p1.feed_reward(0.1)
            self.p2.feed_reward(0.5)

    def reset(self) -> None:
        """Reset the board to the start position with player 1 to move."""
        self.board = np.zeros((self.board_rows, self.board_cols))
        self.board_hash = None
        self.isEnd = False
        self.player_symbol = 1

    def _training_turn(self, player) -> bool:
        """Let ``player`` move once in self-play; return True if the game ended.

        When the move ends the game, rewards are given and both players and
        the board are reset.
        """
        action = player.choose_action(self.available_positions(), self.board, self.player_symbol)
        self.update_state(action)
        player.add_state(self.get_hash())

        if self.winner() is None:
            return False

        self.give_reward()
        self.p1.reset()
        self.p2.reset()
        self.reset()
        return True

    def play(self, rounds=100) -> None:
        """Play ``rounds`` games between the two computer players.

        Args:
            rounds: Number of games to play. Progress is printed every
                1000 games.
        """
        for i in range(rounds):
            if i % 1000 == 0:
                print("Rounds {}".format(i))
            while not self.isEnd:
                # P1 moves first; P2 only moves if P1 did not end the game.
                if self._training_turn(self.p1) or self._training_turn(self.p2):
                    break

    def _shown_turn(self, player, action, winning_result) -> bool:
        """Apply ``action``, print the board and report a finished game.

        Args:
            player: The player who chose ``action``.
            action: ``(row, col)`` position to play.
            winning_result: Value of ``winner()`` that means ``player`` won.

        Returns:
            True if the game ended (the board has then been reset).
        """
        self.update_state(action)
        self.show_board()

        win = self.winner()
        if win is None:
            return False

        if win == winning_result:
            print(player.name, "wins!")
        else:
            print("tie!")
        self.reset()
        return True

    def play2(self) -> None:
        """Play a single game in which ``p2`` is a human player.

        ``p1`` is asked with ``(positions, board, symbol)``, ``p2`` with the
        free positions only. The board is printed after every move.
        """
        while not self.isEnd:
            # Player 1
            p1_action = self.p1.choose_action(self.available_positions(), self.board, self.player_symbol)
            if self._shown_turn(self.p1, p1_action, 1):
                break

            # Player 2 (human)
            p2_action = self.p2.choose_action(self.available_positions())
            if self._shown_turn(self.p2, p2_action, -1):
                break

    def show_board(self):
        """Print the board; player 1 is shown as 'x', player 2 as 'o'.

        A cell holding any other non-zero value is shown as '?'.
        """
        symbols = {1: 'x', -1: 'o', 0: ' '}
        # Each cell prints as "x | " after the leading "| ", so the frame is
        # 4 characters per column plus one (13 for three columns).
        separator = '-' * (4 * self.board_cols + 1)
        for i in range(self.board_rows):
            print(separator)
            cells = [symbols.get(self.board[i, j], '?') for j in range(self.board_cols)]
            print('| ' + ''.join(cell + ' | ' for cell in cells))
        print(separator)