tictactoe/State.py

import numpy as np

from Player import Player

'''
    Defines the board state, the winning rules, and the two game modes: computer vs. computer and computer vs. human
'''


class State:
    """Board state, winning rules and game loops for a tic-tac-toe style game.

    The board is a ``board_rows x board_cols`` NumPy array holding ``1`` for
    player 1, ``-1`` for player 2 and ``0`` for an empty cell. Two game loops
    are offered: ``play`` (two computer players, with rewards fed back after
    each game) and ``play2`` (player 1 against a player whose ``choose_action``
    only receives the free positions).
    """

    # Characters printed by ``show_board`` for each cell value.
    _TOKENS = {1: 'x', -1: 'o', 0: ' '}

    # ``Player`` is quoted so the annotation is not evaluated when the class
    # is defined; the name is only needed by type checkers.
    def __init__(self, p1: "Player", p2: "Player", board_rows: int, board_cols: int):
        """Create an empty board with player 1 (symbol ``1``) to move."""
        self.board = np.zeros((board_rows, board_cols))
        self.p1 = p1
        self.p2 = p2
        self.isEnd = False
        self.board_hash = None
        self.player_symbol = 1
        self.board_cols = board_cols
        self.board_rows = board_rows

    def get_hash(self) -> str:
        """Return (and cache in ``board_hash``) a string key for the board.

        The key is the ``str`` of the flattened board; its exact format is
        kept unchanged because the players store states under it.
        """
        self.board_hash = str(self.board.reshape(self.board_cols * self.board_rows))
        return self.board_hash

    @staticmethod
    def _line_owner(total, length):
        """Return 1 / -1 if a line of ``length`` cells summing to ``total`` is full of one symbol."""
        if length > 0:
            if total == length:
                return 1
            if total == -length:
                return -1
        return None

    def winner(self):
        """Evaluate the board and update ``isEnd``.

        A player wins by owning every cell of a row, a column or, on square
        boards, one of the two main diagonals. On the standard 3x3 board this
        is the usual "three in a line" rule.

        Returns:
            ``1`` if player 1 won, ``-1`` if player 2 won, ``0`` for a tie
            (no empty cell left) and ``None`` while the game is still open.
        """
        rows, cols = self.board_rows, self.board_cols

        # Rows first, then columns, so ties between lines resolve in that order.
        line_totals = [(self.board[r, :].sum(), cols) for r in range(rows)]
        line_totals += [(self.board[:, c].sum(), rows) for c in range(cols)]
        for total, length in line_totals:
            owner = self._line_owner(total, length)
            if owner is not None:
                self.isEnd = True
                return owner

        # Diagonals only span the whole board when it is square.
        if rows == cols:
            diag_owners = {
                self._line_owner(np.trace(self.board), rows),
                self._line_owner(np.trace(np.fliplr(self.board)), rows),
            }
            # Player 1 is reported first if both diagonals were complete.
            for symbol in (1, -1):
                if symbol in diag_owners:
                    self.isEnd = True
                    return symbol

        # Tie: nobody won and no empty cell is left.
        if not np.any(self.board == 0):
            self.isEnd = True
            return 0

        self.isEnd = False
        return None

    def available_positions(self) -> list:
        """Return the ``(row, col)`` tuples of all empty cells in row-major order."""
        return [
            (i, j)
            for i in range(self.board_rows)
            for j in range(self.board_cols)
            if self.board[i, j] == 0
        ]

    def update_state(self, position) -> None:
        """Place the current player's symbol on ``position`` and switch players.

        ``position`` is converted to a tuple because indexing a NumPy array
        with a list ``[i, j]`` selects whole rows instead of a single cell.
        """
        self.board[tuple(position)] = self.player_symbol
        self.player_symbol = -1 if self.player_symbol == 1 else 1

    def give_reward(self) -> None:
        """Feed the end-of-game reward to both players.

        The winner gets 1 and the loser 0. Any other result is rewarded as a
        tie with 0.1 for player 1 and 0.5 for player 2.
        NOTE(review): the asymmetric tie reward looks intentional (player 1
        moves first) - confirm before changing.
        """
        result = self.winner()
        if result == 1:
            # P1 won
            self.p1.feed_reward(1)
            self.p2.feed_reward(0)
        elif result == -1:
            # P2 won
            self.p1.feed_reward(0)
            self.p2.feed_reward(1)
        else:
            # Tie
            self.p1.feed_reward(0.1)
            self.p2.feed_reward(0.5)

    def reset(self) -> None:
        """Reset the board to the start position with player 1 to move."""
        self.board = np.zeros((self.board_rows, self.board_cols))
        self.board_hash = None
        self.isEnd = False
        self.player_symbol = 1

    def _play_training_game(self) -> None:
        """Play one computer-vs-computer game, then reward and reset everything."""
        while not self.isEnd:
            for player in (self.p1, self.p2):
                positions = self.available_positions()
                action = player.choose_action(positions, self.board, self.player_symbol)
                self.update_state(action)
                player.add_state(self.get_hash())

                # Did this move end the game (win or tie)?
                if self.winner() is not None:
                    self.give_reward()
                    self.p1.reset()
                    self.p2.reset()
                    self.reset()
                    return

    def play(self, rounds=100) -> None:
        """Let the two computer players play ``rounds`` games against each other.

        Progress is printed every 1000 rounds.
        """
        for i in range(rounds):
            if i % 1000 == 0:
                print("Rounds {}".format(i))
            self._play_training_game()

    def _finish_turn(self, action, player, symbol) -> bool:
        """Apply ``action``, print the board and report the result if the game ended.

        Returns ``True`` when the game is over (the board has been reset).
        """
        self.update_state(action)
        self.show_board()

        win = self.winner()
        if win is None:
            return False
        if win == symbol:
            print(player.name, "wins!")
        else:
            print("tie!")
        self.reset()
        return True

    def play2(self) -> None:
        """Play one game of player 1 against player 2 (the human).

        Player 2's ``choose_action`` is called with the free positions only.
        """
        while not self.isEnd:
            # Player 1
            positions = self.available_positions()
            p1_action = self.p1.choose_action(positions, self.board, self.player_symbol)
            if self._finish_turn(p1_action, self.p1, 1):
                break

            # Player 2 (Human)
            positions = self.available_positions()
            p2_action = self.p2.choose_action(positions)
            if self._finish_turn(p2_action, self.p2, -1):
                break

    def show_board(self):
        """Print the board; player 1 is ``x``, player 2 is ``o``.

        The separator width follows the number of columns, and a cell holding
        an unexpected value is printed as ``?``.
        """
        separator = '-' * (4 * self.board_cols + 1)
        for i in range(self.board_rows):
            print(separator)
            cells = ''.join(
                self._TOKENS.get(self.board[i, j], '?') + ' | '
                for j in range(self.board_cols)
            )
            print('| ' + cells)
        print(separator)
initial commit 2022-07-22 13:08:35 +02:00			`import numpy as np`

			`from Player import Player`

			`'''`
			`Class defines Boardstates, rules for winning and distinguish between pure computer game and game against a human`
			`'''`


			`class State:`
			`def __init__(self, p1: Player, p2: Player, board_rows: int, board_cols: int):`
			`self.board = np.zeros((board_rows, board_cols))`
			`self.p1 = p1`
			`self.p2 = p2`
			`self.isEnd = False`
			`self.board_hash = None`
			`self.player_symbol = 1`
			`self.board_cols = board_cols`
			`self.board_rows = board_rows`

			`'''`
			`Get unique hash of current board state`
			`'''`

			`def get_hash(self) -> str:`
			`self.board_hash = str(self.board.reshape(self.board_cols * self.board_rows))`
			`return self.board_hash`

			`'''`
			`Define winning rules`
			`'''`

			`def winner(self):`
			`# 3 in a row`
			`for i in range(self.board_rows):`
			`if sum(self.board[i, :]) == 3:`
			`self.isEnd = True`
			`return 1`
			`if sum(self.board[i, :]) == -3:`
			`self.isEnd = True`
			`return -1`
			`# 3 in a column`
			`for i in range(self.board_cols):`
			`if sum(self.board[:, i]) == 3:`
			`self.isEnd = True`
			`return 1`
			`if sum(self.board[:, i]) == -3:`
			`self.isEnd = True`
			`return -1`
			`# diagonal`
			`diag_sum1 = sum([self.board[i, i] for i in range(self.board_cols)])`
			`diag_sum2 = sum([self.board[i, self.board_cols - i - 1] for i in range(self.board_cols)])`
			`diag_sum = max(abs(diag_sum1), abs(diag_sum2))`
			`if diag_sum == 3:`
			`self.isEnd = True`
			`if diag_sum1 == 3 or diag_sum2 == 3:`
			`return 1`
			`else:`
			`return -1`

			`# tie`
			`# no available positions`
			`if len(self.available_positions()) == 0:`
			`self.isEnd = True`
			`return 0`
			`# not end`
			`self.isEnd = False`
			`return None`

			`'''`
			`Returns all available positions in current state`
			`'''`

			`def available_positions(self) -> list:`
			`positions = []`
			`for i in range(self.board_rows):`
			`for j in range(self.board_cols):`
			`if self.board[i, j] == 0:`
			`positions.append((i, j)) # need to be tuple`
			`return positions`

			`'''`
			`Set token on a position and switch to another player`
			`'''`

			`def update_state(self, position) -> None:`
			`self.board[position] = self.player_symbol`
			`self.player_symbol = -1 if self.player_symbol == 1 else 1`

			`'''`
			`If game ends, backpropagate reward`
			`'''`

			`def give_reward(self) -> None:`
			`result = self.winner()`
			`# P1 won`
			`if result == 1:`
			`self.p1.feed_reward(1)`
			`self.p2.feed_reward(0)`
			`# P2 won`
			`elif result == -1:`
			`self.p1.feed_reward(0)`
			`self.p2.feed_reward(1)`
			`# Tie`
			`else:`
			`self.p1.feed_reward(0.1)`
			`self.p2.feed_reward(0.5)`

			`'''`
			`Reset Board to Startposition`
			`'''`

			`def reset(self) -> None:`
			`self.board = np.zeros((self.board_rows, self.board_cols))`
			`self.board_hash = None`
			`self.isEnd = False`
			`self.player_symbol = 1`

			`'''`
			`Game with 2 Computer`
			`'''`

			`def play(self, rounds=100) -> None:`
			`for i in range(rounds):`
			`if i % 1000 == 0:`
			`print("Rounds {}".format(i))`
			`while not self.isEnd:`
			`# Player 1`
			`positions = self.available_positions()`
			`p1_action = self.p1.choose_action(positions, self.board, self.player_symbol)`
			`self.update_state(p1_action)`
			`board_hash = self.get_hash()`
			`self.p1.add_state(board_hash)`

			`# Does P1 won or is it a tie?`
			`win = self.winner()`
			`if win is not None:`
			`self.give_reward()`
			`self.p1.reset()`
			`self.p2.reset()`
			`self.reset()`
			`break`

			`else:`
			`# Player 2`
			`positions = self.available_positions()`
			`p2_action = self.p2.choose_action(positions, self.board, self.player_symbol)`
			`self.update_state(p2_action)`
			`board_hash = self.get_hash()`
			`self.p2.add_state(board_hash)`

			`# Does P2 won or is it a tie?`
			`win = self.winner()`
			`if win is not None:`
			`self.give_reward()`
			`self.p1.reset()`
			`self.p2.reset()`
			`self.reset()`
			`break`

			`# Game with a human`
			`def play2(self) -> None:`
			`while not self.isEnd:`
			`# Player 1`
			`positions = self.available_positions()`
			`p1_action = self.p1.choose_action(positions, self.board, self.player_symbol)`
			`self.update_state(p1_action)`
			`self.show_board()`

			`# Does P1 won or is it a tie?`
			`win = self.winner()`
			`if win is not None:`
			`if win == 1:`
			`print(self.p1.name, "wins!")`
			`else:`
			`print("tie!")`
			`self.reset()`
			`break`

			`else:`
			`# Player 2 (Human)`
			`positions = self.available_positions()`
			`p2_action = self.p2.choose_action(positions)`
			`self.update_state(p2_action)`
			`self.show_board()`

			`# Does P2 won or is it a tie?`
			`win = self.winner()`
			`if win is not None:`
			`if win == -1:`
			`print(self.p2.name, "wins!")`
			`else:`
			`print("tie!")`
			`self.reset()`
			`break`

			`'''`
			`Prints current state of Board`
			`'''`

			`def show_board(self):`
			`# P1: x P2: o`
			`for i in range(0, self.board_rows):`
			`print('-------------')`
			`out = '\| '`
			`for j in range(0, self.board_cols):`
			`if self.board[i, j] == 1:`
			`token = 'x'`
			`if self.board[i, j] == -1:`
			`token = 'o'`
			`if self.board[i, j] == 0:`
			`token = ' '`
			`out += token + ' \| '`
			`print(out)`
			`print('-------------')`