from abc import ABC, abstractmethod
import pickle

import numpy as np


'''
Classes defining the Computer-Player
'''


class Player(ABC):
    '''Abstract base class defining the interface every player must implement.'''

    def __init__(self, name: str):
        self.name = name

    @abstractmethod
    def get_hash(self, board: np.array):
        pass

    @abstractmethod
    def choose_action(self, positions: list, current_board: np.array = None, symbol: int = -1) -> tuple:
        pass

    @abstractmethod
    def feed_reward(self, reward: float) -> None:
        pass

    @abstractmethod
    def reset(self) -> None:
        pass

    @abstractmethod
    def add_state(self, state: np.array) -> None:
        pass


class ComputerPlayer(Player):
    '''Computer player that learns a value for every board state it encounters.'''

    def __init__(self, name: str, board_cols: int, board_rows: int, exp_rate: float = 0.3):
        super().__init__(name)
        self.states = []          # states visited during the current game
        self.lr = 0.2             # learning rate for the value updates
        self.exp_rate = exp_rate  # exploration rate (probability of a random move)
        self.decay_gamma = 0.9    # discount factor for propagated rewards
        self.states_value = {}    # maps a board state to its estimated value
        self.board_cols = board_cols
        self.board_rows = board_rows

    def get_hash(self, board: np.array) -> str:
        '''Get the board hash (the flattened board as a string).'''
        board_hash = str(board.reshape(self.board_cols * self.board_rows))
        return board_hash

    def choose_action(self, positions: list, current_board: np.array = None, symbol: int = -1) -> tuple:
        '''Get the best action for the current state.'''
        # TODO: Implement this (one possible epsilon-greedy approach is
        # sketched in EpsilonGreedyComputerPlayer below).
        pass

    def add_state(self, state: np.array) -> None:
        self.states.append(state)

    def feed_reward(self, reward: float) -> None:
        '''At the end of the game, backpropagate the reward and update the state values.'''
        # TODO: Implement this (one possible backward value update is
        # sketched in EpsilonGreedyComputerPlayer below).
        pass

    def reset(self) -> None:
        self.states = []

    def save_policy(self) -> None:
        with open('policy_' + str(self.name), 'wb') as fw:
            pickle.dump(self.states_value, fw)

    def load_policy(self, file) -> None:
        with open(file, 'rb') as fr:
            self.states_value = pickle.load(fr)
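

# Sketch only: the two TODO methods above are not implemented yet.  The subclass
# below shows one possible way to fill them in, using only the attributes defined
# in ComputerPlayer.__init__ (exp_rate, states_value, lr, decay_gamma) and
# assuming that add_state() is fed board hashes as produced by get_hash().
# The class name and the exact update rule are illustrative assumptions, not the
# author's final implementation.
class EpsilonGreedyComputerPlayer(ComputerPlayer):

    def choose_action(self, positions: list, current_board: np.array = None, symbol: int = -1) -> tuple:
        # With probability exp_rate, explore by picking a random free position.
        if np.random.uniform(0, 1) <= self.exp_rate:
            return positions[np.random.choice(len(positions))]
        # Otherwise play greedily: try every free position and pick the one
        # whose resulting board has the highest known value.
        value_max = -float('inf')
        action = positions[0]
        for position in positions:
            next_board = current_board.copy()
            next_board[position] = symbol
            value = self.states_value.get(self.get_hash(next_board), 0)
            if value >= value_max:
                value_max = value
                action = position
        return action

    def feed_reward(self, reward: float) -> None:
        # Walk the recorded states backwards and move each value a step (lr)
        # towards the discounted reward propagated from the end of the game.
        for state in reversed(self.states):
            if state not in self.states_value:
                self.states_value[state] = 0
            self.states_value[state] += self.lr * (self.decay_gamma * reward - self.states_value[state])
            reward = self.states_value[state]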


'''
Class for the Human-Player
'''


class HumanPlayer(Player):
    '''Human player that enters its moves on the console.'''

    def __init__(self, name):
        super().__init__(name)

    def choose_action(self, positions: list, current_board: np.array = None, symbol: int = -1) -> tuple:
        while True:
            row = int(input("Input your action row:"))
            col = int(input("Input your action col:"))
            action = (row, col)
            if action in positions:
                return action

    # append a hash state (nothing to record for a human player)
    def add_state(self, state) -> None:
        pass

    # at the end of the game there is nothing to update for a human player
    def feed_reward(self, reward: float) -> None:
        pass

    def get_hash(self, board: np.array):
        pass

    def reset(self) -> None:
        pass
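

# Minimal usage sketch (an assumption for illustration, not part of the author's
# training loop): it only shows how the classes above fit together on a 3x3
# board; the actual game/judge logic lives elsewhere in the project.
if __name__ == "__main__":
    computer = ComputerPlayer("p1", board_cols=3, board_rows=3, exp_rate=0.3)
    human = HumanPlayer("human")   # a human opponent is created the same way

    board = np.zeros((3, 3), dtype=int)
    print(computer.get_hash(board))               # "[0 0 0 0 0 0 0 0 0]"

    computer.add_state(computer.get_hash(board))  # record the visited state by its hash
    computer.feed_reward(1.0)                     # no-op until the TODO is implemented
    computer.reset()

    computer.save_policy()                        # pickles states_value to 'policy_p1'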