# tictactoe/Player.py

from abc import ABC, abstractmethod

import numpy as np
import pickle

'''
    Abstract base class defining the interface shared by all players
'''


class Player(ABC):
    """Interface that every player implementation has to provide.

    The class cannot be instantiated directly; a subclass must override
    each abstract method declared below.
    """

    def __init__(self, name: str):
        """Store the player's name on the instance."""
        self.name = name

    @abstractmethod
    def get_hash(self, board: np.array):
        """Return a hash/key for ``board`` (format is up to the subclass)."""

    @abstractmethod
    def choose_action(self, positions: list, current_board: np.array = None, symbol: int = -1) -> tuple:
        """Select one action out of the candidate ``positions``."""

    @abstractmethod
    def feed_reward(self, reward: float) -> None:
        """Hand the given ``reward`` to the player."""

    @abstractmethod
    def reset(self) -> None:
        """Reset the player's state."""

    @abstractmethod
    def add_state(self, state: np.array) -> None:
        """Pass a ``state`` to the player for bookkeeping."""


class ComputerPlayer(Player):
    """Player that learns a value per board state and moves epsilon-greedily.

    The value table ``states_value`` maps a board hash (see ``get_hash``)
    to a float estimate. ``feed_reward`` updates it from the states recorded
    via ``add_state``; ``choose_action`` reads it to rank candidate moves.
    """

    def __init__(self, name: str, board_cols: int, board_rows: int, exp_rate: float = 0.3):
        super().__init__(name)
        self.states = []  # states recorded via add_state since the last reset
        self.lr = 0.2  # learning rate used in feed_reward
        self.exp_rate = exp_rate  # probability of picking a random action
        self.decay_gamma = 0.9  # discount applied to the reward in feed_reward
        self.states_value = {}  # board hash -> estimated value
        self.board_cols = board_cols
        self.board_rows = board_rows

    def get_hash(self, board: np.array) -> str:
        """Return the string form of the flattened board.

        The board is reshaped to one dimension of length
        ``board_cols * board_rows`` and converted with ``str``; the result
        is used as the key into ``states_value``.
        """
        board_hash = str(board.reshape(self.board_cols * self.board_rows))
        return board_hash

    def choose_action(self, positions: list, current_board: np.array = None, symbol: int = -1) -> tuple:
        """Pick an action from ``positions`` using an epsilon-greedy rule.

        With probability ``exp_rate`` a random position is returned.
        Otherwise every candidate is tried on a copy of ``current_board``
        (the cell is set to ``symbol``) and the position whose resulting
        board has the highest stored value wins; unseen boards count as 0
        and ties go to the first candidate.

        Args:
            positions: candidate actions, each usable as an index into the
                board (e.g. ``(row, col)``).
            current_board: board to evaluate moves on. When ``None`` no
                look-ahead is possible, so a random position is returned.
            symbol: value written into the board for the simulated move.

        Returns:
            One element of ``positions``.

        Raises:
            ValueError: if ``positions`` is empty.
        """
        if len(positions) == 0:
            raise ValueError("positions must contain at least one available action")

        # Strict '<' so that exp_rate == 0 never explores.
        explore = np.random.uniform(0, 1) < self.exp_rate
        if explore or current_board is None:
            return positions[np.random.randint(len(positions))]

        best_action = None
        best_value = None
        for position in positions:
            next_board = current_board.copy()  # never mutate the caller's board
            next_board[tuple(position)] = symbol
            value = self.states_value.get(self.get_hash(next_board), 0)
            if best_value is None or value > best_value:
                best_value = value
                best_action = position
        return best_action

    def add_state(self, state: np.array) -> None:
        """Record ``state`` so that ``feed_reward`` can update its value."""
        self.states.append(state)

    def feed_reward(self, reward: float) -> None:
        """Propagate ``reward`` backwards through the recorded states.

        Walking ``self.states`` from last to first, each state's value ``v``
        becomes ``v + lr * (decay_gamma * reward - v)``, and the updated
        value is then used as the reward for the preceding state. States
        without an entry in ``states_value`` start at 0.

        Recorded states that are already strings are used as keys directly;
        anything else is converted with ``get_hash`` first.
        """
        for state in reversed(self.states):
            key = state if isinstance(state, str) else self.get_hash(state)
            value = self.states_value.get(key, 0)
            value += self.lr * (self.decay_gamma * reward - value)
            self.states_value[key] = value
            reward = value

    def reset(self) -> None:
        """Forget the states recorded so far; the learned values are kept."""
        self.states = []

    def save_policy(self) -> None:
        """Pickle ``states_value`` to the file ``policy_<name>``.

        The path is relative, so the file is created in the current
        working directory.
        """
        # 'with' guarantees the file is closed even if pickling fails.
        with open('policy_' + str(self.name), 'wb') as fw:
            pickle.dump(self.states_value, fw)

    def load_policy(self, file) -> None:
        """Replace ``states_value`` with the object unpickled from ``file``.

        Args:
            file: path of a file previously written by ``save_policy``.
        """
        # SECURITY: pickle.load can execute arbitrary code; only load
        # policy files that come from a trusted source.
        with open(file, 'rb') as fr:
            self.states_value = pickle.load(fr)


'''
    Class for Human-Player
'''


class HumanPlayer(Player):
    """Player whose actions are typed in on standard input.

    Only ``choose_action`` does any work; the remaining ``Player`` methods
    are implemented as no-ops.
    """

    def __init__(self, name):
        super().__init__(name)

    def choose_action(self, positions: list, current_board: np.array = None, symbol: int = -1) -> tuple:
        """Prompt for a row and a column until the pair is in ``positions``.

        Non-integer input is treated like any other invalid entry: the
        prompts are simply repeated. ``current_board`` and ``symbol`` are
        accepted for interface compatibility and not used.

        Returns:
            The chosen ``(row, col)`` tuple, guaranteed to be in ``positions``.
        """
        while True:
            try:
                row = int(input("Input your action row:"))
                col = int(input("Input your action col:"))
            except ValueError:
                # Non-numeric entry: ask again instead of crashing the game.
                continue
            action = (row, col)
            if action in positions:
                return action

    def add_state(self, state) -> None:
        """No-op: this player does not record states."""
        pass

    def feed_reward(self, reward: float) -> None:
        """No-op: this player does not use rewards."""
        pass

    def get_hash(self, board: np.array):
        """No-op: always returns ``None``."""
        pass

    def reset(self) -> None:
        """No-op: there is no per-game state to clear."""
        pass