"""Player types for a grid board game: an abstract base, a learning agent, and a human."""

from abc import ABC, abstractmethod
import pickle
from typing import Optional

import numpy as np


class Player(ABC):
    """Interface every player exposes to the game loop."""

    def __init__(self, name: str):
        self.name = name

    @abstractmethod
    def get_hash(self, board: np.ndarray) -> Optional[str]:
        """Return a string key for ``board`` (or None if the player keeps no keys)."""

    @abstractmethod
    def choose_action(self, positions: list, current_board: Optional[np.ndarray] = None,
                      symbol: int = -1) -> tuple:
        """Return one element of ``positions`` as the move to play."""

    @abstractmethod
    def feed_reward(self, reward: float) -> None:
        """Receive the reward handed out at the end of a game."""

    @abstractmethod
    def reset(self) -> None:
        """Clear per-game state."""

    @abstractmethod
    def add_state(self, state) -> None:
        """Record a state visited during the current game."""


class ComputerPlayer(Player):
    """Player that learns a value per board state and acts epsilon-greedily on it.

    Attributes set in ``__init__``:
        states: states recorded during the current game (see ``add_state``).
        lr: step size used by ``feed_reward`` (0.2).
        exp_rate: probability of picking a random move in ``choose_action``.
        decay_gamma: discount applied to the propagated reward (0.9).
        states_value: mapping from state key to learned value.
        board_cols / board_rows: board dimensions used by ``get_hash``.
    """

    def __init__(self, name: str, board_cols: int, board_rows: int, exp_rate: float = 0.3):
        super().__init__(name)
        self.states = []
        self.lr = 0.2
        self.exp_rate = exp_rate
        self.decay_gamma = 0.9
        self.states_value = {}
        self.board_cols = board_cols
        self.board_rows = board_rows

    def get_hash(self, board: np.ndarray) -> str:
        """Return the string form of ``board`` flattened to cols * rows entries."""
        return str(board.reshape(self.board_cols * self.board_rows))

    def choose_action(self, positions: list, current_board: Optional[np.ndarray] = None,
                      symbol: int = -1) -> tuple:
        """Pick a move from ``positions`` using an epsilon-greedy rule.

        With probability ``exp_rate`` a uniformly random position is returned.
        Otherwise each position is tried on a copy of ``current_board`` (the
        cell is set to ``symbol``) and the position whose resulting board has
        the highest entry in ``states_value`` wins; unseen boards count as 0.0
        and ties go to the earliest position.

        Raises:
            ValueError: if ``positions`` is empty or ``current_board`` is None.
        """
        if len(positions) == 0:
            raise ValueError("no available positions to choose from")
        if current_board is None:
            # The successor boards cannot be built without the current one.
            raise ValueError("current_board is required to evaluate actions")

        # Strict '<' so exp_rate == 0 never explores and exp_rate == 1 always does.
        if np.random.uniform(0, 1) < self.exp_rate:
            return positions[np.random.randint(len(positions))]

        best_action = positions[0]
        best_value = float("-inf")
        for position in positions:
            next_board = current_board.copy()  # never mutate the caller's board
            # tuple(...) forces single-cell indexing even if position is a list.
            next_board[tuple(position)] = symbol
            value = self.states_value.get(self.get_hash(next_board), 0.0)
            if value > best_value:
                best_value = value
                best_action = position
        return best_action

    def add_state(self, state) -> None:
        """Append ``state`` to the list of states seen in the current game."""
        self.states.append(state)

    def feed_reward(self, reward: float) -> None:
        """Propagate the end-of-game ``reward`` backwards through ``states``.

        Walking from the last recorded state to the first, each value moves
        towards ``decay_gamma * reward`` by a fraction ``lr``; the updated
        value then becomes the reward for the preceding state.

        Recorded states that are numpy arrays are keyed through ``get_hash``;
        any other recorded state is used as the dictionary key directly.
        """
        for state in reversed(self.states):
            key = self.get_hash(state) if isinstance(state, np.ndarray) else state
            value = self.states_value.get(key, 0.0)
            value += self.lr * (self.decay_gamma * reward - value)
            self.states_value[key] = value
            reward = value

    def reset(self) -> None:
        """Forget the states recorded for the current game."""
        self.states = []

    def save_policy(self) -> None:
        """Pickle ``states_value`` to the file ``policy_<name>`` in the working directory."""
        with open('policy_' + str(self.name), 'wb') as fw:
            pickle.dump(self.states_value, fw)

    def load_policy(self, file) -> None:
        """Replace ``states_value`` with the object pickled in ``file``.

        NOTE(review): unpickling can execute arbitrary code; only load policy
        files that come from a trusted source.
        """
        with open(file, 'rb') as fr:
            self.states_value = pickle.load(fr)


class HumanPlayer(Player):
    """Player whose moves are typed in on standard input."""

    def __init__(self, name):
        super().__init__(name)

    def choose_action(self, positions: list, current_board: Optional[np.ndarray] = None,
                      symbol: int = -1) -> tuple:
        """Prompt for a row and a column until ``(row, col)`` is in ``positions``.

        Non-integer input is reported and the prompt is repeated instead of
        letting the ``ValueError`` end the game. ``current_board`` and
        ``symbol`` are accepted for interface compatibility and are unused.
        """
        while True:
            try:
                row = int(input("Input your action row:"))
                col = int(input("Input your action col:"))
            except ValueError:
                print("Please enter whole numbers for row and column.")
                continue
            action = (row, col)
            if action in positions:
                return action

    def add_state(self, state) -> None:
        """No-op: a human player records no states."""

    def feed_reward(self, reward: float) -> None:
        """No-op: a human player learns nothing from the reward."""

    def get_hash(self, board: np.ndarray) -> None:
        """No-op: a human player needs no board key."""

    def reset(self) -> None:
        """No-op: there is no per-game state to clear."""