NOTE(review): restored unified-diff line structure — the patch had been flattened
onto one physical line and could not be applied. Leading context of each hunk is
reconstructed from the hunk counts (-58,21 / -82,12); one blank context line per
hunk is inferred — confirm against the pre-image (index ea1bcbe). Text above the
"diff --git" header is skipped by `git apply` and `patch` (format-patch style).

diff --git a/Player.py b/Player.py
index ea1bcbe..7f6e032 100644
--- a/Player.py
+++ b/Player.py
@@ -58,21 +58,8 @@ class ComputerPlayer(Player):
 
     '''
     def choose_action(self, positions: list, current_board: np.array = None, symbol: int = -1) -> tuple:
-        if np.random.uniform(0, 1) <= self.exp_rate:
-            # take random action
-            idx = np.random.choice(len(positions))
-            action = positions[idx]
-        else:
-            value_max = -999
-            for p in positions:
-                next_board = current_board.copy()
-                next_board[p] = symbol
-                next_board_hash = self.get_hash(next_board)
-                value = 0 if self.states_value.get(next_board_hash) is None else self.states_value.get(next_board_hash)
-                if value >= value_max:
-                    value_max = value
-                    action = p
-        return action
+        # TODO: Implement this
+        pass
 
     def add_state(self, state: np.array) -> None:
         self.states.append(state)
@@ -82,12 +69,8 @@ class ComputerPlayer(Player):
 
     '''
     def feed_reward(self, reward: float) -> None:
-        for st in reversed(self.states):
-            if self.states_value.get(st) is None:
-                self.states_value[st] = 0
-            # bellman equation
-            self.states_value[st] += self.lr * (self.decay_gamma * reward - self.states_value[st])
-            reward = self.states_value[st]
+        # TODO: Implement this
+        pass
 
     def reset(self) -> None:
         self.states = []