TODO: implement Q-learning

This commit is contained in:
Dominik Brunmeir 2022-07-22 13:10:12 +02:00
parent dd3eb11ea8
commit a228356319

View File

@ -58,21 +58,8 @@ class ComputerPlayer(Player):
''' '''
def choose_action(self, positions: list, current_board: np.ndarray = None, symbol: int = -1) -> tuple:
    """Pick a move using an epsilon-greedy policy.

    With probability ``self.exp_rate`` a random available position is
    explored; otherwise the move whose resulting board has the highest
    learned value in ``self.states_value`` is exploited.

    :param positions: available board positions (tuples) to choose from
    :param current_board: current board state (required for exploitation)
    :param symbol: this player's mark, placed to evaluate candidate boards
    :return: the chosen position tuple (None if ``positions`` is empty
             on the exploitation path)
    """
    if np.random.uniform(0, 1) <= self.exp_rate:
        # Explore: take a uniformly random available action.
        idx = np.random.choice(len(positions))
        return positions[idx]
    # Exploit: score every candidate successor board and keep the best.
    # -inf instead of the old magic -999 sentinel, so arbitrarily negative
    # learned values can never leave `action` unassigned.
    value_max = -float("inf")
    action = None
    for p in positions:
        next_board = current_board.copy()
        next_board[p] = symbol
        next_board_hash = self.get_hash(next_board)
        # Single dict lookup with a default, instead of two .get() calls.
        value = self.states_value.get(next_board_hash, 0)
        # `>=` keeps the original tie-breaking (later position wins ties).
        if value >= value_max:
            value_max = value
            action = p
    return action
def add_state(self, state: np.array) -> None:
    """Record a visited board state for the current episode.

    The state (hash) is appended to ``self.states`` so that
    ``feed_reward`` can later back-propagate the game outcome
    through the visit history.
    """
    self.states.append(state)
@ -82,12 +69,8 @@ class ComputerPlayer(Player):
''' '''
def feed_reward(self, reward: float) -> None:
    """Back-propagate the episode's final reward through visited states.

    Walks the recorded states from last to first, nudging each stored
    state value toward the discounted target via the update rule
    ``V(s) <- V(s) + lr * (gamma * target - V(s))`` (Bellman-style
    backup); the target for each earlier state is the freshly updated
    value of its successor.

    :param reward: terminal reward for the finished game
    """
    target = reward
    for state in reversed(self.states):
        # Unseen states start from a neutral value of zero.
        if self.states_value.get(state) is None:
            self.states_value[state] = 0
        old_value = self.states_value[state]
        updated = old_value + self.lr * (self.decay_gamma * target - old_value)
        self.states_value[state] = updated
        target = updated
def reset(self) -> None: def reset(self) -> None:
self.states = [] self.states = []