2
0
Fork 0

todo: implement q learning

This commit is contained in:
Dominik Brunmeir 2022-07-22 13:10:12 +02:00
parent dd3eb11ea8
commit a228356319
1 changed file with 4 additions and 21 deletions

View File

@ -58,21 +58,8 @@ class ComputerPlayer(Player):
'''
def choose_action(self, positions: list, current_board: np.ndarray = None, symbol: int = -1) -> tuple:
    """Pick a move with an epsilon-greedy policy.

    With probability ``self.exp_rate`` a random available position is
    explored; otherwise the position whose resulting board has the
    highest learned value in ``self.states_value`` is exploited.

    Args:
        positions: available board positions to choose from (non-empty).
        current_board: current board array; only read on the greedy branch.
        symbol: this player's mark placed on the trial board.

    Returns:
        The chosen position from ``positions``.
    """
    if np.random.uniform(0, 1) <= self.exp_rate:
        # Explore: take a uniformly random available position.
        idx = np.random.choice(len(positions))
        action = positions[idx]
    else:
        # Exploit: simulate each candidate move and keep the best one.
        # -inf (not an arbitrary sentinel such as -999) guarantees the
        # first candidate is always accepted, even for very negative
        # learned values.
        value_max = float("-inf")
        for p in positions:
            next_board = current_board.copy()
            next_board[p] = symbol
            next_board_hash = self.get_hash(next_board)
            # States never seen before default to a value of 0.
            value = self.states_value.get(next_board_hash, 0)
            if value >= value_max:
                value_max = value
                action = p
    return action
def add_state(self, state: np.ndarray) -> None:
    """Record a visited board state for the end-of-game reward pass."""
    self.states += [state]
@ -82,12 +69,8 @@ class ComputerPlayer(Player):
'''
def feed_reward(self, reward: float) -> None:
    """Back-propagate the game outcome through the visited states.

    Walks ``self.states`` from last to first, applying the
    temporal-difference update
    ``V(s) += lr * (gamma * target - V(s))``, where the target for each
    state is the freshly updated value of its successor.

    Args:
        reward: terminal reward of the finished game.
    """
    for st in reversed(self.states):
        # Lazily initialise states never valued before to 0.
        if self.states_value.get(st) is None:
            self.states_value[st] = 0
        # Bellman / TD update toward the discounted propagated reward.
        self.states_value[st] += self.lr * (self.decay_gamma * reward - self.states_value[st])
        # The updated value becomes the target for the preceding state.
        reward = self.states_value[st]
def reset(self) -> None:
self.states = []