TODO: implement Q-learning
This commit is contained in:
parent
dd3eb11ea8
commit
a228356319
25
Player.py
25
Player.py
@ -58,21 +58,8 @@ class ComputerPlayer(Player):
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
def choose_action(self, positions: list, current_board: np.array = None, symbol: int = -1) -> tuple:
    """Choose a move via epsilon-greedy Q-value lookup.

    With probability ``self.exp_rate`` a random available position is
    chosen (exploration); otherwise the position whose resulting board
    state has the highest learned value is chosen (exploitation).

    Args:
        positions: available board positions to choose from.
        current_board: the current board array (copied, never mutated).
        symbol: this player's mark placed on the candidate board.

    Returns:
        The chosen position from ``positions``.
    """
    if np.random.uniform(0, 1) <= self.exp_rate:
        # Explore: take a uniformly random available position.
        idx = np.random.choice(len(positions))
        return positions[idx]

    # Exploit: evaluate each candidate move on a copy of the board and
    # keep the one with the highest stored state value.
    # float("-inf") replaces the original -999 sentinel so the choice is
    # correct even if learned values ever fall below -999.
    value_max = float("-inf")
    action = None
    for p in positions:
        next_board = current_board.copy()
        next_board[p] = symbol
        # Single dict lookup with a default of 0 for unseen states
        # (original called .get() twice on the same key).
        value = self.states_value.get(self.get_hash(next_board), 0)
        # >= keeps the behavior of preferring the last position on ties.
        if value >= value_max:
            value_max = value
            action = p
    return action
|
|
||||||
|
|
||||||
def add_state(self, state: np.array) -> None:
    """Record a visited board state in this episode's history."""
    self.states += [state]
|
||||||
@ -82,12 +69,8 @@ class ComputerPlayer(Player):
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
def feed_reward(self, reward: float) -> None:
    """Back-propagate a terminal reward through the visited states.

    Walks the episode's recorded states in reverse order and nudges each
    stored value toward the discounted reward with a TD-style update:

        V(s) += lr * (gamma * reward - V(s))

    The updated value then becomes the reward fed to the preceding state.

    Args:
        reward: terminal reward for the finished game.
    """
    for st in reversed(self.states):
        # Unseen states start at value 0; setdefault replaces the
        # original two-step get()/store with a single idiomatic call.
        self.states_value.setdefault(st, 0)
        # Bellman-style update toward the discounted reward.
        self.states_value[st] += self.lr * (self.decay_gamma * reward - self.states_value[st])
        reward = self.states_value[st]
|
|
||||||
|
|
||||||
def reset(self) -> None:
    """Clear the recorded state history in preparation for a new game."""
    # Rebind to a fresh list (matches the original: any external alias
    # to the old list is deliberately left untouched).
    self.states = []
|
||||||
|
Loading…
Reference in New Issue
Block a user