# tictactoe/Player.py
from abc import ABC, abstractmethod
import numpy as np
import pickle
"""Player abstractions for the tic-tac-toe game."""


class Player(ABC):
    """Abstract base class every tic-tac-toe player must implement."""

    def __init__(self, name: str):
        self.name = name  # display name; also used by subclasses for file names

    @abstractmethod
    def get_hash(self, board: np.array):
        """Return a hashable representation of *board*."""

    @abstractmethod
    def choose_action(self, positions: list, current_board: np.array = None, symbol: int = -1) -> tuple:
        """Pick one of the free *positions* to play on *current_board* as *symbol*."""

    @abstractmethod
    def feed_reward(self, reward: float) -> None:
        """Propagate the end-of-game *reward* back through the visited states."""

    @abstractmethod
    def reset(self) -> None:
        """Clear per-game state so the player can start a fresh game."""

    @abstractmethod
    def add_state(self, state: np.array) -> None:
        """Record *state* as visited during the current game."""
class ComputerPlayer(Player):
    """Reinforcement-learning player that learns a state-value table via TD updates."""

    def __init__(self, name: str, board_cols: int, board_rows: int, exp_rate: float = 0.3):
        super().__init__(name)
        self.states = []          # board hashes visited during the current game
        self.lr = 0.2             # learning rate for the value update
        self.exp_rate = exp_rate  # probability of exploring (random move)
        self.decay_gamma = 0.9    # discount factor for future rewards
        self.states_value = {}    # board hash -> estimated state value
        self.board_cols = board_cols
        self.board_rows = board_rows

    def get_hash(self, board: np.array) -> str:
        """Return a string key for *board*, flattened to a single row."""
        return str(board.reshape(self.board_cols * self.board_rows))

    def choose_action(self, positions: list, current_board: np.array = None, symbol: int = -1) -> tuple:
        """Epsilon-greedy selection among the free *positions*.

        With probability ``exp_rate`` a random position is returned;
        otherwise the position whose resulting board has the highest
        learned value is returned (unknown states count as value 0).
        """
        if np.random.uniform(0, 1) <= self.exp_rate:
            # explore: pick a uniformly random free position
            idx = np.random.choice(len(positions))
            return positions[idx]
        # exploit: simulate each move and keep the best-valued successor.
        # -inf (was -999) is safe for any learned value; action=None avoids
        # an UnboundLocalError if positions is empty.
        value_max = -float('inf')
        action = None
        for p in positions:
            next_board = current_board.copy()
            next_board[p] = symbol
            next_board_hash = self.get_hash(next_board)
            value = self.states_value.get(next_board_hash, 0)  # single lookup, default 0
            if value >= value_max:
                value_max = value
                action = p
        return action

    def add_state(self, state: np.array) -> None:
        """Record a visited board hash for the end-of-game value update."""
        self.states.append(state)

    def feed_reward(self, reward: float) -> None:
        """Backpropagate *reward* through the visited states, most recent first."""
        for st in reversed(self.states):
            if st not in self.states_value:
                self.states_value[st] = 0
            # Bellman-style TD update toward the discounted reward
            self.states_value[st] += self.lr * (self.decay_gamma * reward - self.states_value[st])
            reward = self.states_value[st]

    def reset(self) -> None:
        """Forget the states visited in the finished game (keeps learned values)."""
        self.states = []

    def save_policy(self) -> None:
        """Persist the learned state values to ``policy_<name>``."""
        # 'with' guarantees the handle is closed even if pickling raises
        with open('policy_' + str(self.name), 'wb') as fw:
            pickle.dump(self.states_value, fw)

    def load_policy(self, file) -> None:
        """Load previously saved state values from *file*.

        NOTE(review): ``pickle.load`` can execute arbitrary code from a
        malicious file — only load policy files you created yourself.
        """
        with open(file, 'rb') as fr:
            self.states_value = pickle.load(fr)
class HumanPlayer(Player):
    """Interactive player that reads its moves from standard input."""

    def __init__(self, name):
        super().__init__(name)

    def choose_action(self, positions: list, current_board: np.array = None, symbol: int = -1) -> tuple:
        """Prompt until the user enters a (row, col) contained in *positions*."""
        while True:
            try:
                row = int(input("Input your action row:"))
                col = int(input("Input your action col:"))
            except ValueError:
                # non-numeric input: re-prompt instead of crashing the game
                continue
            action = (row, col)
            if action in positions:
                return action

    def add_state(self, state) -> None:
        """No-op: a human does not track visited states."""

    def feed_reward(self, reward: float) -> None:
        """No-op: a human does not learn from rewards."""

    def get_hash(self, board: np.array):
        """No-op: board hashing is only needed by learning players."""

    def reset(self) -> None:
        """No-op: nothing to reset for a human player."""