from abc import ABC, abstractmethod
import pickle

import numpy as np


'''
Classes defining the Computer-Player
'''


class Player(ABC):
    '''Abstract base class defining the interface every player must implement.'''

    def __init__(self, name: str):
        self.name = name

    @abstractmethod
    def get_hash(self, board: np.array):
        pass

    @abstractmethod
    def choose_action(self, positions: list, current_board: np.array = None, symbol: int = -1) -> tuple:
        pass

    @abstractmethod
    def feed_reward(self, reward: float) -> None:
        pass

    @abstractmethod
    def reset(self) -> None:
        pass

    @abstractmethod
    def add_state(self, state: np.array) -> None:
        pass


class ComputerPlayer(Player):
    '''Computer player that learns a value for every board state it encounters.'''

    def __init__(self, name: str, board_cols: int, board_rows: int, exp_rate: float = 0.3):
        super().__init__(name)
        self.states = []          # states visited during the current game
        self.lr = 0.2             # learning rate for the value updates
        self.exp_rate = exp_rate  # exploration rate (probability of a random move)
        self.decay_gamma = 0.9    # discount factor for propagated rewards
        self.states_value = {}    # maps a board state to its estimated value
        self.board_cols = board_cols
        self.board_rows = board_rows

    def get_hash(self, board: np.array) -> str:
        '''Get the board hash (the flattened board as a string).'''
        board_hash = str(board.reshape(self.board_cols * self.board_rows))
        return board_hash

    def choose_action(self, positions: list, current_board: np.array = None, symbol: int = -1) -> tuple:
        '''Get the best action for the current state.'''
        # TODO: Implement this (one possible epsilon-greedy approach is
        # sketched in EpsilonGreedyComputerPlayer below).
        pass

    def add_state(self, state: np.array) -> None:
        self.states.append(state)

    def feed_reward(self, reward: float) -> None:
        '''At the end of the game, backpropagate the reward and update the state values.'''
        # TODO: Implement this (one possible backward value update is
        # sketched in EpsilonGreedyComputerPlayer below).
        pass

    def reset(self) -> None:
        self.states = []

    def save_policy(self) -> None:
        with open('policy_' + str(self.name), 'wb') as fw:
            pickle.dump(self.states_value, fw)

    def load_policy(self, file) -> None:
        with open(file, 'rb') as fr:
            self.states_value = pickle.load(fr)
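

# Sketch only: the two TODO methods above are not implemented yet.  The subclass
# below shows one possible way to fill them in, using only the attributes defined
# in ComputerPlayer.__init__ (exp_rate, states_value, lr, decay_gamma) and
# assuming that add_state() is fed board hashes as produced by get_hash().
# The class name and the exact update rule are illustrative assumptions, not the
# author's final implementation.
class EpsilonGreedyComputerPlayer(ComputerPlayer):

    def choose_action(self, positions: list, current_board: np.array = None, symbol: int = -1) -> tuple:
        # With probability exp_rate, explore by picking a random free position.
        if np.random.uniform(0, 1) <= self.exp_rate:
            return positions[np.random.choice(len(positions))]
        # Otherwise play greedily: try every free position and pick the one
        # whose resulting board has the highest known value.
        value_max = -float('inf')
        action = positions[0]
        for position in positions:
            next_board = current_board.copy()
            next_board[position] = symbol
            value = self.states_value.get(self.get_hash(next_board), 0)
            if value >= value_max:
                value_max = value
                action = position
        return action

    def feed_reward(self, reward: float) -> None:
        # Walk the recorded states backwards and move each value a step (lr)
        # towards the discounted reward propagated from the end of the game.
        for state in reversed(self.states):
            if state not in self.states_value:
                self.states_value[state] = 0
            self.states_value[state] += self.lr * (self.decay_gamma * reward - self.states_value[state])
            reward = self.states_value[state]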


'''
Class for the Human-Player
'''


class HumanPlayer(Player):
    '''Human player that enters its moves on the console.'''

    def __init__(self, name):
        super().__init__(name)

    def choose_action(self, positions: list, current_board: np.array = None, symbol: int = -1) -> tuple:
        while True:
            row = int(input("Input your action row:"))
            col = int(input("Input your action col:"))
            action = (row, col)
            if action in positions:
                return action

    # append a hash state (nothing to record for a human player)
    def add_state(self, state) -> None:
        pass

    # at the end of the game there is nothing to update for a human player
    def feed_reward(self, reward: float) -> None:
        pass

    def get_hash(self, board: np.array):
        pass

    def reset(self) -> None:
        pass
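

# Minimal usage sketch (an assumption for illustration, not part of the author's
# training loop): it only shows how the classes above fit together on a 3x3
# board; the actual game/judge logic lives elsewhere in the project.
if __name__ == "__main__":
    computer = ComputerPlayer("p1", board_cols=3, board_rows=3, exp_rate=0.3)
    human = HumanPlayer("human")   # a human opponent is created the same way

    board = np.zeros((3, 3), dtype=int)
    print(computer.get_hash(board))               # "[0 0 0 0 0 0 0 0 0]"

    computer.add_state(computer.get_hash(board))  # record the visited state by its hash
    computer.feed_reward(1.0)                     # no-op until the TODO is implemented
    computer.reset()

    computer.save_policy()                        # pickles states_value to 'policy_p1'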