tictactoe/Player.py

120 lines
2.7 KiB
Python
Raw Normal View History

2022-07-22 13:08:35 +02:00
from abc import ABC, abstractmethod
import numpy as np
import pickle
'''
Abstract base class defining the common interface for all players.
'''
class Player(ABC):
    """Interface every tic-tac-toe player (computer or human) must implement."""

    def __init__(self, name: str):
        # Identifier used for display and for naming saved policy files.
        self.name = name

    @abstractmethod
    def get_hash(self, board: np.array):
        """Return a hashable representation of *board*."""

    @abstractmethod
    def choose_action(self, positions: list, current_board: np.array = None, symbol: int = -1) -> tuple:
        """Select one (row, col) move out of the free *positions*."""

    @abstractmethod
    def feed_reward(self, reward: float) -> None:
        """Propagate the end-of-game *reward* into the player's knowledge."""

    @abstractmethod
    def reset(self) -> None:
        """Clear per-game state so a new game can begin."""

    @abstractmethod
    def add_state(self, state: np.array) -> None:
        """Record a board state visited during the current game."""
class ComputerPlayer(Player):
    """Reinforcement-learning player.

    Uses epsilon-greedy action selection over estimated state values and a
    temporal-difference update to backpropagate the end-of-game reward
    through the visited states.
    """

    def __init__(self, name: str, board_cols: int, board_rows: int, exp_rate: float = 0.3):
        super().__init__(name)
        self.states = []          # hashes of board states visited this game
        self.lr = 0.2             # learning rate for the TD update
        self.exp_rate = exp_rate  # probability of exploring with a random move
        self.decay_gamma = 0.9    # discount factor for future rewards
        self.states_value = {}    # state hash -> estimated value
        self.board_cols = board_cols
        self.board_rows = board_rows

    def get_hash(self, board: np.array) -> str:
        """Return a string key uniquely identifying *board*'s contents."""
        return str(board.reshape(self.board_cols * self.board_rows))

    def choose_action(self, positions: list, current_board: np.array = None, symbol: int = -1) -> tuple:
        """Pick a (row, col) move from the free *positions*.

        With probability ``exp_rate`` a random move is explored; otherwise the
        move whose resulting board has the highest learned value is exploited.
        """
        if np.random.uniform(0, 1) <= self.exp_rate:
            # Explore: uniformly random free position.
            idx = np.random.choice(len(positions))
            return positions[idx]
        # Exploit: simulate each candidate move and keep the best-valued one.
        value_max = -float('inf')
        action = positions[0]
        for position in positions:
            next_board = current_board.copy()
            next_board[position] = symbol
            # Unseen states default to value 0.
            value = self.states_value.get(self.get_hash(next_board), 0)
            if value >= value_max:
                value_max = value
                action = position
        return action

    def add_state(self, state: np.array) -> None:
        """Record a visited state (hash) for the current game."""
        self.states.append(state)

    def feed_reward(self, reward: float) -> None:
        """At the end of a game, backpropagate *reward* through the visited
        states (most recent first), nudging each state's value toward the
        discounted future reward by the learning rate."""
        for state in reversed(self.states):
            if self.states_value.get(state) is None:
                self.states_value[state] = 0
            # TD update: V(s) += lr * (gamma * reward - V(s))
            self.states_value[state] += self.lr * (self.decay_gamma * reward - self.states_value[state])
            reward = self.states_value[state]

    def reset(self) -> None:
        """Discard the per-game state history."""
        self.states = []

    def save_policy(self) -> None:
        """Pickle the learned state values to ``policy_<name>``."""
        # 'with' guarantees the handle is closed even if pickling fails.
        with open('policy_' + str(self.name), 'wb') as fw:
            pickle.dump(self.states_value, fw)

    def load_policy(self, file) -> None:
        """Load previously saved state values from *file*.

        NOTE(review): pickle.load executes arbitrary code from the file —
        only load policy files from a trusted source.
        """
        with open(file, 'rb') as fr:
            self.states_value = pickle.load(fr)
'''
Class for Human-Player
'''
class HumanPlayer(Player):
    """Interactive player that reads moves from standard input."""

    def __init__(self, name):
        super().__init__(name)

    def choose_action(self, positions: list, current_board: np.array = None, symbol: int = -1) -> tuple:
        """Prompt repeatedly until the user enters a free (row, col) position."""
        while True:
            try:
                row = int(input("Input your action row:"))
                col = int(input("Input your action col:"))
            except ValueError:
                # Non-numeric input previously crashed the game; re-prompt instead.
                continue
            action = (row, col)
            if action in positions:
                return action

    def add_state(self, state) -> None:
        """Humans keep no state history; required by the Player interface."""
        pass

    def feed_reward(self, reward: float) -> None:
        """Humans do not learn from rewards; required by the Player interface."""
        pass

    def get_hash(self, board: np.array):
        """Board hashing is unused for human players."""
        pass

    def reset(self) -> None:
        """Nothing to reset for a human player."""
        pass