initial commit
This commit is contained in:
commit
dd3eb11ea8
25
.gitignore
vendored
Normal file
25
.gitignore
vendored
Normal file
@ -0,0 +1,25 @@
|
||||
# ---> macOS
|
||||
.DS_Store
|
||||
.AppleDouble
|
||||
.LSOverride
|
||||
# Icon must end with two \r
|
||||
Icon
|
||||
# Thumbnails
|
||||
._*
|
||||
__pycache__
|
||||
# Files that might appear in the root of a volume
|
||||
.DocumentRevisions-V100
|
||||
.fseventsd
|
||||
.Spotlight-V100
|
||||
.TemporaryItems
|
||||
.Trashes
|
||||
.VolumeIcon.icns
|
||||
# Directories potentially created on remote AFP share
|
||||
.AppleDB
|
||||
.AppleDesktop
|
||||
Network Trash Folder
|
||||
Temporary Items
|
||||
.apdisk
|
||||
run_old.py
|
||||
.idea
|
||||
venv
|
136
Player.py
Normal file
136
Player.py
Normal file
@ -0,0 +1,136 @@
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
import numpy as np
|
||||
import pickle
|
||||
|
||||
'''
|
||||
Class defines Computer-Player
|
||||
'''
|
||||
|
||||
|
||||
class Player(ABC):
    """Abstract interface for a tic-tac-toe player (human or computer)."""

    def __init__(self, name: str):
        # Display/identifier name; also used to derive policy file names.
        self.name = name

    @abstractmethod
    def get_hash(self, board: np.ndarray):
        """Return a hashable representation of *board* (or None if unused)."""
        pass

    @abstractmethod
    def choose_action(self, positions: list, current_board: np.ndarray = None, symbol: int = -1) -> tuple:
        """Pick and return a (row, col) move from the available *positions*."""
        pass

    @abstractmethod
    def feed_reward(self, reward: float) -> None:
        """Receive the end-of-game reward (used by learning players)."""
        pass

    @abstractmethod
    def reset(self) -> None:
        """Clear per-game state before a new game starts."""
        pass

    @abstractmethod
    def add_state(self, state: np.ndarray) -> None:
        """Record a visited board state (hash) during the game."""
        pass
|
||||
|
||||
|
||||
class ComputerPlayer(Player):
    """Reinforcement-learning player using tabular state-value estimates.

    Values are stored in ``states_value`` keyed by a board hash and updated
    backwards through the game's visited states in ``feed_reward``.
    """

    def __init__(self, name: str, board_cols: int, board_rows: int, exp_rate: float = 0.3):
        super().__init__(name)
        self.states = []            # hashes of boards visited this game
        self.lr = 0.2               # learning rate for the value update
        self.exp_rate = exp_rate    # epsilon: probability of a random move
        self.decay_gamma = 0.9      # discount factor
        self.states_value = {}      # board hash -> estimated value
        self.board_cols = board_cols
        self.board_rows = board_rows

    def get_hash(self, board: np.ndarray) -> str:
        """Return a unique string hash of *board* (flattened repr)."""
        return str(board.reshape(self.board_cols * self.board_rows))

    def choose_action(self, positions: list, current_board: np.ndarray = None, symbol: int = -1) -> tuple:
        """Epsilon-greedy move selection over the available *positions*.

        With probability ``exp_rate`` a random position is explored;
        otherwise the position whose resulting board has the highest
        estimated value is chosen.
        """
        if np.random.uniform(0, 1) <= self.exp_rate:
            # explore: take a uniformly random available position
            idx = np.random.choice(len(positions))
            return positions[idx]
        # exploit: evaluate each candidate next board
        value_max = -999
        action = None  # avoid NameError if positions is empty
        for p in positions:
            next_board = current_board.copy()
            next_board[p] = symbol
            # unknown states default to value 0 (single dict lookup)
            value = self.states_value.get(self.get_hash(next_board), 0)
            if value >= value_max:
                value_max = value
                action = p
        return action

    def add_state(self, state: np.ndarray) -> None:
        """Record a visited board hash for end-of-game backpropagation."""
        self.states.append(state)

    def feed_reward(self, reward: float) -> None:
        """Backpropagate *reward* through the states visited this game."""
        for st in reversed(self.states):
            if st not in self.states_value:
                self.states_value[st] = 0
            # Bellman-style update toward the discounted downstream reward
            self.states_value[st] += self.lr * (self.decay_gamma * reward - self.states_value[st])
            reward = self.states_value[st]

    def reset(self) -> None:
        """Clear the per-game state history."""
        self.states = []

    def save_policy(self) -> None:
        """Persist the learned state values to 'policy_<name>'."""
        # with-statement guarantees the file is closed even on error
        with open('policy_' + str(self.name), 'wb') as fw:
            pickle.dump(self.states_value, fw)

    def load_policy(self, file) -> None:
        """Load state values from *file*.

        NOTE(review): ``pickle.load`` can execute arbitrary code from the
        file — only load policy files from trusted sources.
        """
        with open(file, 'rb') as fr:
            self.states_value = pickle.load(fr)
|
||||
|
||||
|
||||
'''
|
||||
Class for Human-Player
|
||||
'''
|
||||
|
||||
|
||||
class HumanPlayer(Player):
    """Interactive player that reads (row, col) moves from stdin."""

    def __init__(self, name):
        super().__init__(name)

    def choose_action(self, positions: list, current_board: np.ndarray = None, symbol: int = -1) -> tuple:
        """Prompt until the user enters a valid available (row, col) move."""
        while True:
            try:
                row = int(input("Input your action row:"))
                col = int(input("Input your action col:"))
            except ValueError:
                # non-numeric input used to crash the game; re-prompt instead
                print("Please enter integer coordinates.")
                continue
            action = (row, col)
            if action in positions:
                return action

    def add_state(self, state) -> None:
        """No-op: a human does not track visited states."""
        pass

    def feed_reward(self, reward: float) -> None:
        """No-op: a human does not learn from rewards."""
        pass

    def get_hash(self, board: np.ndarray):
        """No-op: a human does not hash boards."""
        pass

    def reset(self) -> None:
        """No-op: nothing to reset for a human player."""
        pass
|
215
State.py
Normal file
215
State.py
Normal file
@ -0,0 +1,215 @@
|
||||
import numpy as np
|
||||
|
||||
from Player import Player
|
||||
|
||||
'''
|
||||
Class defines Boardstates, rules for winning and distinguish between pure computer game and game against a human
|
||||
'''
|
||||
|
||||
|
||||
class State:
    """Tic-tac-toe game state: board, win rules, play loops and rendering.

    p1 plays symbol 1 ('x'), p2 plays symbol -1 ('o'); p1 always moves first.
    """

    def __init__(self, p1: Player, p2: Player, board_rows: int, board_cols: int):
        # 0 = empty cell, 1 = p1's token, -1 = p2's token
        self.board = np.zeros((board_rows, board_cols))
        self.p1 = p1
        self.p2 = p2
        self.isEnd = False
        self.board_hash = None
        # 1 while it is p1's turn, -1 while it is p2's turn
        self.player_symbol = 1
        self.board_cols = board_cols
        self.board_rows = board_rows

    def get_hash(self) -> str:
        """Return a unique string hash of the current board (flattened repr)."""
        self.board_hash = str(self.board.reshape(self.board_cols * self.board_rows))
        return self.board_hash

    def winner(self):
        """Return 1 if p1 won, -1 if p2 won, 0 on a tie, None if ongoing.

        Side effect: sets ``self.isEnd`` accordingly.
        NOTE(review): the checks compare line sums to +/-3, so they assume a
        3x3 board even though the dimensions are parameters — confirm.
        """
        # 3 in a row
        for i in range(self.board_rows):
            if sum(self.board[i, :]) == 3:
                self.isEnd = True
                return 1
            if sum(self.board[i, :]) == -3:
                self.isEnd = True
                return -1
        # 3 in a column
        for i in range(self.board_cols):
            if sum(self.board[:, i]) == 3:
                self.isEnd = True
                return 1
            if sum(self.board[:, i]) == -3:
                self.isEnd = True
                return -1
        # diagonals: main and anti-diagonal
        diag_sum1 = sum([self.board[i, i] for i in range(self.board_cols)])
        diag_sum2 = sum([self.board[i, self.board_cols - i - 1] for i in range(self.board_cols)])
        diag_sum = max(abs(diag_sum1), abs(diag_sum2))
        if diag_sum == 3:
            self.isEnd = True
            if diag_sum1 == 3 or diag_sum2 == 3:
                return 1
            else:
                return -1

        # tie: no available positions left
        if len(self.available_positions()) == 0:
            self.isEnd = True
            return 0
        # game not finished
        self.isEnd = False
        return None

    def available_positions(self) -> list:
        """Return all empty cells as a list of (row, col) tuples."""
        positions = []
        for i in range(self.board_rows):
            for j in range(self.board_cols):
                if self.board[i, j] == 0:
                    positions.append((i, j))  # need to be tuple
        return positions

    def update_state(self, position) -> None:
        """Place the current player's token at *position* and switch turns."""
        self.board[position] = self.player_symbol
        self.player_symbol = -1 if self.player_symbol == 1 else 1

    def give_reward(self) -> None:
        """Distribute end-of-game rewards to both players.

        On a tie p2 gets 0.5 but p1 only 0.1 — presumably intentional,
        since p1 moves first and has the advantage; confirm with author.
        """
        result = self.winner()
        # P1 won
        if result == 1:
            self.p1.feed_reward(1)
            self.p2.feed_reward(0)
        # P2 won
        elif result == -1:
            self.p1.feed_reward(0)
            self.p2.feed_reward(1)
        # Tie
        else:
            self.p1.feed_reward(0.1)
            self.p2.feed_reward(0.5)

    def reset(self) -> None:
        """Reset the board to the start position for a new game."""
        self.board = np.zeros((self.board_rows, self.board_cols))
        self.board_hash = None
        self.isEnd = False
        self.player_symbol = 1

    def play(self, rounds=100) -> None:
        """Self-play training loop for two computer players.

        Runs *rounds* full games; after each game rewards are distributed
        and all per-game state is reset.
        """
        for i in range(rounds):
            if i % 1000 == 0:
                print("Rounds {}".format(i))
            while not self.isEnd:
                # Player 1 moves
                positions = self.available_positions()
                p1_action = self.p1.choose_action(positions, self.board, self.player_symbol)
                self.update_state(p1_action)
                board_hash = self.get_hash()
                self.p1.add_state(board_hash)

                # did P1 win, or is it a tie?
                win = self.winner()
                if win is not None:
                    self.give_reward()
                    self.p1.reset()
                    self.p2.reset()
                    self.reset()
                    break

                else:
                    # Player 2 moves
                    positions = self.available_positions()
                    p2_action = self.p2.choose_action(positions, self.board, self.player_symbol)
                    self.update_state(p2_action)
                    board_hash = self.get_hash()
                    self.p2.add_state(board_hash)

                    # did P2 win, or is it a tie?
                    win = self.winner()
                    if win is not None:
                        self.give_reward()
                        self.p1.reset()
                        self.p2.reset()
                        self.reset()
                        break

    def play2(self) -> None:
        """Interactive game loop: computer (p1) versus a human (p2)."""
        while not self.isEnd:
            # Player 1 (computer) moves
            positions = self.available_positions()
            p1_action = self.p1.choose_action(positions, self.board, self.player_symbol)
            self.update_state(p1_action)
            self.show_board()

            # did P1 win, or is it a tie?
            win = self.winner()
            if win is not None:
                if win == 1:
                    print(self.p1.name, "wins!")
                else:
                    print("tie!")
                self.reset()
                break

            else:
                # Player 2 (human) moves; the human reads the printed board,
                # so only the available positions are passed
                positions = self.available_positions()
                p2_action = self.p2.choose_action(positions)
                self.update_state(p2_action)
                self.show_board()

                # did P2 win, or is it a tie?
                win = self.winner()
                if win is not None:
                    if win == -1:
                        print(self.p2.name, "wins!")
                    else:
                        print("tie!")
                    self.reset()
                    break

    def show_board(self):
        """Print the current board; p1's token is 'x', p2's is 'o'."""
        for i in range(0, self.board_rows):
            print('-------------')
            out = '| '
            for j in range(0, self.board_cols):
                if self.board[i, j] == 1:
                    token = 'x'
                if self.board[i, j] == -1:
                    token = 'o'
                if self.board[i, j] == 0:
                    token = ' '
                out += token + ' | '
            print(out)
        print('-------------')
|
29
main.py
Normal file
29
main.py
Normal file
@ -0,0 +1,29 @@
|
||||
from Player import ComputerPlayer, HumanPlayer
|
||||
from State import State
|
||||
|
||||
BOARD_ROWS = 3
|
||||
BOARD_COLS = 3
|
||||
|
||||
|
||||
def tic_tac_toe(rounds: int = 50000) -> None:
    """Train two RL players by self-play, then play against a human.

    Args:
        rounds: number of self-play training games (default 50000, as in
            the original hard-coded call). Fewer rounds train faster but
            produce a weaker policy.
    """
    # train with 2 computer players
    p1 = ComputerPlayer("p1", board_cols=BOARD_COLS, board_rows=BOARD_ROWS)
    p2 = ComputerPlayer("p2", board_cols=BOARD_COLS, board_rows=BOARD_ROWS)

    st = State(p1, p2, board_cols=BOARD_COLS, board_rows=BOARD_ROWS)
    print("training...")
    st.play(rounds)
    p1.save_policy()

    # play with human: exploration disabled, learned policy loaded
    p1 = ComputerPlayer("computer", board_cols=BOARD_COLS, board_rows=BOARD_ROWS, exp_rate=0)
    p1.load_policy("policy_p1")

    p2 = HumanPlayer("human")

    st = State(p1, p2, board_cols=BOARD_COLS, board_rows=BOARD_ROWS)
    st.play2()


if __name__ == "__main__":
    tic_tac_toe()
|
Loading…
Reference in New Issue
Block a user