from Player import ComputerPlayer, HumanPlayer
from State import State

BOARD_ROWS = 3
BOARD_COLS = 3


def tic_tac_toe():
    """Train two computer players, then pit the trained one against a human.

    Phase 1 builds two ``ComputerPlayer`` objects, runs ``State.play(50000)``
    on them and calls ``save_policy()`` on the first one.  Phase 2 builds a
    fresh ``ComputerPlayer`` with ``exp_rate=0``, loads ``"policy_p1"`` into
    it and starts ``State.play2()`` against a ``HumanPlayer``.
    """
    # Every player/state constructor below takes the same board dimensions.
    board_dims = {"board_cols": BOARD_COLS, "board_rows": BOARD_ROWS}

    # Phase 1: train with two computer players.
    trainee = ComputerPlayer("p1", **board_dims)
    sparring_partner = ComputerPlayer("p2", **board_dims)
    training_game = State(trainee, sparring_partner, **board_dims)
    print("training...")
    training_game.play(50000)
    # Only the first player's policy is persisted.
    trainee.save_policy()

    # Phase 2: play against a human.
    # NOTE(review): exp_rate=0 presumably turns exploration off so the
    # computer only plays learned moves -- confirm in Player.
    computer = ComputerPlayer("computer", exp_rate=0, **board_dims)
    # NOTE(review): assumes save_policy() above wrote "policy_p1" -- verify
    # against Player.save_policy.
    computer.load_policy("policy_p1")
    human = HumanPlayer("human")
    match = State(computer, human, **board_dims)
    match.play2()


if __name__ == "__main__":
    tic_tac_toe()