import numpy as np
import matplotlib.pyplot as plt

from Environment import Env
from DynaQ import DynaQ


def main():
    # Parameters
    alpha = 0.1
    gamma = 0.9
    epsilon = 1
    epsilon_decay = 0.05
    epsilon_min = 0.01
    episodes = 100
    start = 0
    goal = 10
    runs = 10

    # Result arrays
    results0 = np.zeros(episodes)
    results10 = np.zeros(episodes)
    results50 = np.zeros(episodes)

    # Build environment
    env = Env(start=start, goal=goal)

    # Learn for agents with planning steps 0, 10, 50.
    # Do 10 runs for each agent and take the average of the results.
    for run in range(runs):
        agent0 = DynaQ(env=env, alpha=alpha, gamma=gamma, epsilon=epsilon,
                       n_steps=0, episodes=episodes)
        agent0.learn(epsilon_min=epsilon_min, epsilon_decay=epsilon_decay, run=run)
        results0 += np.array(agent0.steps_per_episode)

        agent10 = DynaQ(env=env, alpha=alpha, gamma=gamma, epsilon=epsilon,
                        n_steps=10, episodes=episodes)
        agent10.learn(epsilon_min=epsilon_min, epsilon_decay=epsilon_decay, run=run)
        results10 += np.array(agent10.steps_per_episode)

        agent50 = DynaQ(env=env, alpha=alpha, gamma=gamma, epsilon=epsilon,
                        n_steps=50, episodes=episodes)
        agent50.learn(epsilon_min=epsilon_min, epsilon_decay=epsilon_decay, run=run)
        results50 += np.array(agent50.steps_per_episode)

    results0 = results0 / runs
    results10 = results10 / runs
    results50 = results50 / runs

    # Plot the results
    plt.figure()
    plt.plot(range(episodes), results0.tolist(), label='0 planning steps')
    plt.plot(range(episodes), results10.tolist(), label='10 planning steps')
    plt.plot(range(episodes), results50.tolist(), label='50 planning steps')
    plt.legend()
    plt.show()


if __name__ == '__main__':
    main()
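

# --- Illustrative sketch, not used by the experiment above ------------------
# The imported DynaQ class is assumed to implement tabular Dyna-Q as described
# in Sutton & Barto (Ch. 8): one Q-learning update from real experience,
# followed by n_steps planning updates replayed from a learned deterministic
# model. The function below is a hypothetical, self-contained sketch of that
# per-episode loop; the names it relies on (env.reset, env.step returning
# (next_state, reward, done), env.n_actions, a 2-D Q array, a dict model, and
# an rng of type random.Random) are assumptions and need not match the actual
# Environment/DynaQ interfaces in this repository.
def _dyna_q_episode_sketch(env, Q, model, alpha, gamma, epsilon, n_steps, rng):
    state = env.reset()
    done = False
    steps = 0
    while not done:
        # epsilon-greedy action selection
        if rng.random() < epsilon:
            action = rng.randrange(env.n_actions)
        else:
            action = int(np.argmax(Q[state]))
        next_state, reward, done = env.step(action)

        # direct RL: one-step Q-learning update from real experience
        Q[state, action] += alpha * (
            reward + gamma * np.max(Q[next_state]) - Q[state, action])

        # model learning: remember the observed transition (deterministic model)
        model[(state, action)] = (reward, next_state)

        # planning: n_steps extra updates from previously observed transitions
        for _ in range(n_steps):
            (s, a), (r, s2) = rng.choice(list(model.items()))
            Q[s, a] += alpha * (r + gamma * np.max(Q[s2]) - Q[s, a])

        state = next_state
        steps += 1
    return steps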