graph/main.py

import numpy as np
import matplotlib.pyplot as plt

from DynaQ import DynaQ
from Environment import Env


def main():
    # Parameters
    alpha = 0.1
    gamma = 0.9
    epsilon = 1
    epsilon_decay = 0.05
    epsilon_min = 0.01
    episodes = 100
    start = 0
    goal = 10
    runs = 10

    # Result arrays
    results0 = np.zeros(episodes)
    results10 = np.zeros(episodes)
    results50 = np.zeros(episodes)

    # Build environment
    env = Env(start=start, goal=goal)

    # Train one agent each with 0, 10, and 50 planning steps.
    # Do 10 runs for each agent and average the results over runs.
    for run in range(runs):
        agent0 = DynaQ(env=env, alpha=alpha, gamma=gamma, epsilon=epsilon, n_steps=0, episodes=episodes)
        agent0.learn(epsilon_min=epsilon_min, epsilon_decay=epsilon_decay, run=run)
        results0 += np.array(agent0.steps_per_episode)

        agent10 = DynaQ(env=env, alpha=alpha, gamma=gamma, epsilon=epsilon, n_steps=10, episodes=episodes)
        agent10.learn(epsilon_min=epsilon_min, epsilon_decay=epsilon_decay, run=run)
        results10 += np.array(agent10.steps_per_episode)

        agent50 = DynaQ(env=env, alpha=alpha, gamma=gamma, epsilon=epsilon, n_steps=50, episodes=episodes)
        agent50.learn(epsilon_min=epsilon_min, epsilon_decay=epsilon_decay, run=run)
        results50 += np.array(agent50.steps_per_episode)

    # Average over runs
    results0 = results0 / runs
    results10 = results10 / runs
    results50 = results50 / runs

    # Plot the results
    plt.figure()
    plt.plot(range(episodes), results0.tolist(), label='0 planning steps')
    plt.plot(range(episodes), results10.tolist(), label='10 planning steps')
    plt.plot(range(episodes), results50.tolist(), label='50 planning steps')
    plt.legend()
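    # Axis labels are not in the original script; added here so the figure is
    # self-explanatory. x is the episode index, y is the number of steps the
    # agent took in that episode, averaged over the 10 runs above.
    plt.xlabel('Episode')
    plt.ylabel('Steps per episode (averaged over runs)')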
    plt.show()

if __name__ == '__main__':
    main()
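
The Env and DynaQ modules imported above are not shown here. For orientation, below is a minimal sketch of the interface main.py appears to assume: the constructor arguments, the learn(epsilon_min, epsilon_decay, run) signature, and the steps_per_episode attribute are taken directly from the calls in the script, while the body is a generic tabular Dyna-Q (Q-learning plus model-based planning) and the environment attributes and methods (n_states, n_actions, reset, step) are assumptions, not the repository's actual implementation.

# dynaq_sketch.py -- hypothetical illustration only, not the module imported above.
import random

import numpy as np


class DynaQ:
    """Tabular Dyna-Q: Q-learning plus n_steps of planning from a learned model."""

    def __init__(self, env, alpha, gamma, epsilon, n_steps, episodes):
        self.env = env
        self.alpha = alpha        # learning rate
        self.gamma = gamma        # discount factor
        self.epsilon = epsilon    # exploration rate, decayed during learning
        self.n_steps = n_steps    # planning updates per real step
        self.episodes = episodes
        # Assumes the environment exposes n_states and n_actions.
        self.q = np.zeros((env.n_states, env.n_actions))
        self.model = {}           # (state, action) -> (reward, next_state)
        self.steps_per_episode = []

    def _policy(self, state):
        # Epsilon-greedy action selection.
        if random.random() < self.epsilon:
            return random.randrange(self.q.shape[1])
        return int(np.argmax(self.q[state]))

    def learn(self, epsilon_min, epsilon_decay, run=0):
        # `run` is accepted to match the call in main.py; presumably used for logging there.
        for _ in range(self.episodes):
            state = self.env.reset()  # assumed: returns the start state
            done, steps = False, 0
            while not done:
                action = self._policy(state)
                # Assumed: step() returns (next_state, reward, done).
                next_state, reward, done = self.env.step(action)
                # Direct Q-learning update from real experience.
                target = reward + self.gamma * np.max(self.q[next_state]) * (not done)
                self.q[state, action] += self.alpha * (target - self.q[state, action])
                # Record the transition in the model, then plan from simulated experience.
                self.model[(state, action)] = (reward, next_state)
                for _ in range(self.n_steps):
                    (s, a), (r, s2) = random.choice(list(self.model.items()))
                    t = r + self.gamma * np.max(self.q[s2])
                    self.q[s, a] += self.alpha * (t - self.q[s, a])
                state = next_state
                steps += 1
            self.steps_per_episode.append(steps)
            # Decay exploration toward epsilon_min after each episode.
            self.epsilon = max(epsilon_min, self.epsilon - epsilon_decay)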