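"""Dyna-Q planning experiment.

Trains three Dyna-Q agents (with 0, 10 and 50 planning steps) on the same
environment, averages their steps-per-episode over several independent runs,
and plots the resulting learning curves. Assumes the local modules
Environment.py (providing Env) and DynaQ.py (providing DynaQ) are importable
from this directory.
"""
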
import numpy as np
import matplotlib.pyplot as plt

from Environment import Env
from DynaQ import DynaQ


def main():
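    """Run the planning-steps comparison and plot the averaged learning curves."""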
    # Parameters
    alpha = 0.1           # learning rate
    gamma = 0.9           # discount factor
    epsilon = 1.0         # initial exploration rate
    epsilon_decay = 0.05  # epsilon decay rate
    epsilon_min = 0.01    # minimum epsilon
    episodes = 100
    start = 0             # start state
    goal = 10             # goal state
    runs = 10             # independent runs to average over

    # Result arrays
    results0 = np.zeros(episodes)
    results10 = np.zeros(episodes)
    results50 = np.zeros(episodes)

    # Build environment
    env = Env(start=start, goal=goal)

    # Learn with agents that use 0, 10 and 50 planning steps
    # (n_steps = number of simulated Dyna-Q planning updates per real step).
    # Do `runs` runs for each agent and average the results.
    for run in range(runs):
        agent0 = DynaQ(env=env, alpha=alpha, gamma=gamma, epsilon=epsilon, n_steps=0, episodes=episodes)
        agent0.learn(epsilon_min=epsilon_min, epsilon_decay=epsilon_decay, run=run)
        results0 += np.array(agent0.steps_per_episode)

        agent10 = DynaQ(env=env, alpha=alpha, gamma=gamma, epsilon=epsilon, n_steps=10, episodes=episodes)
        agent10.learn(epsilon_min=epsilon_min, epsilon_decay=epsilon_decay, run=run)
        results10 += np.array(agent10.steps_per_episode)

        agent50 = DynaQ(env=env, alpha=alpha, gamma=gamma, epsilon=epsilon, n_steps=50, episodes=episodes)
        agent50.learn(epsilon_min=epsilon_min, epsilon_decay=epsilon_decay, run=run)
        results50 += np.array(agent50.steps_per_episode)

    # Average over runs
    results0 /= runs
    results10 /= runs
    results50 /= runs

    # Plot the results
    plt.figure()
    plt.plot(range(episodes), results0, label='0 planning steps')
    plt.plot(range(episodes), results10, label='10 planning steps')
    plt.plot(range(episodes), results50, label='50 planning steps')
    plt.xlabel('Episode')
    plt.ylabel('Average steps per episode')
    plt.legend()
    plt.show()


if __name__ == '__main__':
    main()