Plot graph

drothschedl 2022-07-25 11:16:22 +02:00
parent 32209b3cd6
commit 21bcbf49c1


@@ -40,7 +40,6 @@ class DynaQ:
        self.state = self.env.start
        self.state_actions = []
        self.step_in_episode = 0
        self.env.reset()
    '''
    Learning method for agent
@@ -50,6 +49,7 @@ class DynaQ:
    def learn(self, epsilon_decay: float, epsilon_min: float, run: int) -> None:
        self.steps_per_episode = []
        eps = self.epsilon
        self.env.reset()
        for episode in range(self.episodes):
            done = False
            self.reset()
@@ -84,6 +84,10 @@ class DynaQ:
                self.reset()
                print("Goal")
            eps = max(epsilon_min, self.epsilon * np.exp(-epsilon_decay * episode))
            if run == 9 and self.n_steps == 50 and episode==69:
                self.env.print_shortest_path(self.Q)
        if run==9 and self.n_steps==50:
            self.env.print_shortest_path(self.Q)
    '''
    Returns epsilon-greedy action
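
Note on the epsilon schedule in learn() (last hunk above): epsilon decays exponentially with the episode index and is clamped at epsilon_min. Below is a minimal standalone sketch of that schedule, assuming an initial value epsilon_start in place of self.epsilon; the helper name epsilon_schedule and the example values are illustrative only and not part of the repository.

import numpy as np

def epsilon_schedule(epsilon_start: float, epsilon_decay: float,
                     epsilon_min: float, episodes: int) -> list:
    # Mirrors eps = max(epsilon_min, self.epsilon * np.exp(-epsilon_decay * episode))
    # from DynaQ.learn(): the decay factor is applied to the initial epsilon each
    # episode, not compounded on the previous value.
    return [max(epsilon_min, epsilon_start * np.exp(-epsilon_decay * episode))
            for episode in range(episodes)]

# Example: start at 1.0, decay rate 0.05, floor at 0.01, over 100 episodes.
schedule = epsilon_schedule(1.0, 0.05, 0.01, 100)
print(round(schedule[0], 3), round(schedule[-1], 3))  # 1.0 at episode 0, clamped to 0.01 near the end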