From 21bcbf49c1a86ee56cb6c2be89faf352f2f35d8a Mon Sep 17 00:00:00 2001
From: drothschedl
Date: Mon, 25 Jul 2022 11:16:22 +0200
Subject: [PATCH] Plot graph

Reset the environment once at the start of learn() instead of inside
the per-episode reset(), and print the shortest path recovered from
the Q-table for the final configuration (run == 9, n_steps == 50):
once as a snapshot at episode 69, and once after training finishes.
---
 DynaQ.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/DynaQ.py b/DynaQ.py
index c69e98c..25d6f17 100644
--- a/DynaQ.py
+++ b/DynaQ.py
@@ -40,7 +40,6 @@ class DynaQ:
         self.state = self.env.start
         self.state_actions = []
         self.step_in_episode = 0
-        self.env.reset()
 
     '''
     Learning method for agent
@@ -50,6 +49,7 @@ class DynaQ:
     def learn(self, epsilon_decay: float, epsilon_min: float, run: int) -> None:
         self.steps_per_episode = []
         eps = self.epsilon
+        self.env.reset()
         for episode in range(self.episodes):
             done = False
             self.reset()
@@ -84,6 +84,10 @@ class DynaQ:
             self.reset()
             print("Goal")
             eps = max(epsilon_min, self.epsilon * np.exp(-epsilon_decay * episode))
+            if run == 9 and self.n_steps == 50 and episode == 69:
+                self.env.print_shortest_path(self.Q)
+        if run == 9 and self.n_steps == 50:
+            self.env.print_shortest_path(self.Q)
 
     '''
     Returns epsilon-greedy action