Plot graph
This commit is contained in:
parent
32209b3cd6
commit
21bcbf49c1
6
DynaQ.py
6
DynaQ.py
@ -40,7 +40,6 @@ class DynaQ:
|
|||||||
self.state = self.env.start
|
self.state = self.env.start
|
||||||
self.state_actions = []
|
self.state_actions = []
|
||||||
self.step_in_episode = 0
|
self.step_in_episode = 0
|
||||||
self.env.reset()
|
|
||||||
|
|
||||||
'''
|
'''
|
||||||
Learning method for agent
|
Learning method for agent
|
||||||
@ -50,6 +49,7 @@ class DynaQ:
|
|||||||
def learn(self, epsilon_decay: float, epsilon_min: float, run: int) -> None:
|
def learn(self, epsilon_decay: float, epsilon_min: float, run: int) -> None:
|
||||||
self.steps_per_episode = []
|
self.steps_per_episode = []
|
||||||
eps = self.epsilon
|
eps = self.epsilon
|
||||||
|
self.env.reset()
|
||||||
for episode in range(self.episodes):
|
for episode in range(self.episodes):
|
||||||
done = False
|
done = False
|
||||||
self.reset()
|
self.reset()
|
||||||
@ -84,6 +84,10 @@ class DynaQ:
|
|||||||
self.reset()
|
self.reset()
|
||||||
print("Goal")
|
print("Goal")
|
||||||
eps = max(epsilon_min, self.epsilon * np.exp(-epsilon_decay * episode))
|
eps = max(epsilon_min, self.epsilon * np.exp(-epsilon_decay * episode))
|
||||||
|
if run == 9 and self.n_steps == 50 and episode==69:
|
||||||
|
self.env.print_shortest_path(self.Q)
|
||||||
|
if run==9 and self.n_steps==50:
|
||||||
|
self.env.print_shortest_path(self.Q)
|
||||||
|
|
||||||
'''
|
'''
|
||||||
Returns epsilon-greedy action
|
Returns epsilon-greedy action
|
||||||
|
Loading…
Reference in New Issue
Block a user