Plot graph
This commit is contained in:
		
							
								
								
									
										6
									
								
								DynaQ.py
									
									
									
									
									
								
							
							
						
						
									
										6
									
								
								DynaQ.py
									
									
									
									
									
								
							@@ -40,7 +40,6 @@ class DynaQ:
 | 
				
			|||||||
        self.state = self.env.start
 | 
					        self.state = self.env.start
 | 
				
			||||||
        self.state_actions = []
 | 
					        self.state_actions = []
 | 
				
			||||||
        self.step_in_episode = 0
 | 
					        self.step_in_episode = 0
 | 
				
			||||||
        self.env.reset()
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    '''
 | 
					    '''
 | 
				
			||||||
        Learning method for agent
 | 
					        Learning method for agent
 | 
				
			||||||
@@ -50,6 +49,7 @@ class DynaQ:
 | 
				
			|||||||
    def learn(self, epsilon_decay: float, epsilon_min: float, run: int) -> None:
 | 
					    def learn(self, epsilon_decay: float, epsilon_min: float, run: int) -> None:
 | 
				
			||||||
        self.steps_per_episode = []
 | 
					        self.steps_per_episode = []
 | 
				
			||||||
        eps = self.epsilon
 | 
					        eps = self.epsilon
 | 
				
			||||||
 | 
					        self.env.reset()
 | 
				
			||||||
        for episode in range(self.episodes):
 | 
					        for episode in range(self.episodes):
 | 
				
			||||||
            done = False
 | 
					            done = False
 | 
				
			||||||
            self.reset()
 | 
					            self.reset()
 | 
				
			||||||
@@ -84,6 +84,10 @@ class DynaQ:
 | 
				
			|||||||
            self.reset()
 | 
					            self.reset()
 | 
				
			||||||
            print("Goal")
 | 
					            print("Goal")
 | 
				
			||||||
            eps = max(epsilon_min, self.epsilon * np.exp(-epsilon_decay * episode))
 | 
					            eps = max(epsilon_min, self.epsilon * np.exp(-epsilon_decay * episode))
 | 
				
			||||||
 | 
					            if run == 9 and self.n_steps == 50 and episode==69:
 | 
				
			||||||
 | 
					                self.env.print_shortest_path(self.Q)
 | 
				
			||||||
 | 
					        if run==9 and self.n_steps==50:
 | 
				
			||||||
 | 
					            self.env.print_shortest_path(self.Q)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    '''
 | 
					    '''
 | 
				
			||||||
        Returns epsilon-greedy action
 | 
					        Returns epsilon-greedy action
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user