deep-rl-tensorflow
deep-rl-tensorflow copied to clipboard
Question about the `train` function in agent.py
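I think the `train` function in agent.py needs a modification. When an episode ends and a new game starts, the history still seems to hold observations from the previous game, so the first actions of the new game are predicted from stale history. The commented-out lines at the end of the loop below show the change I have in mind: refill the history with the new game's first observation.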
# Training loop: one environment step per iteration, with the global step
# counter stored on the agent as self.t (from start_t up to t_max - 1).
for self.t in tqdm(range(start_t, t_max), ncols=70, initial=start_t):
    # Exploration rate for this step. It is ep_end plus a non-negative extra
    # term that shrinks linearly as self.t moves past t_learn_start and
    # reaches zero after t_ep_end further steps.
    steps_since_learn_start = max(0., self.t - self.t_learn_start)
    ep_span = self.ep_start - self.ep_end
    ep_extra = ep_span * (self.t_ep_end - steps_since_learn_start) / self.t_ep_end
    ep = self.ep_end + max(0., ep_extra)

    # 1. predict: choose an action from the current history, given ep
    action = self.predict(self.history.get(), ep)

    # 2. act: advance the environment by one step with that action
    observation, reward, terminal, info = self.env.step(action, is_training=True)

    # 3. observe: hand the transition to the agent
    q, loss, is_update = self.observe(observation, reward, action, terminal)

    debug_values = (action, reward, terminal, np.mean(q), loss)
    logger.debug("a: %d, r: %d, t: %d, q: %.4f, l: %.2f" % debug_values)

    # Statistics are optional; report the step only when a collector is set.
    if self.stat:
        self.stat.on_step(self.t, action, reward, terminal,
                          ep, q, loss, is_update, self.learning_rate_op)

    # Episode finished: start a new game and take its first observation.
    if terminal:
        observation, reward, terminal = self.new_game()
        # NOTE(review): nothing visible here refills self.history after the
        # reset, so the next predict() may still see observations from the
        # finished episode -- confirm whether new_game() already handles it.
        # Proposed change, currently disabled:
        ## update history if the state is a terminal state
        ## for _ in range(self.history_length):
        ##     self.history.add(observation)