muzero
About the x-axis of the CartPole learning curve figure
Coach.py
def learn(self) -> None: for i in range(1, self.args.num_selfplay_iterations + 1): print(f'------ITER {i}------') if not self.update_on_checkpoint or i > 1: # else: go directly to backpropagation
# Self-play/ Gather training data.
iteration_train_examples = list()
scores = list()
for _ in trange(self.args.num_episodes, desc="Self Play", file=sys.stdout):
self.mcts.clear_tree()
game_history, score = self.executeEpisode()