imitation
Tensorboard not showing all metrics
I am trying to run adversarial inverse reinforcement learning (AIRL) training.
The training log appears on stdout as normal, however the only thing that gets registered to TensorBoard is disc_logits.

My training script looks like this:
```python
from raceEnv import RacingEnv
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize, VecMonitor
from imitation.algorithms import bc
from imitation.data import rollout
import os
import pickle
from tqdm.notebook import tqdm
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy
from imitation.algorithms.adversarial.gail import GAIL
from imitation.algorithms.adversarial.airl import AIRL
from imitation.rewards.reward_nets import BasicRewardNet
from imitation.util.networks import RunningNorm
from imitation.scripts.train_adversarial import save
from imitation.scripts.common.rl import load_rl_algo_from_path
from imitation.rewards.serialize import load_reward
import torch as th

env = DummyVecEnv([lambda: RacingEnv()])
env = VecMonitor(VecNormalize.load('NormalisationWeights.pkl', env))

with open(r"transitions/transitions4.pkl", "rb") as input_file:
    transitions = pickle.load(input_file)

#env = DummyVecEnv([lambda :RacingEnv()])
# learner = PPO(env=env, policy=MlpPolicy)
reward_net = BasicRewardNet(
    env.observation_space,
    env.action_space,
    normalize_input_layer=RunningNorm,
)
learner = load_rl_algo_from_path(
    agent_path='./airl_model/gen_policy',
    venv=env,
    rl_cls=PPO,
    rl_kwargs={},
)
airl_trainer = AIRL(
    demonstrations=transitions,
    demo_batch_size=64,
    gen_replay_buffer_capacity=128,
    n_disc_updates_per_round=4,
    venv=env,
    gen_algo=learner,
    reward_net=reward_net,
    log_dir="./shelby_bot_tensorboard/",
    init_tensorboard=True,
    init_tensorboard_graph=True,
)

old_reward, _ = evaluate_policy(airl_trainer.policy, env, n_eval_episodes=1000, render=False)
print("Training a policy using AIRL")
airl_trainer.train(60000000)
reward, _ = evaluate_policy(airl_trainer.policy, env, n_eval_episodes=1000, render=False)
print(f"Reward before training: {old_reward}")
print(f"Reward after training: {reward}")
save(airl_trainer, 'airl_model')
```
Am I missing any extra config for getting the loss and average episode reward logged?
Assigning to @ernestum to see if this can be replicated / triaged.
@magjack I could not figure out what RacingEnv you used. Can you give me a hint?
Also, having the files you are loading would be immensely helpful for replicating your issue. Looks like I would need:
./airl_model/gen_policy
NormalisationWeights.pkl
transitions/transitions4.pkl
@magjack ping
Hi sorry, I lost track of this and took the project in a different direction. Will close
I'm having the same issue when I try to use TensorBoard: the only information that appears is disc_logits. I traced through the library to see where the data is being written to TensorBoard and found imitation.algorithms.adversarial.common, line 381: self._summary_writer.add_histogram("disc_logits", disc_logits.detach()), which writes only disc_logits. Is it possible to monitor some env information in TensorBoard, for example gen/rollout/ep_len_mean or gen/rollout/ep_rew_mean? That information is displayed in the raw stdout training log during training.
I was able to get logs into TensorBoard using a custom logger:
```python
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize, VecMonitor
import pickle
import pathlib
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy
from imitation.algorithms.adversarial.airl import AIRL
from imitation.data import rollout
from imitation.rewards.reward_nets import BasicRewardNet
from imitation.util import logger as imit_logger
from imitation.scripts.train_adversarial import save
#from imitation.scripts.common.rl import load_rl_algo_from_path
from imitation.rewards.serialize import load_reward
from raceEnv import RacingEnv

total_timesteps = 100000
checkpoint_interval = total_timesteps / 20
log_dir = 'shelby_bot_tensorboard/airl'

env = DummyVecEnv([lambda: RacingEnv()])
env = VecMonitor(VecNormalize.load('NormalisationWeights.pkl', env))

with open(r"transitions/transitions0.pkl", "rb") as input_file:
    expert_traj = pickle.load(input_file)

#env = DummyVecEnv([lambda :RacingEnv()])
learner = PPO(env=env, policy=MlpPolicy, verbose=1, tensorboard_log="./shelby_bot_tensorboard/")
reward_net = BasicRewardNet(
    env.observation_space,
    env.action_space,
)
custom_logger = imit_logger.configure(
    folder=log_dir,
    format_strs=["tensorboard", "stdout"],
)
#learner = load_rl_algo_from_path(
#    agent_path='./airl_model/gen_policy',
#    venv=env,
#    rl_cls=PPO,
#    rl_kwargs={}
#)
airl_trainer = AIRL(
    demonstrations=expert_traj,
    demo_batch_size=64,
    gen_replay_buffer_capacity=128,
    n_disc_updates_per_round=4,
    venv=env,
    gen_algo=learner,
    reward_net=reward_net,
    log_dir="./shelby_bot_tensorboard/",
    init_tensorboard=True,
    init_tensorboard_graph=True,
    custom_logger=custom_logger,
)

def callback(round_num: int, /) -> None:
    if checkpoint_interval > 0 and round_num % checkpoint_interval == 0:
        save(airl_trainer, pathlib.Path(f"checkpoints/checkpoint{round_num:05d}"))

old_reward, _ = evaluate_policy(airl_trainer.policy, env, n_eval_episodes=1000, render=False)
print("Training a policy using AIRL")
airl_trainer.train(total_timesteps, callback)
reward, _ = evaluate_policy(airl_trainer.policy, env, n_eval_episodes=1000, render=False)
print(f"Reward before training: {old_reward}")
print(f"Reward after training: {reward}")
save(airl_trainer, pathlib.Path("checkpoints/checkpointFinal"))
```
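The key change relative to the first script is the custom_logger: imit_logger.configure(folder=..., format_strs=["tensorboard", "stdout"]) routes whatever the trainer records to a TensorBoard event file as well as to stdout, whereas init_tensorboard on its own only produced the disc_logits histogram noted above. The resulting logs can then be viewed with tensorboard --logdir shelby_bot_tensorboard.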
@magjack thank you very much, it really helped me a lot
@magjack Would you provide the missing files once again? It would be great if I could replicate this issue so you guys don't have to resort to a custom logger in the long term.
@magjack ping
@ernestum I don't know if it helps, but I had this problem using OpenAI Gym Retro. Let me know if I can help in any way.
Interestingly, when trying to reproduce with seals/CartPole-v0 I get a lot of statistics, but none of them is called disc_logits. I have not tried with a retro env yet.
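For anyone else trying to reproduce this, here is a minimal sketch of what such a run could look like, reusing the AIRL setup from the scripts above with seals/CartPole-v0. The transitions path and log directory are placeholders, and the expert demonstrations are assumed to have been pickled beforehand, as in the scripts above:

```python
import pickle

import gym
import seals  # noqa: F401 -- registers the seals/* environment IDs on import

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecMonitor
from stable_baselines3.ppo import MlpPolicy

from imitation.algorithms.adversarial.airl import AIRL
from imitation.rewards.reward_nets import BasicRewardNet
from imitation.util import logger as imit_logger

venv = VecMonitor(DummyVecEnv([lambda: gym.make("seals/CartPole-v0")]))

# Hypothetical path: expert transitions pickled ahead of time.
with open("transitions/cartpole_transitions.pkl", "rb") as f:
    transitions = pickle.load(f)

learner = PPO(env=venv, policy=MlpPolicy, verbose=1)
reward_net = BasicRewardNet(venv.observation_space, venv.action_space)

# Route trainer metrics to both stdout and a TensorBoard event file.
custom_logger = imit_logger.configure(
    folder="cartpole_tensorboard/airl",  # placeholder log directory
    format_strs=["tensorboard", "stdout"],
)

airl_trainer = AIRL(
    demonstrations=transitions,
    demo_batch_size=64,
    gen_replay_buffer_capacity=128,
    n_disc_updates_per_round=4,
    venv=venv,
    gen_algo=learner,
    reward_net=reward_net,
    log_dir="cartpole_tensorboard/",
    init_tensorboard=True,
    custom_logger=custom_logger,
)
airl_trainer.train(100_000)
```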
@feliperafael I tried running with Airstriker-Genesis but I am getting out-of-memory errors. What environment did you use specifically?
I used the SonicTheHedgehog-Genesis environment. I'm going to test with the ROMs that come included in the retro lib to see if I can reproduce this with any of them.
I was able to reproduce the "issue" using Airstriker-Genesis, but TensorBoard only showed disc_logits after GAIL training finished. I used 8 envs and 100,000 timesteps to train.
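For context on the multi-env setup mentioned above, a rough sketch of how eight Gym Retro environments are typically vectorized; Gym Retro only allows one emulator instance per process, so SubprocVecEnv is the usual choice, and the game ID here is the bundled Airstriker-Genesis ROM:

```python
import retro
from stable_baselines3.common.vec_env import SubprocVecEnv, VecMonitor

def make_env():
    # Each worker process creates its own emulator instance,
    # since Gym Retro does not allow two in the same process.
    return retro.make("Airstriker-Genesis")

if __name__ == "__main__":
    # 8 parallel emulators, with episode stats recorded by VecMonitor.
    venv = VecMonitor(SubprocVecEnv([make_env for _ in range(8)]))
    print(venv.reset().shape)
```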