ElegantRL
Can you provide a test demo that reproduces the optimal policy and saves a video (e.g., for LunarLanderContinuous-v2 or BipedalWalker-v3)?
I have written a test demo that may help. However, the function `save_or_load_agent(self, cwd: str, if_save: bool)` in `AgentBase.py` has to be modified a little so that it also returns the networks and optimizers:

```python
def save_or_load_agent(self, cwd: str, if_save: bool):
    ...
    if if_save:
        for name, obj in name_obj_list:
            save_path = f"{cwd}/{name}.pth"
            torch.save(obj.state_dict(), save_path)
    else:
        for name, obj in name_obj_list:
            save_path = f"{cwd}/{name}.pth"
            load_torch_file(obj, save_path) if os.path.isfile(save_path) else None
    return self.act, self.act_target, self.act_optim, self.cri, self.cri_target, self.cri_optim
```
```python
import gym
import torch

from elegantrl.agents.AgentSAC import AgentSAC, AgentModSAC
from elegantrl.envs.Gym import get_gym_env_args
from elegantrl.train.config import Arguments, build_env
from elegantrl.train.utils import init_agent

# Print the env_args dict for the chosen environment.
get_gym_env_args(gym.make('LunarLanderContinuous-v2'), if_print=True)

env_func = gym.make
env_args = {
    'env_num': 1,
    'env_name': 'LunarLanderContinuous-v2',
    'max_step': 1000,
    'state_dim': 8,
    'action_dim': 2,
    'if_discrete': False,
    'target_return': 200,
    'id': 'LunarLanderContinuous-v2',
}

args = Arguments(agent=AgentModSAC(), env_func=env_func, env_args=env_args)
args.init_before_training()  # necessary!
learner_gpu = args.learner_gpus[0]
env = build_env(env=args.env, env_func=args.env_func, env_args=args.env_args, gpu_id=learner_gpu)
agent = init_agent(args, gpu_id=learner_gpu, env=env)
cwd = args.cwd

# Load the saved networks; only the actor is needed to roll out the policy.
act, _, _, _, _, _ = agent.save_or_load_agent(cwd, if_save=False)
act.load_state_dict(torch.load("actor.pth"))
```
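A side note, not part of the demo itself: if the checkpoint was written on a GPU machine but is replayed on a CPU-only machine, `torch.load` needs an explicit `map_location`, otherwise PyTorch tries to deserialize the tensors onto a CUDA device that is not there:

```python
# Optional: remap GPU-saved weights onto the CPU when no CUDA device is available.
act.load_state_dict(torch.load("actor.pth", map_location=torch.device("cpu")))
```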
```python
# Roll out the loaded policy and render it.
s = env.reset()
print(s, s.shape)
for i in range(1000):
    action = act.get_action(torch.tensor(s))  # agent.train()
    next_state, reward, done, _ = env.step(action.detach().numpy())
    if done:
        s = env.reset()
    else:
        s = next_state
    env.render()
```
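To also save the rollout as a video (the second half of the question), one option is to wrap a plain Gym environment with `gym.wrappers.RecordVideo` instead of calling `env.render()`. This is only a sketch: it assumes a gym release (roughly 0.21–0.25) where `RecordVideo` exists and `env.step` still returns four values; on older releases `gym.wrappers.Monitor` plays the same role. The `./videos` folder name is just an example.

```python
from gym.wrappers import RecordVideo

# Wrap a fresh env so every episode is written to ./videos as a video file.
eval_env = gym.make('LunarLanderContinuous-v2')
eval_env = RecordVideo(eval_env, video_folder="./videos", episode_trigger=lambda ep: True)

s = eval_env.reset()
for i in range(1000):
    action = act.get_action(torch.tensor(s))
    next_state, reward, done, _ = eval_env.step(action.detach().numpy())
    s = eval_env.reset() if done else next_state
eval_env.close()  # finalizes and flushes the recorded video files
```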
Thanks! We are looking into your code.