Looking for a basic Stable Baselines3 application for RL training
I am relatively new to RL and I'm exploring this environment. I am looking for reference code implementing either the hover or the pole-balancing drone environment. Any help would be appreciated.
import gymnasium as gym
import PyFlyt.gym_envs  # Ensure PyFlyt is installed; importing this registers its environments
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
import wandb
from wandb.integration.sb3 import WandbCallback
# Initialise Weights & Biases experiment tracking
wandb.init(
    project="pyflyt-training",
    name="PPO_QUAD_POLE_BALANCING",
    sync_tensorboard=True,
    monitor_gym=False,
)
def train_model(timesteps=2_000_000, save_path="ppo_quad_pole_balance"):
    """Trains PPO on the PyFlyt QuadX-Pole-Balance environment."""
    # Four parallel environments speed up rollout collection
    train_env = make_vec_env("PyFlyt/QuadX-Pole-Balance-v3", n_envs=4)
    # device="cuda" assumes a GPU is available; use device="auto" otherwise
    model = PPO(
        "MlpPolicy",
        train_env,
        verbose=1,
        tensorboard_log="./ppo_tensorboard/",
        device="cuda",
    )
    model.learn(total_timesteps=timesteps, callback=WandbCallback())
    model.save(save_path)
    train_env.close()
def test_model(load_path="ppo_quad_pole_balance", num_episodes=50):
    """Tests the trained PPO model with rendering for multiple episodes."""
    model = PPO.load(load_path)
    test_env = gym.make("PyFlyt/QuadX-Pole-Balance-v3", render_mode="human")
    for episode in range(num_episodes):
        obs, _ = test_env.reset()
        done = False
        episode_reward = 0
        while not done:
            action, _states = model.predict(obs, deterministic=True)
            obs, reward, terminated, truncated, _ = test_env.step(action)
            episode_reward += reward
            done = terminated or truncated
        print(f"Episode {episode + 1}/{num_episodes} - Total Reward: {episode_reward:.2f}")
    test_env.close()
if __name__ == "__main__":
    train_model()   # Train without rendering
    test_model()    # Test for 50 episodes with rendering
    wandb.finish()  # Close the WandB run
You can go through this fairly fundamental Stable Baselines3 application of the PPO algorithm.
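Two small additions that might help. First, the script above imports evaluate_policy but never calls it; if you want a quick numeric check without the manual render loop, SB3's helper does the bookkeeping (reusing the imports from the script above):

# Optional: average return over a few episodes, no rendering
eval_env = gym.make("PyFlyt/QuadX-Pole-Balance-v3")
model = PPO.load("ppo_quad_pole_balance")
mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)
print(f"Mean reward: {mean_reward:.2f} +/- {std_reward:.2f}")
eval_env.close()

Second, since the question mentioned the hover task as well, the same recipe works by swapping the environment id. A minimal sketch; the exact id and version suffix used here ("PyFlyt/QuadX-Hover-v2") is an assumption and varies by PyFlyt release, so check gym.pprint_registry() for what your install registers:

# Sketch: hover task with the same PPO setup -- env id is an assumption,
# verify the registered name/version for your PyFlyt release
hover_env = make_vec_env("PyFlyt/QuadX-Hover-v2", n_envs=4)
hover_model = PPO("MlpPolicy", hover_env, verbose=1)
hover_model.learn(total_timesteps=1_000_000)
hover_model.save("ppo_quadx_hover")
hover_env.close()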
@NishantChandna1403 Hello! I'm a beginner in reinforcement learning and I hope to get your help. I would be very grateful if you could provide an example of training and testing a fixed-wing UAV to follow waypoints using PyFlyt and stable_baselines3. I'm looking forward to your reply and thank you very much!
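A minimal sketch in the same spirit as the code above, with two assumptions to verify against your installed PyFlyt: the env id "PyFlyt/Fixedwing-Waypoints-v2" (the version suffix changes between releases), and that the waypoint envs emit a sequence of upcoming targets that PyFlyt's FlattenWaypointEnv wrapper flattens into a fixed-size Box so SB3's MlpPolicy can consume it:

import gymnasium as gym
import PyFlyt.gym_envs  # registers the PyFlyt environments
from PyFlyt.gym_envs import FlattenWaypointEnv
from stable_baselines3 import PPO

def make_env(render_mode=None):
    # Env id is an assumption -- adjust to whatever your PyFlyt version registers
    env = gym.make("PyFlyt/Fixedwing-Waypoints-v2", render_mode=render_mode)
    # Keep the next 2 waypoints in the observation; yields a flat Box space
    return FlattenWaypointEnv(env, context_length=2)

# Train without rendering
model = PPO("MlpPolicy", make_env(), verbose=1)
model.learn(total_timesteps=1_000_000)
model.save("ppo_fixedwing_waypoints")

# Test one rendered episode, mirroring test_model() above
env = make_env(render_mode="human")
obs, _ = env.reset()
done = False
while not done:
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, _ = env.step(action)
    done = terminated or truncated
env.close()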