Compare_agent.py

Open vicks4u opened this issue 3 months ago • 0 comments

""" RL + MetaTrader5 trading bot template

- Train with historical data (PPO from stable-baselines3).
- Optionally execute trades via MetaTrader5 (set LIVE=True to enable).

CAVEAT: This is an educational template. Backtest & paper-trade first.
"""

import time

import gym
import MetaTrader5 as mt5
import numpy as np
import pandas as pd
from gym import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.vec_env import DummyVecEnv

# -------------------------
# USER CONFIG
# -------------------------

# Module-level settings read by the helpers, the environment and both run modes.
SYMBOL = "EURUSD"
TIMEFRAME = mt5.TIMEFRAME_M5  # 5 minute bars
LOOKBACK = 50  # observation window (bars)
START_POS = 0  # for historical fetch offset
LOT_SIZE = 0.01  # trade lot size
LIVE = False  # <-- Set to True only after full testing (demo account first!)
MODEL_PATH = "ppo_mt5_model"
TRAIN_TIMESTEPS = 20000  # adjust as you like

# -------------------------
# Helper: connect to MT5
# -------------------------

def mt5_connect():
    """Initialize the MetaTrader5 connection and make SYMBOL available.

    Returns:
        True once the terminal is initialized and SYMBOL has been selected.

    Raises:
        RuntimeError: If ``mt5.initialize()`` fails, terminal info cannot be
            read, or SYMBOL cannot be selected. In the latter two cases the
            connection is shut down again before the error propagates.
    """
    if not mt5.initialize():
        raise RuntimeError(f"MT5 initialize() failed, error={mt5.last_error()}")
    try:
        info = mt5.terminal_info()
        if info is None:
            raise RuntimeError("Failed to get terminal info after initialize()")
        # TerminalInfo exposes the terminal's name as `name`; it has no
        # `product` field, so the original attribute access would raise.
        print("MT5 terminal initialized:", info.name)
        # Ensure symbol is available
        if not mt5.symbol_select(SYMBOL, True):
            raise RuntimeError(f"Failed to select symbol {SYMBOL}")
    except Exception:
        # initialize() succeeded, so release the connection before failing;
        # the caller never gets the chance to call mt5_shutdown() itself.
        mt5.shutdown()
        raise
    return True

def mt5_shutdown():
    """Close the connection to the MetaTrader5 terminal."""
    mt5.shutdown()

# -------------------------
# Get historical OHLCV
# -------------------------

def fetch_bars(symbol, timeframe, n_bars):
    """Fetch ``n_bars`` bars for ``symbol`` starting at bar index START_POS.

    Args:
        symbol: Instrument name passed to ``mt5.copy_rates_from_pos``.
        timeframe: MT5 timeframe constant (e.g. ``mt5.TIMEFRAME_M5``).
        n_bars: Number of bars to request.

    Returns:
        A DataFrame built from the returned rates, with the ``time`` column
        converted from epoch seconds to pandas datetimes.

    Raises:
        RuntimeError: If the terminal returns ``None`` or no bars at all.
    """
    # copy_rates_from_pos returns numpy array with fields: time, open, high, low, close, tick_volume, ...
    rates = mt5.copy_rates_from_pos(symbol, timeframe, START_POS, n_bars)
    if rates is None:
        raise RuntimeError(f"Failed to fetch rates for {symbol}: {mt5.last_error()}")
    if len(rates) == 0:
        # Callers index the last row of the result, so an empty frame would
        # only fail later with a less helpful IndexError.
        raise RuntimeError(f"No rates returned for {symbol}: {mt5.last_error()}")
    df = pd.DataFrame(rates)
    df['time'] = pd.to_datetime(df['time'], unit='s')
    return df

# -------------------------
# Simple trading Gym env
# -------------------------

class MT5TradingEnv(gym.Env):
    """Single-instrument trading environment over a frame of historical bars.

    Observation: the last ``lookback`` closes, each divided by the most
        recent close in the window minus 1, followed by the current
        position (0 flat, 1 long, -1 short). Always float32.
    Actions: 0=hold, 1=buy (open long, or flip short -> long),
        2=sell (open short, or flip long -> short).
    Reward: realized price difference when a position is flipped, plus
        0.1 * the unrealized price difference against the next bar's close.
    NOTE: Simplified; this is a research template, not production-ready.
    """

    def __init__(self, df: pd.DataFrame, lookback=LOOKBACK):
        """Create the environment.

        Args:
            df: Bars with at least a ``close`` column; the index is reset so
                rows are addressed 0..len(df)-1.
            lookback: Number of closes in each observation.

        Raises:
            ValueError: If ``df`` has fewer than ``lookback`` rows, in which
                case no full observation window can be built.
        """
        super().__init__()
        if len(df) < lookback:
            raise ValueError(
                f"Need at least {lookback} bars to build an observation, got {len(df)}"
            )
        self.df = df.reset_index(drop=True)
        self.lookback = lookback
        self.ptr = lookback  # current index in df
        self.position = 0  # -1 short, 0 flat, 1 long
        self.entry_price = 0.0
        # Observations: lookback closes (normalized) + position
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(lookback + 1,), dtype=np.float32
        )
        # Actions: hold(0), buy(1), sell(2)
        self.action_space = spaces.Discrete(3)

    def _get_obs(self):
        """Return the float32 observation for the bars just before ``ptr``."""
        first = self.ptr - self.lookback
        # .loc slicing is label-based and inclusive on both ends, so this
        # yields exactly `lookback` rows: first .. ptr - 1.
        closes = self.df.loc[first:self.ptr - 1, "close"].values.astype(np.float32)
        # normalize closes by dividing by last close
        norm = closes / (closes[-1] + 1e-9) - 1.0
        # Build the position element as float32 too; concatenating with a
        # default float64 array would silently upcast the whole observation
        # away from the dtype declared in observation_space.
        position = np.array([self.position], dtype=np.float32)
        return np.concatenate([norm, position], axis=0).astype(np.float32)

    def reset(self):
        """Rewind to the first tradable bar, go flat, and return the observation."""
        self.ptr = self.lookback
        self.position = 0
        self.entry_price = 0.0
        return self._get_obs()

    def step(self, action):
        """Apply ``action`` at the current bar's close and advance one bar.

        Args:
            action: 0=hold, 1=buy, 2=sell.

        Returns:
            ``(obs, reward, done, info)``. ``obs`` is all zeros once the data
            is exhausted; ``info`` carries the new pointer under ``"ptr"``.
        """
        done = False
        reward = 0.0
        price = float(self.df.loc[self.ptr, "close"])
        # Action logic: repeating the current direction is a no-op.
        if action == 1:  # buy
            if self.position == 0:
                self.position = 1
                self.entry_price = price
            elif self.position == -1:
                # close short and go long
                reward += (self.entry_price - price)  # profit from short
                self.position = 1
                self.entry_price = price
        elif action == 2:  # sell
            if self.position == 0:
                self.position = -1
                self.entry_price = price
            elif self.position == 1:
                # close long and go short
                reward += (price - self.entry_price)  # profit from long
                self.position = -1
                self.entry_price = price
        # Move pointer
        self.ptr += 1
        if self.ptr >= len(self.df):
            done = True
        else:
            # reward can also be shaped by unrealized pnl:
            next_price = float(self.df.loc[self.ptr, "close"])
            unrealized = 0.0
            if self.position == 1:
                unrealized = next_price - self.entry_price
            elif self.position == -1:
                unrealized = self.entry_price - next_price
            # small per-step reward = unrealized PnL scaled
            reward += unrealized * 0.1

        if done:
            obs = np.zeros(self.observation_space.shape, dtype=np.float32)
        else:
            obs = self._get_obs()
        info = {"ptr": self.ptr}
        return obs, float(reward), done, info

# -------------------------
# Order helpers
# -------------------------

def send_order(symbol, action, lot=LOT_SIZE, deviation=20):
    """Send a market order (ORDER_TYPE_BUY / ORDER_TYPE_SELL) for ``symbol``.

    Basic error checking only. For production you need more robust code.

    Args:
        symbol: Instrument to trade.
        action: 1=buy (priced at ask), 2=sell (priced at bid).
        lot: Order volume.
        deviation: Value passed as the request's ``deviation`` field.

    Returns:
        Whatever ``mt5.order_send`` returns for the request.

    Raises:
        ValueError: If ``action`` is neither 1 nor 2. Without this check any
            other value (including 0=hold) would be sent as a SELL.
        RuntimeError: If no tick is available for ``symbol``.
    """
    if action not in (1, 2):
        raise ValueError(f"Unsupported action {action!r}; expected 1 (buy) or 2 (sell)")
    tick = mt5.symbol_info_tick(symbol)
    if tick is None:
        raise RuntimeError(f"Failed to get tick for {symbol}: {mt5.last_error()}")
    is_buy = action == 1
    price = tick.ask if is_buy else tick.bid
    request = {
        "action": mt5.TRADE_ACTION_DEAL,
        "symbol": symbol,
        "volume": float(lot),
        "type": mt5.ORDER_TYPE_BUY if is_buy else mt5.ORDER_TYPE_SELL,
        "price": float(price),
        "deviation": deviation,
        "magic": 234000,
        "comment": "RL-bot",
        "type_filling": mt5.ORDER_FILLING_IOC,
    }
    result = mt5.order_send(request)
    return result

# -------------------------
# Main: training flow
# -------------------------

def train_agent():
    """Fetch historical bars, train a PPO agent on them, and save the model.

    Connects to MT5, trains for TRAIN_TIMESTEPS steps with checkpoints
    written under ``./logs/``, and saves the final model to MODEL_PATH.
    The MT5 connection is shut down even if fetching or training fails.
    """
    mt5_connect()
    try:
        # fetch historical bars
        n_bars = 5000
        df = fetch_bars(SYMBOL, TIMEFRAME, n_bars)
        print(f"Fetched {len(df)} bars for {SYMBOL}")
        # Create env
        env = DummyVecEnv([lambda: MT5TradingEnv(df, lookback=LOOKBACK)])
        # model
        model = PPO("MlpPolicy", env, verbose=1)
        # save checkpoints
        cb = CheckpointCallback(save_freq=5000, save_path="./logs/", name_prefix="ppo_mt5")
        model.learn(total_timesteps=TRAIN_TIMESTEPS, callback=cb)
        model.save(MODEL_PATH)
    finally:
        # Release the terminal connection on failure as well as on success.
        mt5_shutdown()
    print("Training complete, model saved to", MODEL_PATH)

# -------------------------
# Real-time execution loop (paper/live)
# -------------------------

def run_live_loop(model_path=MODEL_PATH, poll_seconds=5):
    """Poll MT5 for new bars and act on the model's prediction for each one.

    Args:
        model_path: Path of the saved PPO model to load.
        poll_seconds: Sleep between polls.

    Each time a bar with a newer timestamp appears, the recent history is
    re-fetched, one observation is built, and the predicted action is logged.
    With LIVE set, actions 1 and 2 are sent as market orders; otherwise the
    decision is only printed. Runs until KeyboardInterrupt; other exceptions
    inside the loop are printed and the loop continues after 5 seconds.
    The MT5 connection is always shut down on exit.
    """
    mt5_connect()
    try:
        model = PPO.load(model_path)
        print("Loaded model:", model_path)
        # We'll maintain a small in-memory buffer of recent bars
        n_history = LOOKBACK + 10
        df = fetch_bars(SYMBOL, TIMEFRAME, n_history)
        while True:
            try:
                latest = fetch_bars(SYMBOL, TIMEFRAME, 1)
                if latest['time'].iloc[-1] > df['time'].iloc[-1]:
                    # Re-fetch the window instead of appending `latest`:
                    # bar 0 is the bar still forming, so a bar stored when it
                    # first appeared would keep a close that is not final.
                    df = fetch_bars(SYMBOL, TIMEFRAME, n_history)
                    # env.reset() builds its observation from the `lookback`
                    # rows before index `lookback`. Passing the whole buffer
                    # would feed the model the OLDEST bars, so keep only the
                    # last LOOKBACK completed bars plus the forming bar.
                    window = df.iloc[-(LOOKBACK + 1):]
                    # Build an env instance for this single-step decision
                    env = MT5TradingEnv(window, lookback=LOOKBACK)
                    obs = env.reset()
                    # NOTE(review): the env is rebuilt every bar, so the
                    # position input is always 0 and a repeated buy/sell
                    # signal sends a new order each bar - confirm intended.
                    action, _states = model.predict(obs, deterministic=True)
                    print(f"[{pd.to_datetime('now')}] Action: {action} | Price: {df['close'].iloc[-1]}")
                    # Send order if LIVE
                    if LIVE:
                        if int(action) == 1:
                            res = send_order(SYMBOL, 1)
                            print("Order send result:", res)
                        elif int(action) == 2:
                            res = send_order(SYMBOL, 2)
                            print("Order send result:", res)
                    else:
                        # paper-trade: just log what would happen
                        print("LIVE=False -> paper trade logged only")
                # else: no new bar yet
                time.sleep(poll_seconds)
            except KeyboardInterrupt:
                print("Stopping live loop (KeyboardInterrupt).")
                break
            except Exception as e:
                # Deliberate best-effort: report and keep polling.
                print("Exception in live loop:", str(e))
                time.sleep(5)
    finally:
        # Also reached when loading the model or the first fetch fails.
        mt5_shutdown()

# -------------------------
# If run as script
# -------------------------

# Script entry point: `--mode train` (default) trains and saves the model,
# `--mode run` starts the live/paper loop. Skipped when the module is imported.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--mode", choices=["train", "run"], default="train")
    args = parser.parse_args()
    if args.mode == "train":
        print("Starting training...")
        train_agent()
    elif args.mode == "run":
        print("Starting live/paper run loop...")
        run_live_loop()

Sep 16 '25 01:09 vicks4u