backtesting.py icon indicating copy to clipboard operation
backtesting.py copied to clipboard

optimize: cannot reindex from a duplicate axis

Open antonma opened this issue 3 years ago • 5 comments

Hi,

first I want to thank you for this library which is cool and easy to use.

Expected Behavior

I would like to run an optimization to get the best results for BTC 1h data. I have already done this for other instruments such as the SP500 and the DOW, but unfortunately for BTC I get an exception. This could be caused by something in my data, but I don't know where to look...

Actual Behavior

As soon as I use more than one parameter to optimize I get this exception. With one parameter it works.

d:\repos\backtesting_py_2\bt_venv\lib\site-packages\backtesting_stats.py:24: FutureWarning: reindexing with a non-unique Index is deprecated and will raise in a future version. df = df.reindex(dd.index)

ValueError                                Traceback (most recent call last)
[d:\repos\backtesting_py\btc_backtest_cluster]() buy only close lb h1.py in <module>
      <a href='[file://d:\repos\backtesting_py\btc_backtest_cluster]() buy only close lb h1.py?line=107'>108</a> ##output['_trades']
      <a href='[file://d:\repos\backtesting_py\btc_backtest_cluster]() buy only close lb h1.py?line=108'>109</a> #bt.plot(plot_return = False, plot_pl = True, resample=True)
----> <a href='[file://d:\repos\backtesting_py\btc_backtest_cluster]() buy only close lb h1.py?line=109'>110</a> output = bt.optimize(p_center_ma=range(10, 40, 10),
      <a href='[file://d:\repos\backtesting_py\btc_backtest_cluster]() buy only close lb h1.py?line=110'>111</a>                #p_lb_ma_slow=range(200, 1000, 100),
     <a href='[file://d:\repos\backtesting_py\btc_backtest_cluster]() buy only close lb h1.py?line=111'>112</a>                #p_hb_ma_slow=range(50, 120, 10),

[d:\repos\backtesting_py\.venv\lib\site-packages\backtesting\backtesting.py]() in optimize(self, maximize, method, max_tries, constraint, return_heatmap, return_optimization, random_state, **kwargs)
   1485 
   1486         if method == 'grid':
-> 1487             output = _optimize_grid()
   1488         elif method == 'skopt':
   1489             output = _optimize_skopt()

[d:\repos\backtesting_py\.venv\lib\site-packages\backtesting\backtesting.py]() in _optimize_grid()
   1375                                       "set multiprocessing start method to 'fork'.")
   1376                     for batch_index in _tqdm(range(len(param_batches))):
-> 1377                         _, values = Backtest._mp_task(backtest_uuid, batch_index)
   1378                         for value, params in zip(values, param_batches[batch_index]):
   1379                             heatmap[tuple(params.values())] = value

[d:\repos\backtesting_py\.venv\lib\site-packages\backtesting\backtesting.py]() in _mp_task(backtest_uuid, batch_index)
   1495     def _mp_task(backtest_uuid, batch_index):
   1496         bt, param_batches, maximize_func = Backtest._mp_backtests[backtest_uuid]
-> 1497         return batch_index, [maximize_func(stats) if stats['# Trades'] else np.nan
   1498                              for stats in (bt.run(**params)
   1499                                            for params in param_batches[batch_index])]

[d:\repos\backtesting_py\.venv\lib\site-packages\backtesting\backtesting.py]() in <listcomp>(.0)
   1495     def _mp_task(backtest_uuid, batch_index):
   1496         bt, param_batches, maximize_func = Backtest._mp_backtests[backtest_uuid]
-> 1497         return batch_index, [maximize_func(stats) if stats['# Trades'] else np.nan
   1498                              for stats in (bt.run(**params)
   1499                                            for params in param_batches[batch_index])]

[d:\repos\backtesting_py\.venv\lib\site-packages\backtesting\backtesting.py]() in <genexpr>(.0)
   1496         bt, param_batches, maximize_func = Backtest._mp_backtests[backtest_uuid]
   1497         return batch_index, [maximize_func(stats) if stats['# Trades'] else np.nan
-> 1498                              for stats in (bt.run(**params)
   1499                                            for params in param_batches[batch_index])]
   1500 

[d:\repos\backtesting_py\.venv\lib\site-packages\backtesting\backtesting.py]() in run(self, **kwargs)
   1183 
   1184             equity = pd.Series(broker._equity).bfill().fillna(broker._cash).values
-> 1185             self._results = compute_stats(
   1186                 trades=broker.closed_trades,
   1187                 equity=equity,

[d:\repos\backtesting_py\.venv\lib\site-packages\backtesting\_stats.py]() in compute_stats(trades, equity, ohlc_data, strategy_instance, risk_free_rate)
     44     index = ohlc_data.index
     45     dd = 1 - equity / np.maximum.accumulate(equity)
---> 46     dd_dur, dd_peaks = compute_drawdown_duration_peaks(pd.Series(dd, index=index))
     47 
     48     equity_df = pd.DataFrame({

[d:\repos\backtesting_py\.venv\lib\site-packages\backtesting\_stats.py]() in compute_drawdown_duration_peaks(dd)
     22     df['duration'] = df['iloc'].map(dd.index.__getitem__) - df['prev'].map(dd.index.__getitem__)
     23     df['peak_dd'] = df.apply(lambda row: dd.iloc[row['prev']:row['iloc'] + 1].max(), axis=1)
---> 24     df = df.reindex(dd.index)
     25     return df['duration'], df['peak_dd']
     26 

[d:\repos\backtesting_py\.venv\lib\site-packages\pandas\util\_decorators.py]() in wrapper(*args, **kwargs)
    322         @wraps(func)
    323         def wrapper(*args, **kwargs) -> Callable[..., Any]:
--> 324             return func(*args, **kwargs)
    325 
    326         kind = inspect.Parameter.POSITIONAL_OR_KEYWORD

[d:\repos\backtesting_py\.venv\lib\site-packages\pandas\core\frame.py]() in reindex(self, *args, **kwargs)
   4770         kwargs.pop("axis", None)
   4771         kwargs.pop("labels", None)
-> 4772         return super().reindex(**kwargs)
   4773 
   4774     @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"])

[d:\repos\backtesting_py\.venv\lib\site-packages\pandas\core\generic.py]() in reindex(self, *args, **kwargs)
   4816 
   4817         # perform the reindex on the axes
-> 4818         return self._reindex_axes(
   4819             axes, level, limit, tolerance, method, fill_value, copy
   4820         ).__finalize__(self, method="reindex")

[d:\repos\backtesting_py\.venv\lib\site-packages\pandas\core\frame.py]() in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
   4595         index = axes["index"]
   4596         if index is not None:
-> 4597             frame = frame._reindex_index(
   4598                 index, method, copy, level, fill_value, limit, tolerance
   4599             )

[d:\repos\backtesting_py\.venv\lib\site-packages\pandas\core\frame.py]() in _reindex_index(self, new_index, method, copy, level, fill_value, limit, tolerance)
   4614             new_index, method=method, level=level, limit=limit, tolerance=tolerance
   4615         )
-> 4616         return self._reindex_with_indexers(
   4617             {0: [new_index, indexer]},
   4618             copy=copy,

[d:\repos\backtesting_py\.venv\lib\site-packages\pandas\core\generic.py]() in _reindex_with_indexers(self, reindexers, fill_value, copy, allow_dups)
   4881 
   4882             # TODO: speed up on homogeneous DataFrame objects
-> 4883             new_data = new_data.reindex_indexer(
   4884                 index,
   4885                 indexer,

[d:\repos\backtesting_py\.venv\lib\site-packages\pandas\core\internals\managers.py]() in reindex_indexer(self, new_axis, indexer, axis, fill_value, allow_dups, copy, consolidate, only_slice)
    668         # some axes don't allow reindexing with dups
    669         if not allow_dups:
--> 670             self.axes[axis]._validate_can_reindex(indexer)
    671 
    672         if axis >= self.ndim:

[d:\repos\backtesting_py\.venv\lib\site-packages\pandas\core\indexes\base.py]() in _validate_can_reindex(self, indexer)
   3783         # trying to reindex on an axis with duplicates
   3784         if not self._index_as_unique and len(indexer):
-> 3785             raise ValueError("cannot reindex from a duplicate axis")
   3786 
   3787     def reindex(

ValueError: cannot reindex from a duplicate axis

Steps to Reproduce


# Minimal reproduction: optimize two strategy parameters for the Sharpe ratio.
# `data` and `ClusterMACross` are not defined in this snippet; they come from
# the full script posted later in this thread.
bt = Backtest(data, ClusterMACross, cash=100000, exclusive_orders=True)
output = bt.optimize(p_center_ma=range(10, 40, 10),
               #p_lb_ma_slow=range(200, 1000, 100),
               #p_hb_ma_slow=range(50, 120, 10),
               p_lb_ma_trend=range(200, 1000, 200),
               #p_portion=range(10, 50, 10),
               maximize='Sharpe Ratio',
               # Only try combinations whose center-MA window is shorter
               # than the trend-MA window.
               constraint=lambda param: param.p_center_ma < param.p_lb_ma_trend
               )
# Print the statistics of the best run, then the winning strategy instance.
print(output)
print(output._strategy)

antonma avatar Feb 04 '22 09:02 antonma

Just to be sure, what is your pandas version, and can you run `pip install -U pandas`?

kernc avatar Feb 05 '22 03:02 kernc

It is 1.4.0

(bt_venv) PS D:\repos\backtesting_py_2\bt_venv\Scripts> pip install -U pandas
Requirement already satisfied: pandas in d:\repos\backtesting_py_2\bt_venv\lib\site-packages (1.4.0)
Requirement already satisfied: python-dateutil>=2.8.1 in d:\repos\backtesting_py_2\bt_venv\lib\site-packages (from pandas) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in d:\repos\backtesting_py_2\bt_venv\lib\site-packages (from pandas) (2021.3)
Requirement already satisfied: numpy>=1.18.5 in d:\repos\backtesting_py_2\bt_venv\lib\site-packages (from pandas) (1.22.2)
Requirement already satisfied: six>=1.5 in d:\repos\backtesting_py_2\bt_venv\lib\site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)
(bt_venv) PS D:\repos\backtesting_py_2\bt_venv\Scripts> 

antonma avatar Feb 05 '22 09:02 antonma

Apparently, there's a pandas bug here for your input: https://github.com/kernc/backtesting.py/blob/94d20da85e278102a0cc71b27a3a35b815e11648/backtesting/_stats.py#L12-L24 Maybe you can investigate it by placing breakpoint() before the line:

    df = df.reindex(dd.index)

and see what we are trying to set.

Alternatively, provide a full reproducible MWE that can be worked on. The stats computation seems to pass correctly on CI with pandas 1.4.

kernc avatar Feb 05 '22 21:02 kernc

[image] This is what I see with the debugger.

antonma avatar Feb 06 '22 19:02 antonma

This is my code:

# %%
from datetime import datetime, timedelta
from backtesting import Backtest, Strategy
from backtesting.lib import crossover
#from matplotlib import pyplot as plt
#import seaborn as sns
import pandas as pd
#import finta as TA
import math
# %%
# Load the hourly BTC data set and index it by timestamp.
data = pd.read_csv(
    "btc_clusters_regression_results_garch_120_close_h1_270122.csv")
data['date'] = pd.to_datetime(data['date'])
# #data.set_index('Time', inplace = True)
data = data.set_index('date')

# Relabel the first five columns by position with the OHLCV names that
# backtesting.py expects. A fresh label list is assigned instead of writing
# into `data.columns.values`: pandas treats an Index as immutable, and
# mutating its backing array in place can leave cached label lookups stale.
column_labels = list(data.columns)
column_labels[:5] = ['Close', 'Open', 'High', 'Low', 'Volume']
data.columns = column_labels

# backtesting.py reindexes its drawdown statistics on this index, which fails
# with "cannot reindex from a duplicate axis" when a timestamp occurs more
# than once. Keep the first row per timestamp and report how many were dropped.
duplicate_rows = data.index.duplicated(keep='first')
if duplicate_rows.any():
    print(f"Dropping {int(duplicate_rows.sum())} rows with duplicate timestamps")
    data = data[~duplicate_rows]

print(data.head(10))

# %%
def SMA(values, n):
    """Return the simple moving average of `values` over a window of `n`.

    `values` is wrapped in a `pandas.Series`; the result is a Series of the
    same length whose first `n - 1` entries are NaN because the rolling
    window is not yet full there.
    """
    series = pd.Series(values)
    window = series.rolling(n)
    return window.mean()
# %%
class ClusterMACross(Strategy):
    """Long-only moving-average crossover strategy on price and band columns.

    Besides `Close`, the input data must provide the columns `center`,
    `low band` and `high band`; `init` builds simple moving averages of each.

    On every bar `next` does one of the following:

    * closes the position and buys with ``size=0.99`` when the fast close MA
      crosses over the low-band trend MA and the latest close is above that
      trend MA;
    * otherwise closes the position when the slow low-band MA crosses over
      the center MA.
    """
    # ClusterMACross(p_lb_ma_slow=700,p_hb_ma_slow=80,p_lb_ma_trend=600)

    # SMA window lengths (in bars), overridable through Backtest.optimize().
    p_lb_ma_trend = 200
    # p_hb_ma_trend=6000
    p_center_ma = 10
    p_lb_ma_slow = 600
    p_hb_ma_slow = 50
    # Only referenced by the disabled partial-exit code at the end of `next`.
    p_portion = 33
    p_slow_ma = 21
    p_fast_ma = 8

    def init(self):
        """Register the SMA indicators that `next` reads."""
        data = self.data
        indicator = self.I
        # indicator(SMA, data.garch, 2) -> self.garch_ma_fast   (disabled)
        # indicator(SMA, data.angle, 2) -> self.angle_ma_fast   (disabled)
        self.close_ma_slow = indicator(SMA, data.Close, self.p_slow_ma)
        self.close_ma_fast = indicator(SMA, data.Close, self.p_fast_ma)
        self.center_ma = indicator(SMA, data.center, self.p_center_ma)
        self.lb_ma_slow = indicator(SMA, data['low band'], self.p_lb_ma_slow)
        self.hb_ma_slow = indicator(SMA, data['high band'], self.p_hb_ma_slow)
        self.lb_ma_trend = indicator(SMA, data['low band'], self.p_lb_ma_trend)

    def next(self):
        """Apply the entry rule first, then fall back to the exit rule."""
        trend_ma = self.lb_ma_trend
        if crossover(self.close_ma_fast, trend_ma) and self.data.Close[-1] > trend_ma[-1]:
            # Entry: flatten whatever is open, then go long.
            self.position.close()
            self.buy(size=0.99)
            return

        if crossover(self.lb_ma_slow, self.center_ma):
            # Exit: the slow low-band MA crossed over the center MA.
            self.position.close()

        # Disabled partial-exit experiment, kept for reference:
        # elif (crossover(self.center_ma, self.hb_ma_slow)):
        #     if (self.position.pl > 0):
        #         self.position.close(portion=self.p_portion / 100)
        #         if (len(self.trades) > 0):
        #             print(len(self.trades))
        #             self.trades[0].sl = self.trades[0].entry_price
        # elif (crossover(self.hb_ma_slow, self.center_ma)
        #       and self.center_ma[-1] > self.lb_ma_slow[-1]
        #       ):
        #     self.position.close(portion=self.p_portion / 100)

# %%
# Build the backtest; exclusive_orders=True is passed through to Backtest.
bt = Backtest(data, ClusterMACross, exclusive_orders=True, cash=100000)

# Single-run alternative, currently disabled:
#output = bt.run()
#print(output)
#bt.plot(plot_return = False, plot_pl = True, resample=True)

# Parameter grid for the optimizer. Dict insertion order is the order in
# which the keywords reach bt.optimize(), so keep it stable.
# NOTE(review): p_portion is only read by commented-out code in
# ClusterMACross.next, so varying it appears to repeat identical backtests.
search_space = {
    'p_center_ma': range(10, 100, 10),
    # 'p_lb_ma_slow': range(200, 1000, 100),
    # 'p_hb_ma_slow': range(50, 120, 10),
    'p_lb_ma_trend': range(200, 1000, 200),
    'p_portion': range(10, 50, 10),
}

output = bt.optimize(
    maximize='Sortino Ratio',
    # maximize='Sharpe Ratio',
    # constraint=lambda param: param.p_lb_ma_trend < param.p_lb_ma_slow,
    # Only accept combinations whose center-MA window is shorter than the
    # trend-MA window.
    constraint=lambda p: p.p_center_ma < p.p_lb_ma_trend,
    **search_space,
)

# Report the best run's statistics, then the winning strategy instance.
print(output)
print(output._strategy)

Data btc_clusters_regression_results_garch_120_close_h1_270122.csv

antonma avatar Feb 06 '22 19:02 antonma