backtesting.py
backtesting.py copied to clipboard
optimize: cannot reindex from a duplicate axis
Hi,
first I want to thank you for this library which is cool and easy to use.
Expected Behavior
I would like to run an optimization to get the best results for BTC 1h data. I have already done this for other instruments like the SP500 or DOW, but unfortunately for BTC I get an exception. This could be something in my data, but I don't know where to look.
Actual Behavior
As soon as I use more than one parameter to optimize I get this exception. With one parameter it works.
d:\repos\backtesting_py_2\bt_venv\lib\site-packages\backtesting_stats.py:24: FutureWarning: reindexing with a non-unique Index is deprecated and will raise in a future version. df = df.reindex(dd.index)
ValueError Traceback (most recent call last)
[d:\repos\backtesting_py\btc_backtest_cluster]() buy only close lb h1.py in <module>
<a href='[file://d:\repos\backtesting_py\btc_backtest_cluster]() buy only close lb h1.py?line=107'>108</a> ##output['_trades']
<a href='[file://d:\repos\backtesting_py\btc_backtest_cluster]() buy only close lb h1.py?line=108'>109</a> #bt.plot(plot_return = False, plot_pl = True, resample=True)
----> <a href='[file://d:\repos\backtesting_py\btc_backtest_cluster]() buy only close lb h1.py?line=109'>110</a> output = bt.optimize(p_center_ma=range(10, 40, 10),
<a href='[file://d:\repos\backtesting_py\btc_backtest_cluster]() buy only close lb h1.py?line=110'>111</a> #p_lb_ma_slow=range(200, 1000, 100),
<a href='[file://d:\repos\backtesting_py\btc_backtest_cluster]() buy only close lb h1.py?line=111'>112</a> #p_hb_ma_slow=range(50, 120, 10),
[d:\repos\backtesting_py\.venv\lib\site-packages\backtesting\backtesting.py]() in optimize(self, maximize, method, max_tries, constraint, return_heatmap, return_optimization, random_state, **kwargs)
1485
1486 if method == 'grid':
-> 1487 output = _optimize_grid()
1488 elif method == 'skopt':
1489 output = _optimize_skopt()
[d:\repos\backtesting_py\.venv\lib\site-packages\backtesting\backtesting.py]() in _optimize_grid()
1375 "set multiprocessing start method to 'fork'.")
1376 for batch_index in _tqdm(range(len(param_batches))):
-> 1377 _, values = Backtest._mp_task(backtest_uuid, batch_index)
1378 for value, params in zip(values, param_batches[batch_index]):
1379 heatmap[tuple(params.values())] = value
[d:\repos\backtesting_py\.venv\lib\site-packages\backtesting\backtesting.py]() in _mp_task(backtest_uuid, batch_index)
1495 def _mp_task(backtest_uuid, batch_index):
1496 bt, param_batches, maximize_func = Backtest._mp_backtests[backtest_uuid]
-> 1497 return batch_index, [maximize_func(stats) if stats['# Trades'] else np.nan
1498 for stats in (bt.run(**params)
1499 for params in param_batches[batch_index])]
[d:\repos\backtesting_py\.venv\lib\site-packages\backtesting\backtesting.py]() in <listcomp>(.0)
1495 def _mp_task(backtest_uuid, batch_index):
1496 bt, param_batches, maximize_func = Backtest._mp_backtests[backtest_uuid]
-> 1497 return batch_index, [maximize_func(stats) if stats['# Trades'] else np.nan
1498 for stats in (bt.run(**params)
1499 for params in param_batches[batch_index])]
[d:\repos\backtesting_py\.venv\lib\site-packages\backtesting\backtesting.py]() in <genexpr>(.0)
1496 bt, param_batches, maximize_func = Backtest._mp_backtests[backtest_uuid]
1497 return batch_index, [maximize_func(stats) if stats['# Trades'] else np.nan
-> 1498 for stats in (bt.run(**params)
1499 for params in param_batches[batch_index])]
1500
[d:\repos\backtesting_py\.venv\lib\site-packages\backtesting\backtesting.py]() in run(self, **kwargs)
1183
1184 equity = pd.Series(broker._equity).bfill().fillna(broker._cash).values
-> 1185 self._results = compute_stats(
1186 trades=broker.closed_trades,
1187 equity=equity,
[d:\repos\backtesting_py\.venv\lib\site-packages\backtesting\_stats.py]() in compute_stats(trades, equity, ohlc_data, strategy_instance, risk_free_rate)
44 index = ohlc_data.index
45 dd = 1 - equity / np.maximum.accumulate(equity)
---> 46 dd_dur, dd_peaks = compute_drawdown_duration_peaks(pd.Series(dd, index=index))
47
48 equity_df = pd.DataFrame({
[d:\repos\backtesting_py\.venv\lib\site-packages\backtesting\_stats.py]() in compute_drawdown_duration_peaks(dd)
22 df['duration'] = df['iloc'].map(dd.index.__getitem__) - df['prev'].map(dd.index.__getitem__)
23 df['peak_dd'] = df.apply(lambda row: dd.iloc[row['prev']:row['iloc'] + 1].max(), axis=1)
---> 24 df = df.reindex(dd.index)
25 return df['duration'], df['peak_dd']
26
[d:\repos\backtesting_py\.venv\lib\site-packages\pandas\util\_decorators.py]() in wrapper(*args, **kwargs)
322 @wraps(func)
323 def wrapper(*args, **kwargs) -> Callable[..., Any]:
--> 324 return func(*args, **kwargs)
325
326 kind = inspect.Parameter.POSITIONAL_OR_KEYWORD
[d:\repos\backtesting_py\.venv\lib\site-packages\pandas\core\frame.py]() in reindex(self, *args, **kwargs)
4770 kwargs.pop("axis", None)
4771 kwargs.pop("labels", None)
-> 4772 return super().reindex(**kwargs)
4773
4774 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"])
[d:\repos\backtesting_py\.venv\lib\site-packages\pandas\core\generic.py]() in reindex(self, *args, **kwargs)
4816
4817 # perform the reindex on the axes
-> 4818 return self._reindex_axes(
4819 axes, level, limit, tolerance, method, fill_value, copy
4820 ).__finalize__(self, method="reindex")
[d:\repos\backtesting_py\.venv\lib\site-packages\pandas\core\frame.py]() in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
4595 index = axes["index"]
4596 if index is not None:
-> 4597 frame = frame._reindex_index(
4598 index, method, copy, level, fill_value, limit, tolerance
4599 )
[d:\repos\backtesting_py\.venv\lib\site-packages\pandas\core\frame.py]() in _reindex_index(self, new_index, method, copy, level, fill_value, limit, tolerance)
4614 new_index, method=method, level=level, limit=limit, tolerance=tolerance
4615 )
-> 4616 return self._reindex_with_indexers(
4617 {0: [new_index, indexer]},
4618 copy=copy,
[d:\repos\backtesting_py\.venv\lib\site-packages\pandas\core\generic.py]() in _reindex_with_indexers(self, reindexers, fill_value, copy, allow_dups)
4881
4882 # TODO: speed up on homogeneous DataFrame objects
-> 4883 new_data = new_data.reindex_indexer(
4884 index,
4885 indexer,
[d:\repos\backtesting_py\.venv\lib\site-packages\pandas\core\internals\managers.py]() in reindex_indexer(self, new_axis, indexer, axis, fill_value, allow_dups, copy, consolidate, only_slice)
668 # some axes don't allow reindexing with dups
669 if not allow_dups:
--> 670 self.axes[axis]._validate_can_reindex(indexer)
671
672 if axis >= self.ndim:
[d:\repos\backtesting_py\.venv\lib\site-packages\pandas\core\indexes\base.py]() in _validate_can_reindex(self, indexer)
3783 # trying to reindex on an axis with duplicates
3784 if not self._index_as_unique and len(indexer):
-> 3785 raise ValueError("cannot reindex from a duplicate axis")
3786
3787 def reindex(
ValueError: cannot reindex from a duplicate axis
Steps to Reproduce
# Minimal reproduction: grid-search two moving-average windows, ranked by
# Sharpe Ratio. Only combinations with p_center_ma < p_lb_ma_trend are
# admitted by the constraint.
bt = Backtest(
    data,
    ClusterMACross,
    cash=100000,
    exclusive_orders=True,
)
output = bt.optimize(
    p_center_ma=range(10, 40, 10),
    # p_lb_ma_slow=range(200, 1000, 100),
    # p_hb_ma_slow=range(50, 120, 10),
    p_lb_ma_trend=range(200, 1000, 200),
    # p_portion=range(10, 50, 10),
    maximize='Sharpe Ratio',
    constraint=lambda param: param.p_center_ma < param.p_lb_ma_trend,
)
print(output)
print(output._strategy)
Just to be sure, what is your pandas version and can you do pip install -U pandas?
It is 1.4.0
(bt_venv) PS D:\repos\backtesting_py_2\bt_venv\Scripts> pip install -U pandas
Requirement already satisfied: pandas in d:\repos\backtesting_py_2\bt_venv\lib\site-packages (1.4.0)
Requirement already satisfied: python-dateutil>=2.8.1 in d:\repos\backtesting_py_2\bt_venv\lib\site-packages (from pandas) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in d:\repos\backtesting_py_2\bt_venv\lib\site-packages (from pandas) (2021.3)
Requirement already satisfied: numpy>=1.18.5 in d:\repos\backtesting_py_2\bt_venv\lib\site-packages (from pandas) (1.22.2)
Requirement already satisfied: six>=1.5 in d:\repos\backtesting_py_2\bt_venv\lib\site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)
(bt_venv) PS D:\repos\backtesting_py_2\bt_venv\Scripts>
Apparently, there's a pandas bug here for your input:
https://github.com/kernc/backtesting.py/blob/94d20da85e278102a0cc71b27a3a35b815e11648/backtesting/_stats.py#L12-L24
Maybe you can investigate it by placing breakpoint() before the line:
df = df.reindex(dd.index)
and see what we are trying to set.
Alternatively, provide a full reproducible MWE that can be worked on. The stats computation seems to pass correctly on CI with pandas 1.4.
This is what I see with the debugger.
This is my code:
# %%
from datetime import datetime, timedelta
from backtesting import Backtest, Strategy
from backtesting.lib import crossover
#from matplotlib import pyplot as plt
#import seaborn as sns
import pandas as pd
#import finta as TA
import math
# %%
# Load the hourly BTC data and index it by timestamp.
data = pd.read_csv(
    "btc_clusters_regression_results_garch_120_close_h1_270122.csv")
data['date'] = pd.to_datetime(data['date'])
# #data.set_index('Time', inplace = True)
data = data.set_index('date')

# Rename the first five columns positionally to the price/volume names used by
# the strategy. Assigning a whole new column list replaces the Index cleanly;
# writing into `data.columns.values[i]` mutates the Index's backing array
# behind its back and can leave label lookups stale.
ohlcv_names = ['Close', 'Open', 'High', 'Low', 'Volume']
data.columns = ohlcv_names + list(data.columns[len(ohlcv_names):])

# Repeated timestamps make the drawdown statistics fail with
# "cannot reindex from a duplicate axis" (they reindex on the data index),
# so keep only the first row for each timestamp and report what was dropped.
duplicated_rows = data.index.duplicated(keep='first')
if duplicated_rows.any():
    print(f"Dropping {int(duplicated_rows.sum())} rows with duplicate timestamps")
    data = data[~duplicated_rows]

print(data.head(10))
# %%
def SMA(values, n):
    """Return the simple moving average of `values` over a window of `n` items.

    `values` is wrapped in a fresh `pd.Series`, so the result carries a
    default integer index. Positions that do not yet have a full window of
    `n` observations are NaN.
    """
    series = pd.Series(values)
    window = series.rolling(n)
    return window.mean()
# %%
class ClusterMACross(Strategy):
    """Long-only moving-average crossover strategy on price and band columns.

    Entry: the fast close MA crosses above the trend MA of the 'low band'
    column while the latest close is above that trend MA.
    Exit: the slow MA of the 'low band' column crosses above the MA of the
    'center' column.

    The class attributes below are the tunable window lengths (and the
    partial-close percentage used only by the disabled code in `next`).
    """
    # Example: ClusterMACross(p_lb_ma_slow=700,p_hb_ma_slow=80,p_lb_ma_trend=600)

    p_lb_ma_trend = 200
    # p_hb_ma_trend=6000
    p_center_ma = 10
    p_lb_ma_slow = 600
    p_hb_ma_slow = 50
    p_portion = 33
    p_slow_ma = 21
    p_fast_ma = 8

    def init(self):
        """Register every moving-average indicator used (or reserved) by `next`."""
        feed = self.data
        # self.garch_ma_fast = self.I(SMA, self.data.garch, 2)
        # self.angle_ma_fast = self.I(SMA, self.data.angle, 2)

        # Moving averages of the close price.
        self.close_ma_slow = self.I(SMA, feed.Close, self.p_slow_ma)
        self.close_ma_fast = self.I(SMA, feed.Close, self.p_fast_ma)

        # Moving averages of the 'center', 'low band' and 'high band' columns.
        self.center_ma = self.I(SMA, feed.center, self.p_center_ma)
        self.lb_ma_slow = self.I(SMA, feed['low band'], self.p_lb_ma_slow)
        self.hb_ma_slow = self.I(SMA, feed['high band'], self.p_hb_ma_slow)
        self.lb_ma_trend = self.I(SMA, feed['low band'], self.p_lb_ma_trend)

    def next(self):
        """Evaluate the entry rule first, then the exit rule, for the current bar."""
        fast_crossed_trend = crossover(self.close_ma_fast, self.lb_ma_trend)
        if fast_crossed_trend and self.data.Close[-1] > self.lb_ma_trend[-1]:
            # Entry: flatten whatever is open, then buy with 99% of equity.
            self.position.close()
            self.buy(size=0.99)
            return

        if crossover(self.lb_ma_slow, self.center_ma):
            # Exit: slow low-band MA crossed above the center MA.
            self.position.close()

        # Disabled partial-close / break-even experiments, kept for reference:
        # elif (crossover(self.center_ma, self.hb_ma_slow)):
        #     if (self.position.pl > 0):
        #         self.position.close(portion=self.p_portion / 100)
        #         if (len(self.trades) > 0):
        #             print(len(self.trades))
        #             self.trades[0].sl = self.trades[0].entry_price
        # elif (crossover(self.hb_ma_slow, self.center_ma)
        #       and self.center_ma[-1] > self.lb_ma_slow[-1]
        #       ):
        #     self.position.close(portion=self.p_portion / 100)
# %%
# Build the backtest and grid-search three strategy parameters, ranking the
# runs by Sortino Ratio. Only combinations with
# p_center_ma < p_lb_ma_trend are admitted by the constraint.
bt = Backtest(
    data,
    ClusterMACross,
    cash=100000,
    exclusive_orders=True,
)
# output = bt.run()
# print(output)
# bt.plot(plot_return = False, plot_pl = True, resample=True)
output = bt.optimize(
    p_center_ma=range(10, 100, 10),
    # p_lb_ma_slow=range(200, 1000, 100),
    # p_hb_ma_slow=range(50, 120, 10),
    p_lb_ma_trend=range(200, 1000, 200),
    p_portion=range(10, 50, 10),
    maximize='Sortino Ratio',
    # maximize='Sharpe Ratio',
    # constraint=lambda param: param.p_lb_ma_trend < param.p_lb_ma_slow,
    constraint=lambda param: param.p_center_ma < param.p_lb_ma_trend,
)
print(output)
print(output._strategy)
Data btc_clusters_regression_results_garch_120_close_h1_270122.csv