Reading custom futures bundle gives KeyError: <class 'zipline.assets.continuous_futures.ContinuousFuture'>
Dear Zipline Maintainers,
Before I tell you about my issue, let me describe my environment:
Environment
- Operating System:
Linux 0a2e3f5e9596 4.19.128-microsoft-standard #1 SMP Tue Jun 23 12:58:10 UTC 2020 x86_64 GNU/Linux
- Python Version:
Python 3.6.12
- Python Bitness:
64
- How did you install Zipline: use this Dockerfile
- Python packages:
alembic==1.4.3
appdirs==1.4.4
argon2-cffi==20.1.0
async-generator==1.10
attrs==20.2.0
backcall==0.2.0
bcolz==1.2.1
black==20.8b1
bleach==3.2.1
Bottleneck==1.3.2
cached-property==1.5.2
certifi==2020.6.20
cffi==1.14.3
chardet==3.0.4
click==7.1.2
cycler==0.10.0
dataclasses==0.7
decorator==4.4.2
defusedxml==0.6.0
empyrical==0.5.5
entrypoints==0.3
h5py==3.0.0
idna==2.10
importlib-metadata==2.0.0
inflection==0.5.1
intervaltree==3.1.0
ipykernel==5.3.4
ipython==7.16.1
ipython-genutils==0.2.0
ipywidgets==7.5.1
iso3166==1.0.1
iso4217==1.6.20180829
jedi==0.17.2
Jinja2==2.11.2
joblib==0.17.0
jsonschema==3.2.0
jupyter==1.0.0
jupyter-client==6.1.7
jupyter-console==6.2.0
jupyter-core==4.6.3
jupyterlab-pygments==0.1.2
kiwisolver==1.2.0
Logbook==1.5.3
lru-dict==1.1.6
lxml==4.6.1
Mako==1.1.3
MarkupSafe==1.1.1
matplotlib==3.3.2
mistune==0.8.4
more-itertools==8.6.0
multipledispatch==0.6.0
mypy-extensions==0.4.3
nb-black==1.0.7
nbclient==0.5.1
nbconvert==6.0.7
nbformat==5.0.8
nest-asyncio==1.4.2
networkx==1.11
notebook==6.1.4
numexpr==2.7.1
numpy==1.19.3
packaging==20.4
pandas==0.22.0
pandas-datareader==0.8.1
pandocfilters==1.4.3
parso==0.7.1
pathspec==0.8.1
patsy==0.5.1
pexpect==4.8.0
pickleshare==0.7.5
Pillow==8.0.1
prometheus-client==0.8.0
prompt-toolkit==3.0.8
ptyprocess==0.6.0
pycparser==2.20
pyfolio==0.9.2
Pygments==2.7.2
pyparsing==2.4.7
pyrsistent==0.17.3
python-dateutil==2.8.1
python-editor==1.0.4
python-interface==1.6.0
pytz==2020.1
pyzmq==19.0.2
qtconsole==4.7.7
QtPy==1.9.0
Quandl==3.5.3
regex==2020.10.28
requests==2.24.0
rope==0.18.0
scikit-learn==0.23.2
scipy==1.5.3
seaborn==0.11.0
Send2Trash==1.5.0
six==1.15.0
sortedcontainers==2.2.2
SQLAlchemy==1.3.20
statsmodels==0.12.1
TA-Lib==0.4.19
tables==3.6.1
terminado==0.9.1
testpath==0.4.4
threadpoolctl==2.1.0
toml==0.10.2
toolz==0.11.1
tornado==6.1
tqdm==4.51.0
trading-calendars==2.0.0
traitlets==4.3.3
typed-ast==1.4.1
typing-extensions==3.7.4.3
urllib3==1.25.11
wcwidth==0.2.5
webencodings==0.5.1
widgetsnbextension==3.5.1
# Editable Git install with no remote (zipline==0+unknown)
-e /zipline
zipp==3.4.0
Now that you know a little about me, let me tell you about the issue I am having:
Description of Issue
- What did you expect to happen?
Algorithm runs to completion.
- What happened instead?
Algorithm throws this exception:
-----------
KeyErrorTraceback (most recent call last)
<ipython-input-3-99c44213dbe4> in <module>
209 capital_base=starting_portfolio,
210 data_frequency="daily",
--> 211 bundle="random_futures",
212 )
/zipline/zipline/utils/run_algo.py in run_algorithm(start, end, initialize, capital_base, handle_data, before_trading_start, analyze, data_frequency, bundle, bundle_timestamp, trading_calendar, metrics_set, benchmark_returns, default_extension, extensions, strict_extensions, environ, blotter)
407 environ=environ,
408 blotter=blotter,
--> 409 benchmark_spec=benchmark_spec,
410 )
411
/zipline/zipline/utils/run_algo.py in _run(handle_data, initialize, before_trading_start, analyze, algofile, algotext, defines, data_frequency, capital_base, bundle, bundle_timestamp, start, end, output, trading_calendar, print_algo, metrics_set, local_namespace, environ, blotter, benchmark_spec)
214 } if algotext is None else {
215 'algo_filename': getattr(algofile, 'name', '<algorithm>'),
--> 216 'script': algotext,
217 }
218 ).run()
/zipline/zipline/algorithm.py in run(self, data_portal)
641 try:
642 perfs = []
--> 643 for perf in self.get_generator():
644 perfs.append(perf)
645
/zipline/zipline/gens/tradesimulation.py in transform(self)
203 for dt, action in self.clock:
204 if action == BAR:
--> 205 for capital_change_packet in every_bar(dt):
206 yield capital_change_packet
207 elif action == SESSION_START:
/zipline/zipline/gens/tradesimulation.py in every_bar(dt_to_use, current_data, handle_data)
131 metrics_tracker.process_commission(commission)
132
--> 133 handle_data(algo, current_data, dt_to_use)
134
135 # grab any new orders from the blotter, then clear the list.
/zipline/zipline/utils/events.py in handle_data(self, context, data, dt)
216 context,
217 data,
--> 218 dt,
219 )
220
/zipline/zipline/utils/events.py in handle_data(self, context, data, dt)
235 """
236 if self.rule.should_trigger(dt):
--> 237 self.callback(context, data)
238
239
<ipython-input-3-99c44213dbe4> in rebalance(context, data)
138 fields=["close", "volume"],
139 frequency="1d",
--> 140 bar_count=long_trend_window,
141 )
142
/zipline/zipline/_protocol.pyx in zipline._protocol.check_parameters.__call__.assert_keywords_and_call()
/zipline/zipline/_protocol.pyx in zipline._protocol.BarData.history()
/zipline/zipline/data/data_portal.py in get_history_window(self, assets, end_dt, bar_count, frequency, field, data_frequency, ffill)
965 else:
966 df = self._get_history_daily_window(assets, end_dt, bar_count,
--> 967 field, data_frequency)
968 elif frequency == "1m":
969 if field == "price":
/zipline/zipline/data/data_portal.py in _get_history_daily_window(self, assets, end_dt, bar_count, field_to_use, data_frequency)
804
805 data = self._get_history_daily_window_data(
--> 806 assets, days_for_window, end_dt, field_to_use, data_frequency
807 )
808 return pd.DataFrame(
/zipline/zipline/data/data_portal.py in _get_history_daily_window_data(self, assets, days_for_window, end_dt, field_to_use, data_frequency)
827 field_to_use,
828 days_for_window,
--> 829 extra_slot=False
830 )
831 else:
/zipline/zipline/data/data_portal.py in _get_daily_window_data(self, assets, field, days_in_window, extra_slot)
1115 days_in_window,
1116 field,
-> 1117 extra_slot)
1118 if extra_slot:
1119 return_array[:len(return_array) - 1, :] = data
/zipline/zipline/data/history_loader.py in history(self, assets, dts, field, is_perspective_after)
547 dts,
548 field,
--> 549 is_perspective_after)
550 end_ix = self._calendar.searchsorted(dts[-1])
551
/zipline/zipline/data/history_loader.py in _ensure_sliding_windows(self, assets, dts, field, is_perspective_after)
429 adj_dts = prefetch_dts
430 prefetch_len = len(prefetch_dts)
--> 431 array = self._array(prefetch_dts, needed_assets, field)
432
433 if field == 'sid':
/zipline/zipline/data/history_loader.py in _array(self, dts, assets, field)
571 dts[0],
572 dts[-1],
--> 573 assets,
574 )[0]
575
/zipline/zipline/data/dispatch_bar_reader.py in load_raw_arrays(self, fields, start_dt, end_dt, sids)
110 for i, asset in enumerate(assets):
111 t = type(asset)
--> 112 sid_groups[t].append(asset)
113 out_pos[t].append(i)
114
KeyError: <class 'zipline.assets.continuous_futures.ContinuousFuture'>
Here is how you can reproduce this issue on your machine:
Reproduction Steps
- Create a container based on the Dockerfile mentioned in Environment section
- Install all dependencies
- Run this notebook to create a random futures dataset
- Run this notebook to run the algorithm
What steps have you taken to resolve this already?
Google. Read the source code.
Anything else?
I think the problem starts here.
The future_minute_reader argument to DataPortal is None which causes no readers for a ContinuousFuture to be passed to the AssetDispatchMinuteBarReader object when it is initialized here
Sincerely,
root
Have you tried instantiating a DataPortal object outside of the backtest algorithm? Then you could pass in any custom args and check the data is at least accessible that way...
@moseshassan thanks for the suggestion. I don't know how to do that.
Are there examples you can point me to that shows how to instantiate a DataPortal object outside of the backtest algorithm?
This thread has a good example. You may need to change some of the arguments to match your bundle data.
@moseshassan thanks for the tip. I am able to instantiate a DataPortal object outside the backtest algorithm and access the data in it.
This is the code I used:
# Registering the custom bundle must happen BEFORE bundles.load() below,
# otherwise 'random_futures' is unknown to zipline.
import extension # the extension.py in ~/.zipline to register the 'random_futures' bundle
from zipline.data.data_portal import DataPortal
from zipline.data import bundles
import pandas as pd
from trading_calendars import register_calendar, get_calendar
# Futures trade on the CME-style "us_futures" calendar, not the equity calendar.
cal = get_calendar("us_futures")
bundle_name = "random_futures" # "a bundle name"
# end_dt passed to get_history_window must be timezone-aware (UTC).
end_date = pd.Timestamp("2019-01-31", tz="utc")
window = 2 # how many days you want to look back
bundle_data = bundles.load(bundle_name)
# NOTE(review): the equity readers are deliberately reused as the future
# readers here — the custom bundle ingests futures data through the equity
# writers. This works around DataPortal receiving no future readers, which
# is the root cause of the KeyError in the report above — confirm this
# matches how the bundle was ingested.
data_por = DataPortal(
bundle_data.asset_finder,
cal,
bundle_data.equity_daily_bar_reader.first_trading_day,
future_minute_reader=bundle_data.equity_minute_bar_reader,
future_daily_reader=bundle_data.equity_daily_bar_reader,
adjustment_reader=bundle_data.adjustment_reader,
)
# Resolve the contract symbols to Future asset objects via the asset finder.
contracts = ["ADM23", "CCH19"]
syms = [data_por.asset_finder.lookup_future_symbol(contract) for contract in contracts]
# Fetch a `window`-day daily close-price history ending at end_date;
# returns a DataFrame indexed by session with one column per asset.
data = data_por.get_history_window(
assets=syms,
end_dt=end_date,
bar_count=window,
frequency="1d",
data_frequency="daily",
field="close",
)
data.head(5)
Is there a way to use this DataPortal object inside zipline.run_algorithm()?
@hsm207 Passing a specific DataPortal to run_algorithm doesn't seem to be part of the original interface.
One way would be to fork the repo and add the required parameters around here:
https://github.com/quantopian/zipline/blob/014f1fc339dc8b7671d29be2d85ce57d3daec343/zipline/utils/run_algo.py#L159
Also note, the data portal can be accessed via the context attribute within initialize
Perhaps a contributor would know if it's safe to do that?
@moseshassan thanks for sharing your thoughts. I'll go for the fork solution.