FinRL
FinRL copied to clipboard
DF count is off - following FinRL_PortfolioAllocation_NeurIPS_2020
Describe the bug: I compared the dataframe after feature engineering (row count of 3627) to the dataframe after adding the covariance matrix as states (row count of 3376), and there is a 252-row difference. I exported the DF to CSV and found that the first 252 days of data are missing after adding the covariance. I understand the lookback is 252 days (one year), but why would it remove the first 252 days from the DF?
To Reproduce
Config_tickers.py:
# Custom ticker list; the script below reads it as config_tickers.SINGLE_TICKER.
SINGLE_TICKER = ["AAPL"]
My Test.py:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
matplotlib.use('Agg')
import datetime
import os
import sys
from finrl import config
from finrl import config_tickers
from finrl.finrl_meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.finrl_meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.finrl_meta.env_portfolio_allocation.env_portfolio import StockPortfolioEnv
from finrl.agents.stablebaselines3.models import DRLAgent
from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline,convert_daily_return_to_pyfolio_ts
from finrl.finrl_meta.data_processor import DataProcessor
from finrl.finrl_meta.data_processors.processor_yahoofinance import YahooFinanceProcessor
# Add ../FinRL-Library to the module search path. Note that this runs after
# the finrl imports above, so it cannot influence how they were resolved.
sys.path.append("../FinRL-Library")

# Create each working directory named in finrl.config if it does not exist yet.
# (The pasted snippet had lost the indentation of the `if` bodies, which made
# it a syntax error; the four identical stanzas are folded into one loop.)
for _dir_name in (
    config.DATA_SAVE_DIR,
    config.TRAINED_MODEL_DIR,
    config.TENSORBOARD_LOG_DIR,
    config.RESULTS_DIR,
):
    _dir_path = "./" + _dir_name
    if not os.path.exists(_dir_path):
        os.makedirs(_dir_path)
# print(config_tickers.SINGLE_TICKER)

# Show every column when DataFrames are printed below.
pd.set_option('display.max_columns', None)

# Step 1: download the raw data for the configured ticker list.
dp = YahooFinanceProcessor()
df = dp.download_data(
    start_date='2008-01-01',
    end_date='2022-05-30',
    ticker_list=config_tickers.SINGLE_TICKER,
    time_interval='1D',
)
print("**************Yahoo Data**************")
print(df.shape)
print(df.head())

# Step 2: run FinRL's feature engineering with only the technical-indicator
# option switched on (turbulence and user-defined features are disabled).
fe = FeatureEngineer(
    use_technical_indicator=True,
    use_turbulence=False,
    user_defined_feature=False,
)
df = fe.preprocess_data(df)
print("**************Data After Feature Engineer (Pre-Processed Data)**************")
print(df.shape)
print(df.head())

# Keep a copy of the pre-covariance frame for comparison.
df.to_csv("datawithTA.csv")
# Add a rolling covariance matrix (and the returns it was computed from) as
# state columns, one entry per date.
df = df.sort_values(['date', 'tic'], ignore_index=True)
# Re-label rows with the ordinal position of their date (0, 1, 2, ...), so
# that .loc below can slice by "day number"; rows sharing a date share a label.
df.index = df.date.factorize()[0]

cov_list = []
return_list = []

# look back is one year
lookback = 252
# Start at day `lookback`: earlier days do not have a full window behind them,
# so no covariance entry is produced for them.
for i in range(lookback, len(df.index.unique())):
    # .loc slices by label and includes both endpoints, so this window spans
    # days i - lookback through i.
    data_lookback = df.loc[i - lookback:i, :]
    price_lookback = data_lookback.pivot_table(index='date', columns='tic', values='close')
    # pct_change leaves NaN in the first row; dropna removes it.
    return_lookback = price_lookback.pct_change().dropna()
    return_list.append(return_lookback)
    covs = return_lookback.cov().values
    cov_list.append(covs)

# df_cov only has rows for dates at position >= lookback.
df_cov = pd.DataFrame({
    'date': df.date.unique()[lookback:],
    'cov_list': cov_list,
    'return_list': return_list,
})
# Inner join (pandas' default, spelled out here): only dates present in df_cov
# survive, which is why the first `lookback` dates disappear from df.
df = df.merge(df_cov, on='date', how='inner')
df = df.sort_values(['date', 'tic']).reset_index(drop=True)

print("**************Data With Covariance Matrix**************")
print(df.shape)
print(df.head())
df.to_csv("ProcessedData.csv")
Desktop (please complete the following information):
- OS: Mac OS
- Python: 3.9