FinRL icon indicating copy to clipboard operation
FinRL copied to clipboard

DF count is off - following FinRL_PortfolioAllocation_NeurIPS_2020

Open varunpan opened this issue 3 years ago • 0 comments

Describe the bug I compared the dataframe after Feature Engineering (row count of 3627) to the dataframe after adding covariance as states (row count of 3376), and there is a 252-row difference. I exported the DF to CSV and found that the first 252 days of the data are missing after adding the covariance. I understand the lookback is 252 days (one year), but why would it remove the first 252 days from the DF?

To Reproduce Config_tickers.py: SINGLE_TICKER = ["AAPL"]

My Test.py:

import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
matplotlib.use('Agg')
import datetime
import os
import sys

from finrl import config
from finrl import config_tickers
from finrl.finrl_meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.finrl_meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.finrl_meta.env_portfolio_allocation.env_portfolio import StockPortfolioEnv
from finrl.agents.stablebaselines3.models import DRLAgent
from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline,convert_daily_return_to_pyfolio_ts
from finrl.finrl_meta.data_processor import DataProcessor
from finrl.finrl_meta.data_processors.processor_yahoofinance import YahooFinanceProcessor
# NOTE(review): this runs after the `finrl` imports above, so it cannot
# influence how those imports were resolved.
sys.path.append("../FinRL-Library")

# Create each output directory named in the FinRL config, relative to the
# current working directory, unless the path already exists.
for dir_name in (
    config.DATA_SAVE_DIR,
    config.TRAINED_MODEL_DIR,
    config.TENSORBOARD_LOG_DIR,
    config.RESULTS_DIR,
):
    dir_path = "./" + dir_name
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)

# print(config_tickers.SINGLE_TICKER)
# Do not truncate columns when DataFrames are printed below.
pd.set_option('display.max_columns', None)

# Download price data for the configured ticker list over the fixed date
# range, requesting a '1D' time interval.
dp = YahooFinanceProcessor()
download_args = {
    'start_date': '2008-01-01',
    'end_date': '2022-05-30',
    'ticker_list': config_tickers.SINGLE_TICKER,
    'time_interval': '1D',
}
df = dp.download_data(**download_args)

# Report the size and first rows of the raw download.
print("**************Yahoo Data**************", df.shape, df.head(), sep="\n")

# Configure the feature engineer: technical indicators on, turbulence and
# user-defined features off.
fe = FeatureEngineer(
    use_technical_indicator=True,
    use_turbulence=False,
    user_defined_feature=False,
)

# Replace the raw frame with the pre-processed one.
df = fe.preprocess_data(df)

# Report the pre-processed frame and keep a CSV snapshot for comparison.
print(
    "**************Data After Feature Engineer (Pre-Processed Data)**************",
    df.shape,
    df.head(),
    sep="\n",
)
df.to_csv("datawithTA.csv")

# add covariance matrix as states
df = df.sort_values(['date', 'tic'], ignore_index=True)
# Label every row with the ordinal of its date (0 for the first date, 1 for
# the next, ...), so the index can be sliced by day number.
df.index = df.date.factorize()[0]

# look back is one year
lookback = 252
num_days = len(df.index.unique())

# For every day that has a full look-back window behind it, collect the
# per-ticker returns over that window.
return_list = []
for day in range(lookback, num_days):
    # Label-based .loc slicing includes both end points.
    window = df.loc[day - lookback:day, :]
    closes = window.pivot_table(index='date', columns='tic', values='close')
    return_list.append(closes.pct_change().dropna())

# One covariance matrix (as a NumPy array) per collected return window.
cov_list = [window_returns.cov().values for window_returns in return_list]

# df_cov only has rows for dates from position `lookback` onward.  merge()
# defaults to an inner join, so the first `lookback` dates of df have no
# match in df_cov and are dropped from the result.
df_cov = pd.DataFrame(
    {
        'date': df.date.unique()[lookback:],
        'cov_list': cov_list,
        'return_list': return_list,
    }
)
df = df.merge(df_cov, on='date')
df = df.sort_values(['date', 'tic']).reset_index(drop=True)

# Report the frame after the covariance merge and save it for inspection.
print(
    "**************Data With Covariance Matrix**************",
    df.shape,
    df.head(),
    sep="\n",
)
df.to_csv("ProcessedData.csv")

Desktop (please complete the following information):

  • OS: Mac OS
  • Python: 3.9

varunpan avatar Jun 29 '22 22:06 varunpan