Qlib Features Calculation error
Hello, I am pretty new to Qlib scripting, so please bear with me. I am currently running into issues with feature calculation and with understanding how that process works. For some background: I am trying to get Qlib to process my own stock market data from a CSV file. The file has the expected format, with the columns datetime, symbol, open, high, low, close, and volume. I want my script to process this data and calculate all 158 Alpha158 features for it. Please let me know if that is possible, and if it is, I would appreciate some pseudocode for it. Thank you so much. I have provided some code below for more context:
import qlib from qlib.contrib.data.handler import Alpha158 import pandas as pd import os import logging
# Enable detailed logging for debugging
logging.basicConfig(level=logging.DEBUG)
def initialize_qlib(data_dir):
    """Initialise Qlib with ``data_dir`` as its data provider location.

    Args:
        data_dir: Directory handed to ``qlib.init`` as ``provider_uri``.
            NOTE(review): presumably this must already contain data in
            Qlib's own on-disk format -- confirm against the Qlib docs.
    """
    logging.info(f"Initializing Qlib with directory: {data_dir}")
    qlib.init(provider_uri=data_dir)
    logging.info("Qlib initialized successfully.")
def prepare_qlib_data(csv_path, output_dir):
    """Read a price CSV, normalise its timestamps, and save it as parquet.

    The CSV is loaded with pandas, its ``datetime`` (or, failing that,
    ``date``) column is parsed into a ``datetime`` column, rows whose
    timestamp cannot be parsed are dropped, and the result is indexed by
    ``(datetime, symbol)`` and written to ``<output_dir>/data.parquet``.

    NOTE(review): this only produces a parquet file. Whether Qlib can read
    that file directly is not established here; Qlib's CSV-to-bin
    conversion may still be required -- confirm against the Qlib docs.

    Args:
        csv_path: Path of the input CSV file.
        output_dir: Directory to write ``data.parquet`` into; created if
            it does not exist.

    Raises:
        ValueError: If the CSV has neither a ``datetime`` nor a ``date``
            column, or has no ``symbol`` column.
    """
    logging.info(f"Preparing data from CSV: {csv_path}")
    df = pd.read_csv(csv_path)
    logging.debug(f"Original DataFrame:\n{df.head()}")

    # Validate the required columns up front so a malformed file fails with
    # a clear message instead of a KeyError from set_index further down.
    if 'datetime' in df.columns:
        time_column = 'datetime'
    elif 'date' in df.columns:
        time_column = 'date'
    else:
        raise ValueError("CSV file must have a 'datetime' or 'date' column.")
    if 'symbol' not in df.columns:
        raise ValueError("CSV file must have a 'symbol' column.")

    # Ensure datetime parsing; unparseable values become NaT.
    df['datetime'] = pd.to_datetime(df[time_column], errors='coerce')

    # Drop invalid rows, but say so: silently losing rows hides bad input.
    invalid_rows = int(df['datetime'].isna().sum())
    if invalid_rows:
        logging.warning(
            f"Dropping {invalid_rows} row(s) with unparseable datetime values."
        )
    df = df.dropna(subset=['datetime']).set_index(['datetime', 'symbol'])

    # Save to parquet
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, 'data.parquet')
    df.to_parquet(output_file)
    logging.info(f"Data prepared and saved to: {output_file}")
def setup_alpha158_handler(data_dir):
    """Build an ``Alpha158`` data handler for AAPL.

    Args:
        data_dir: Currently unused; kept so existing callers keep working.

    Returns:
        The constructed ``Alpha158`` handler.
    """
    logging.info("Setting up Alpha158 data handler.")

    # Define configuration for Alpha158
    data_handler_config = {
        "start_time": "2013-12-30",
        "end_time": "2024-12-24",
        "fit_start_time": "2015-01-01",
        "fit_end_time": "2017-01-01",
        # A bare string is interpreted by Qlib as a market/universe name,
        # not a ticker, so a single stock must be passed as a list.
        "instruments": ["AAPL"],
    }

    # Initialize the handler
    handler = Alpha158(**data_handler_config)
    logging.info("Alpha158 handler initialized successfully.")
    return handler
def calculate_features(handler, instrument, freq):
    """Fetch the label and feature column sets from a data handler.

    Args:
        handler: Object exposing ``get_cols()`` and ``fetch(col_set=...)``;
            each fetched result must support ``.head()``.
        instrument: Instrument name; used only in log messages.
        freq: Frequency name; used only in log messages.

    Returns:
        A ``(features, labels)`` tuple, i.e. the results of
        ``handler.fetch(col_set="feature")`` and
        ``handler.fetch(col_set="label")``.

    Raises:
        Exception: Any error raised by the handler is logged and re-raised.
    """
    try:
        logging.info(f"Calculating features for {instrument} at {freq} frequency.")

        # Debug available columns
        logging.debug(f"Columns in data: {handler.get_cols()}")

        # Fetch labels
        labels = handler.fetch(col_set="label")
        logging.debug(f"Labels:\n{labels.head()}")

        # Fetch features
        features = handler.fetch(col_set="feature")
        logging.debug(f"Features:\n{features.head()}")

        print("Features and labels calculated successfully.")
        return features, labels
    except Exception as e:
        # Log with the instrument for context, then let the caller decide.
        logging.error(f"Error calculating features for {instrument}: {e}")
        raise
if __name__ == "__main__":
    csv_path = "C:/Users/14168/.qlib/qlib_data/data/historical (3).csv"  # Input CSV
    data_dir = "C:/Users/14168/.qlib/qlib_data/"  # Qlib Data Directory

    # Run the pipeline once, inside the error boundary. The earlier draft
    # also ran prepare/init before the try block and called
    # calculate_features without a handler, which raised a TypeError.
    try:
        # Prepare data
        prepare_qlib_data(csv_path, data_dir)

        # Initialize Qlib
        initialize_qlib(data_dir)

        # Setup Alpha158
        handler = setup_alpha158_handler(data_dir)

        # Calculate and print features
        features, labels = calculate_features(handler, instrument='AAPL', freq='day')
    except Exception as e:
        # Top-level boundary: record the traceback as well as the message.
        logging.exception(f"An error occurred: {e}")
If you want to use your own prepared data, you first need to convert it from CSV to Qlib's .bin format. This can be roughly divided into two steps: first normalise the data, and then convert the normalised CSV files to .bin files (Qlib ships `scripts/dump_bin.py` for this conversion). See the Qlib documentation on converting CSV data into Qlib format for the specific steps.