qlib icon indicating copy to clipboard operation
qlib copied to clipboard

kernels get killed OOM when running 1min data REG_CN

Open DanielKui opened this issue 1 year ago • 1 comments

I'm running 1-min data with the script below; swap memory is 128G, but the process still gets killed by the OOM killer — why? Start time: 2020-09-14, end time: 2021-06-21.

all.txt contains 4067 stocks (attached at the end). I downloaded this 1-min data from Yahoo with: python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data_1min --region cn --interval 1min

I used 8 kernels.

How should I configure this to avoid the OOM?

`#! /usr/bin/env python import os import qlib

import pandas as pd from qlib.contrib.data.handler import Alpha158 from qlib.data.dataset import TSDatasetH from qlib.contrib.model.pytorch_alstm_ts import ALSTM from qlib.tests.data import GetData from qlib.constant import REG_CN from qlib.data import D from qlib.contrib.report import analysis_model, analysis_position import multiprocessing from qlib.utils import exists_qlib_data, init_instance_by_config from qlib.workflow import R from qlib.workflow.record_temp import SignalRecord, PortAnaRecord from qlib.utils import flatten_dict

if name == "main": multiprocessing.freeze_support()

provider_uri = "~/.qlib/qlib_data/cn_data_1min"  # target_dir

qlib.init(provider_uri=provider_uri)

GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
qlib.init(provider_uri=provider_uri, region=REG_CN,kernels=8, expression_cache=None, dataset_cache=None)

D.calendar(start_time='2020-09-14', end_time='2021-06-21',freq='1min')
benchmark = "SH000300"

instruments = D.instruments(market='all')
#print (type(instruments))
#print (instruments)

stock_list = D.list_instruments(instruments=instruments,
start_time='2020-09-14',
end_time='2021-06-21',
freq = '1min',
as_list=True)

#D.calendar()

# 设置日期、股票池等参数
data_handler_config = {
"start_time":"2020-09-14",
"end_time":"2021-06-21",
"fit_start_time":"2020-09-20",
"fit_end_time":"2021-06-18",
"freq":'1min',
"instruments":stock_list }

h = Alpha158(**data_handler_config)

# 获取列名(因子名称)
#print(h.get_cols())

Alpha158_df_feature = h.fetch(col_set="feature")

#print(Alpha158_df_feature)

task = {
"model": {
"class": "LGBModel",
"module_path": "qlib.contrib.model.gbdt",
"kwargs": {
"loss": "mse",
"colsample_bytree": 0.8879,
"learning_rate": 0.0421,
"subsample": 0.8789,
"lambda_l1": 205.6999,
"lambda_l2": 580.9768,
"max_depth": 8,
"num_leaves": 210,
"num_threads": 20,
        },
    },
"dataset": {
"class": "DatasetH",
"module_path": "qlib.data.dataset",
"kwargs": {
"handler": {
"class": "Alpha158",
"module_path": "qlib.contrib.data.handler",
"kwargs": data_handler_config,
            },
"segments": {
"train": ("2020-09-14", "2020-11-30"),
"valid": ("2020-12-01", "2021-02-28"),
"test": ("2021-03-01", "2021-06-20"),
            },
        },
    },
}

# model initiaiton
model = init_instance_by_config(task["model"]) #
dataset = init_instance_by_config(task["dataset"])
# start exp to train model
with R.start(experiment_name="train_model"):
        R.log_params(**flatten_dict(task))
        model.fit(dataset)  #拟合模型
        R.save_objects(trained_model=model)
        rid = R.get_recorder().id


###################################
# prediction, backtest & analysis
###################################
port_analysis_config = {
    "executor": {
        "class": "SimulatorExecutor",
        "module_path": "qlib.backtest.executor",
        "kwargs": {
            "time_per_step": "1min",
            "generate_portfolio_metrics": True,
        },
    },
    "strategy": {
        "class": "TopkDropoutStrategy",
        "module_path": "qlib.contrib.strategy.signal_strategy",
        "kwargs": {
            "model": model,
            "dataset": dataset,
            "topk": 50,
            "n_drop": 5,
        },
    },
    "backtest": {
        "start_time": "2020-09-14",
        "end_time": "2021-06-15",
        "account": 1000,
        "benchmark": benchmark,
        "exchange_kwargs": {
            "freq": "1min",
            "limit_threshold": 0.095,
            "deal_price": "close",
            "open_cost": 0.0005,
            "close_cost": 0.0015,
            "min_cost": 5,
        },
    },
}

# backtest and analysis

with R.start(experiment_name="backtest_analysis"):
    recorder = R.get_recorder(recorder_id=rid, experiment_name="train_model")
    model = recorder.load_object("trained_model")


    # prediction
    recorder = R.get_recorder()
    ba_rid = recorder.id
    sr = SignalRecord(model, dataset, recorder)
    sr.generate()

    # backtest & analysis
    par = PortAnaRecord(recorder, port_analysis_config, "1min")
    par.generate()

''' recorder = R.get_recorder(recorder_id=ba_rid, experiment_name="backtest_analysis")

    label_df = dataset.prepare("test", col_set="label")
    label_df.columns = ["label"]

    report_normal_df = recorder.load_object("portfolio_analysis/report_normal_1min.pkl")
    positions = recorder.load_object("portfolio_analysis/positions_normal_1min.pkl")
    analysis_df = recorder.load_object("portfolio_analysis/port_analysis_1min.pkl")

    analysis_position.report_graph(report_normal_df)
    analysis_position.risk_analysis_graph(analysis_df, report_normal_df)


    pred_df = recorder.load_object("pred.pkl")
    pred_label = pd.concat([label_df, pred_df], axis=1, sort=True).reindex(label_df.index)

    print (f"label_df size:{len(label_df)}")
    print (f"pred_label size:{len(pred_label)}")
    #os._exit(0)
    ###analysis_position.score_ic_graph(pred_label)
    
    ###analysis_model.model_performance_graph(pred_label)

''' `

image image

image

all.txt

DanielKui avatar Jul 10 '24 04:07 DanielKui

We think it may be caused by the machine having too little memory; we tried running your code but could not reproduce the problem. You could try shortening the training period, or try a machine with more memory.

SunsetWolf avatar Aug 23 '24 05:08 SunsetWolf