SHAP方法能用于多变量预测的DeepAR吗?
使用 ShapExplainer 解释多变量预测的 DeepAR 模型时报错,提示仅支持单变量输出(Only support univariate output!)。请问有什么解决方案?
代码如下: import numpy as np import pandas as pd import paddle
from paddlets.datasets.tsdataset import TSDataset from paddlets.models.forecasting import DeepARModel from paddlets.transform.sklearn_transforms import StandardScaler from paddlets.xai.post_hoc.shap_explainer import ShapExplainer
x = np.linspace(-np.pi, np.pi, 2000)
data = pd.DataFrame( { 'time_col': pd.date_range('2022-01-01', periods=2000, freq='1h'), 'y1': np.sin(x) * 1 + np.random.randn(2000), 'y2': np.sin(x) * 2 + np.random.randn(2000), 'y3': np.sin(x) * 3 + np.random.randn(2000), 'y4': np.sin(x) * 4 + np.random.randn(2000), 'y5': np.sin(x) * 5 + np.random.randn(2000), 'y6': np.sin(x) * 6 + np.random.randn(2000), 'y7': np.sin(x) * 7 + np.random.randn(2000)} )
ts = TSDataset.load_from_dataframe( data, time_col='time_col', target_cols=['y1','y2','y3','y4','y5','y6','y7'], freq='1h', fillna_method='pre' )
train, val = ts.split(0.8) scaler = StandardScaler() scaler.fit(train) scaled = scaler.transform(ts) train_scaled = scaler.transform(train) val_scaled = scaler.transform(val)
deepar = DeepARModel( in_chunk_len = 13, out_chunk_len = 1, max_epochs=100, optimizer_params = dict(learning_rate=0.0002), dropout = 0.15, batch_size = 32, # 批量大小 patience=20, num_samples = 101, regression_mode="sampling", output_mode="quantiles", rnn_type_or_module = 'LSTM' ) np.random.seed(2023) paddle.seed(2023)
# 模型训练
deepar.fit(train_scaled, val_scaled) se = ShapExplainer(deepar, train_scaled, background_sample_number=100, keep_index=True, use_paddleloader=False) shap_value = se.explain(val_scaled, nsamples=100)
报错如下:
ValueError Traceback (most recent call last)
/tmp/ipykernel_35604/1646252647.py in <module>
/usr/local/lib/python3.7/dist-packages/paddlets/xai/post_hoc/shap_explainer.py in __init__(self, model, background_data, background_sample_number, shap_method, task_type, seed, use_paddleloader, **kwargs)
     62         raise_if(shap_method not in ['kernel', 'deep'], 'Only support kernel shap and deep shap!')
     63         raise_if_not(isinstance(background_data.freq, str), 'Only support timeindex data!')
---> 64         raise_if(len(background_data.get_target().columns) > 1, 'Only support univariate output!')
     65
     66         self._model = model
/usr/local/lib/python3.7/dist-packages/paddlets/logger/logger.py in raise_if(condition, message, logger)
    154
    155     """
--> 156     raise_if_not(not condition, message, logger)
/usr/local/lib/python3.7/dist-packages/paddlets/logger/logger.py in raise_if_not(condition, message, logger)
    133     if not condition:
    134         logger.error("ValueError: " + message)
--> 135         raise ValueError(message)
    136
    137
ValueError: Only support univariate output!
您好,抱歉,目前 ShapExplainer 仅支持单变量目标(univariate output),暂时无法支持多变量预测。
长时间未回复,该issue已关闭,如仍有问题可以reopen或新开issue。