modelDown
modelDown copied to clipboard
DALEX on custom stacked models
Pseudo-reprex below to illustrate workflow.
There are two stages of stacking; the code below is abbreviated to show only the final stage.
# Input data for prediction; these data are themselves the output of an earlier
# stacked model (per the issue author).
# Columns: x1 (character id), x2 (numeric), true (passed as `y` to explain()
# below), and pred1..pred5 (numeric).
# NOTE(review): pred1..pred5 are presumably first-stage model predictions --
# confirm with the author.
df <- tibble::tribble(
~x1, ~x2, ~true, ~pred1, ~pred2, ~pred3, ~pred4, ~pred5,
"0016", 1, 11255, 9782.06546666667, 8226.73783726366, 8423.53411898339, 7663.85714285714, 7778.32234611454,
"0016", 2, 10155, 9917.16225000001, 7390.2726470072, 7548.50621212894, 6011.57142857143, 7020.0197927677,
"0016", 3, 9905, 8365.66048333333, 4748.35733132711, 4897.40398331136, 5625.14285714286, 5197.59820269678,
"0026", 1, 9569, 10542.7790333333, 12448.8281473898, 12982.2853847065, 9529.42857142857, 9913.60100542533,
"0026", 2, 15004, 12332.88455, 13118.3179554928, 13490.4519001908, 9449.14285714286, 9782.48187764126,
"0027", 1, 4623, 6228.92556666668, 7901.02224985066, 8072.3059097473, 7663.85714285714, 7564.7019858157,
"0027", 2, 3666, 3902.33416666666, 5351.58779239503, 5501.55032427708, 5757.85714285714, 5791.90612060224,
"0027", 3, 2046, 3730.91108333333, 5405.90164588071, 5431.22100425988, 5700, 5574.85787520228,
"0345", 1, 7848, 7911.66811666667, 7332.14726332333, 7535.03388134704, 8428.85714285714, 7504.20919309283,
"0345", 2, 5594, 6249.8431, 5302.09068924222, 5602.24650648537, 6253, 5936.17306199591,
"0348", 1, 6118, 5888.9112, 6782.1549012783, 6983.85792156352, 7145.28571428571, 6996.64665890851,
"0348", 2, 4115, 4655.3621, 4061.92478416692, 4339.3944039624, 5379.71428571429, 5201.36079952954,
"0348", 3, 3792, 4703.56786666666, 4862.77758785772, 4886.36623749198, 5413.85714285714, 5316.2047603152,
"1000", 1, 9982, 8894.2428, 8950.05680053561, 8724.27457157357, 7643.14285714286, 8427.52273508174,
"1000", 2, 4218, 5103.73553333333, 6755.30317981863, 6492.15505744351, 7836, 6900.52725335413,
"1022", 1, 9021, 8966.84941666667, 8921.14926298024, 8514.45660876879, 8590.57142857143, 8566.07119574923,
"1022", 2, 11692, 10205.8180333333, 8895.88440879051, 8417.59814231434, 8185.85714285714, 8225.60579235643,
"1022", 3, 9420, 9664.82173333334, 9422.99681882565, 8835.71873031759, 7853.57142857143, 8126.76078652109,
"1022", 4, 6850, 7419.07043333333, 8995.48869657391, 8194.63910112673, 7604.14285714286, 7815.14405713875,
"1022", 5, 6850, 7419.07043333333, 8817.8438463534, 7883.22080414475, 6846.14285714286, 7515.84608489043
)
# Models that make up the stack; `rf`, `pca`, `svm` and `enet` are not defined
# in this snippet and must already exist in the session.
md <- list(rf, pca, svm, enet)
#' Average the predictions of several models (simple stacking by mean).
#'
#' @param df Data handed to `predict()` as its second argument for every model.
#' @param md Non-empty list of models; each element must work with
#'   `predict(model, df)` and return values that can be summed with `+`.
#' @return The element-wise mean of the individual models' predictions.
model_pred_stack <- function(df, md) {
  # Fail early with a clear message: an empty list has nothing to average.
  if (length(md) == 0L) {
    stop("`md` must be a non-empty list of models", call. = FALSE)
  }

  # seq_along() rather than 1:length(md), which would yield c(1, 0) for an
  # empty list and index out of bounds.
  total <- 0
  for (i in seq_along(md)) {
    total <- total + predict(md[[i]], df)
  }

  total / length(md)
}
# Averaged prediction of all models in `md` on `df`.
model_pred <- model_pred_stack(df = df, md)
# Attempt 1: explain a single model from the list. With DALEX the models have
# to be explained one by one, which does not reflect the intention of stacking;
# otherwise modelDown complains.
# NOTE(review): `explain()` and `modelDown()` are called unqualified -- assumes
# DALEX and modelDown are attached earlier in the session; not shown here.
explain_stacked <- explain(
md[[1]],
data = df,
y = df$true,
label = "stacked"
)
modelDown(explain_stacked,
device = "svg",
output_folder = "output_data/modelDown_stacked")
# Attempt 2: pass the whole list of models. modelDown then fails to generate
# diagnostics, aside from the data description.
# NOTE(review): DALEX::explain() takes a `predict_function` argument; supplying
# a wrapper around model_pred_stack() may allow the list to be explained as a
# single stacked model -- TODO confirm against the DALEX documentation.
explain_stacked <- explain(
md,
data = df,
y = df$true,
label = "stacked"
)
modelDown(explain_stacked,
device = "svg",
output_folder = "output_data/modelDown_stacked")
Thank you for the provided example. I will look into it.
Appreciate the prompt response; your team's work (starting from DALEX) is a game changer 👍