mlr3
mlr3 copied to clipboard
Assertion on 'types' failed
I get an error inside my pipeline and I can't figure out what the problem is.
Here is a reproducible example:
# Reproducible example: tune a ranger classifier wrapped in a branching
# preprocessing graph, using random search via an AutoTuner.

# Attach packages before any of their functions are called. fread() comes from
# data.table; the task / pipeline / tuning helpers come from the mlr3 packages
# attached by mlr3verse.
library(data.table)
library(mlr3verse)

# Data and task ----
data_ <- fread("https://contentiobatch.blob.core.windows.net/qc-live/pead_test.csv")
# Convert the target column to a factor before building the classification task.
data_$bin_extreme_ret_excess_stand_66 <- as.factor(data_$bin_extreme_ret_excess_stand_66)
# The task is called `task_extreme` because that is the object passed to
# at_ranger$train() at the end of the script (it was previously created as
# `task_`, which left `task_extreme` undefined).
task_extreme <- as_task_classif(
  data_,
  id = "extreme",
  target = "bin_extreme_ret_excess_stand_66",
  positive = "1"
)

# Pipeline ----
learner = lrn("classif.ranger", predict_type = "prob")
graph = po("removeconstants", ratio = 0.05) %>>%
  # interaction branch: either `nop_filter` or `modelmatrix` (formula ~ .^2)
  po("branch", options = c("nop_filter", "modelmatrix"), id = "interaction_branch") %>>%
  gunion(list(po("nop", id = "nop_filter"), po("modelmatrix", formula = ~ . ^ 2))) %>>%
  po("unbranch", id = "interaction_unbranch") %>>%
  # preprocessing branch: `nop_prep`, `yeojohnson`, `pca` or `ica`
  po("branch", options = c("nop_prep", "yeojohnson", "pca", "ica"), id = "prep_branch") %>>%
  gunion(list(po("nop", id = "nop_prep"), po("yeojohnson"), po("pca", scale. = TRUE), po("ica"))) %>>%
  po("unbranch", id = "prep_unbranch") %>>%
  learner
plot(graph)
graph_learner = as_learner(graph)

# Inspect the available hyperparameters. head() shows at most the first 65
# rows; indexing with [1:65, ] would pad the result with all-NA rows whenever
# the param set has fewer than 65 entries.
head(as.data.table(graph_learner$param_set)[, .(id, class, lower, upper)], 65L)

# Search space ----
# Parameter ids are prefixed with the id of the PipeOp they belong to.
search_space = ps(
  # preprocessing
  interaction_branch.selection = p_fct(levels = c("nop_filter", "modelmatrix")),
  prep_branch.selection = p_fct(levels = c("nop_prep", "yeojohnson", "pca", "ica")),
  # each of these is tied to the prep branch option it configures
  pca.rank. = p_int(2, 6, depends = prep_branch.selection == "pca"),
  ica.n.comp = p_int(2, 6, depends = prep_branch.selection == "ica"),
  yeojohnson.standardize = p_lgl(depends = prep_branch.selection == "yeojohnson"),
  # model
  classif.ranger.mtry.ratio = p_dbl(0.2, 1),
  classif.ranger.max.depth = p_int(2, 6)
)

# Tuning ----
# plan("multisession", workers = 4L)
at_ranger = auto_tuner(
  method = "random_search",
  learner = graph_learner,
  resampling = rsmp("cv", folds = 3),
  measure = msr("classif.acc"),
  search_space = search_space,
  term_evals = 20
)
at_ranger$train(task_extreme)
The error I get is:
Error in selector_type(private$.feature_types) :
Assertion on 'types' failed: Must be a subset of {'logical','integer','numeric','character','factor','ordered','POSIXct'}, but is {'logical','integer','numeric','character','factor','ordered','POSIXct','imageuri'}.
This happened PipeOp modelmatrix's $train()
It seems like I have an imageuri
class in my table, but I'm not sure how that is possible?
I cannot reproduce this (see reprex below). imageuri
is a new feature type in mlr3torch
(which is currently experimental). Is it possible that this somehow caused the bug?
# Reprex, part 1: load the data, build the classification task and assemble the
# branching preprocessing graph around a ranger learner.
library(mlr3verse)
#> Loading required package: mlr3
library(data.table)
# Read the CSV from the remote URL and turn the target column into a factor.
data_ = fread("https://contentiobatch.blob.core.windows.net/qc-live/pead_test.csv")
data_$bin_extreme_ret_excess_stand_66 = as.factor(data_$bin_extreme_ret_excess_stand_66)
# Classification task with id "extreme"; class "1" is the positive class.
task_extreme = as_task_classif(data_, id = "extreme", target = "bin_extreme_ret_excess_stand_66", positive = "1")
learner = lrn("classif.ranger", predict_type = "prob")
graph = po("removeconstants", ratio = 0.05) %>>%
# interaction branch: either `nop_filter` or `modelmatrix` (formula ~ .^2)
po("branch", options = c("nop_filter", "modelmatrix"), id = "interaction_branch") %>>%
gunion(list(po("nop", id = "nop_filter"), po("modelmatrix", formula = ~ .^2))) %>>%
po("unbranch", id = "interaction_unbranch") %>>%
# preprocessing branch: `nop_prep`, `yeojohnson`, `pca` (scale. = TRUE) or `ica`
po("branch", options = c("nop_prep", "yeojohnson", "pca", "ica"), id = "prep_branch") %>>%
gunion(list(po("nop", id = "nop_prep"), po("yeojohnson"), po("pca", scale. = TRUE), po("ica"))) %>>%
po("unbranch", id = "prep_unbranch") %>>%
learner
plot(graph)
# Wrap the graph as a learner so it can be handed to the tuner.
graph_learner = as_learner(graph)
# List id / class / bounds of the graph learner's parameters. Rows 1:65 are
# requested; in the output below rows 59-65 are all NA, i.e. the table has
# only 58 entries in this session.
as.data.table(graph_learner$param_set)[1:65, .(id, class, lower, upper)]
#> id class lower upper
#> 1: removeconstants.ratio ParamDbl 0 1
#> 2: removeconstants.rel_tol ParamDbl 0 Inf
#> 3: removeconstants.abs_tol ParamDbl 0 Inf
#> 4: removeconstants.na_ignore ParamLgl NA NA
#> 5: removeconstants.affect_columns ParamUty NA NA
#> 6: interaction_branch.selection ParamFct NA NA
#> 7: modelmatrix.formula ParamUty NA NA
#> 8: modelmatrix.affect_columns ParamUty NA NA
#> 9: prep_branch.selection ParamFct NA NA
#> 10: yeojohnson.eps ParamDbl 0 Inf
#> 11: yeojohnson.standardize ParamLgl NA NA
#> 12: yeojohnson.lower ParamDbl -Inf Inf
#> 13: yeojohnson.upper ParamDbl -Inf Inf
#> 14: yeojohnson.affect_columns ParamUty NA NA
#> 15: pca.center ParamLgl NA NA
#> 16: pca.scale. ParamLgl NA NA
#> 17: pca.rank. ParamInt 1 Inf
#> 18: pca.affect_columns ParamUty NA NA
#> 19: ica.n.comp ParamInt 1 Inf
#> 20: ica.alg.typ ParamFct NA NA
#> 21: ica.fun ParamFct NA NA
#> 22: ica.alpha ParamDbl 1 2
#> 23: ica.method ParamFct NA NA
#> 24: ica.row.norm ParamLgl NA NA
#> 25: ica.maxit ParamInt 1 Inf
#> 26: ica.tol ParamDbl 0 Inf
#> 27: ica.verbose ParamLgl NA NA
#> 28: ica.w.init ParamUty NA NA
#> 29: ica.affect_columns ParamUty NA NA
#> 30: classif.ranger.alpha ParamDbl -Inf Inf
#> 31: classif.ranger.always.split.variables ParamUty NA NA
#> 32: classif.ranger.class.weights ParamUty NA NA
#> 33: classif.ranger.holdout ParamLgl NA NA
#> 34: classif.ranger.importance ParamFct NA NA
#> 35: classif.ranger.keep.inbag ParamLgl NA NA
#> 36: classif.ranger.max.depth ParamInt 0 Inf
#> 37: classif.ranger.min.node.size ParamInt 1 Inf
#> 38: classif.ranger.min.prop ParamDbl -Inf Inf
#> 39: classif.ranger.minprop ParamDbl -Inf Inf
#> 40: classif.ranger.mtry ParamInt 1 Inf
#> 41: classif.ranger.mtry.ratio ParamDbl 0 1
#> 42: classif.ranger.num.random.splits ParamInt 1 Inf
#> 43: classif.ranger.num.threads ParamInt 1 Inf
#> 44: classif.ranger.num.trees ParamInt 1 Inf
#> 45: classif.ranger.oob.error ParamLgl NA NA
#> 46: classif.ranger.regularization.factor ParamUty NA NA
#> 47: classif.ranger.regularization.usedepth ParamLgl NA NA
#> 48: classif.ranger.replace ParamLgl NA NA
#> 49: classif.ranger.respect.unordered.factors ParamFct NA NA
#> 50: classif.ranger.sample.fraction ParamDbl 0 1
#> 51: classif.ranger.save.memory ParamLgl NA NA
#> 52: classif.ranger.scale.permutation.importance ParamLgl NA NA
#> 53: classif.ranger.se.method ParamFct NA NA
#> 54: classif.ranger.seed ParamInt -Inf Inf
#> 55: classif.ranger.split.select.weights ParamUty NA NA
#> 56: classif.ranger.splitrule ParamFct NA NA
#> 57: classif.ranger.verbose ParamLgl NA NA
#> 58: classif.ranger.write.forest ParamLgl NA NA
#> 59: <NA> <NA> NA NA
#> 60: <NA> <NA> NA NA
#> 61: <NA> <NA> NA NA
#> 62: <NA> <NA> NA NA
#> 63: <NA> <NA> NA NA
#> 64: <NA> <NA> NA NA
#> 65: <NA> <NA> NA NA
#> id class lower upper
# Reprex, part 2: define the search space over the graph learner's parameters
# and run a random-search AutoTuner on `task_extreme`.
# Parameter ids carry the id of the PipeOp they belong to as a prefix.
search_space = ps(
# preprocessing
interaction_branch.selection = p_fct(levels = c("nop_filter", "modelmatrix")),
prep_branch.selection = p_fct(levels = c("nop_prep", "yeojohnson", "pca", "ica")),
# each of the next three is tied to the prep branch option it configures;
# the log below reports them as NA when `nop_prep` is selected
pca.rank. = p_int(2, 6, depends = prep_branch.selection == "pca"),
ica.n.comp = p_int(2, 6, depends = prep_branch.selection == "ica"),
yeojohnson.standardize = p_lgl(depends = prep_branch.selection == "yeojohnson"),
# model
classif.ranger.mtry.ratio = p_dbl(0.2, 1),
classif.ranger.max.depth = p_int(2, 6)
)
# plan("multisession", workers = 4L)
at_ranger = auto_tuner(
method = "random_search",
learner = graph_learner,
resampling = rsmp("cv", folds = 3),
measure = msr("classif.acc"),
search_space = search_space,
# NOTE(review): only 2 evaluations here (the original report used 20). In the
# log below both sampled configurations selected `nop_filter`, so the
# `modelmatrix` branch named in the reported error was not exercised by this
# run -- worth re-running with more evaluations before concluding.
term_evals = 2
)
at_ranger$train(task_extreme)
#> INFO [17:01:28.870] [bbotk] Starting to optimize 7 parameter(s) with '<OptimizerRandomSearch>' and '<TerminatorEvals> [n_evals=2, k=0]'
#> INFO [17:01:28.934] [bbotk] Evaluating 1 configuration(s)
#> INFO [17:01:29.007] [mlr3] Running benchmark with 3 resampling iterations
#> INFO [17:01:29.057] [mlr3] Applying learner 'removeconstants.interaction_branch.nop_filter.modelmatrix.interaction_unbranch.prep_branch.nop_prep.yeojohnson.pca.ica.prep_unbranch.classif.ranger' on task 'extreme' (iter 1/3)
#> INFO [17:01:30.263] [mlr3] Applying learner 'removeconstants.interaction_branch.nop_filter.modelmatrix.interaction_unbranch.prep_branch.nop_prep.yeojohnson.pca.ica.prep_unbranch.classif.ranger' on task 'extreme' (iter 3/3)
#> INFO [17:01:31.401] [mlr3] Applying learner 'removeconstants.interaction_branch.nop_filter.modelmatrix.interaction_unbranch.prep_branch.nop_prep.yeojohnson.pca.ica.prep_unbranch.classif.ranger' on task 'extreme' (iter 2/3)
#> INFO [17:01:32.557] [mlr3] Finished benchmark
#> INFO [17:01:32.591] [bbotk] Result of batch 1:
#> INFO [17:01:32.593] [bbotk] interaction_branch.selection prep_branch.selection pca.rank. ica.n.comp
#> INFO [17:01:32.593] [bbotk] nop_filter nop_prep NA NA
#> INFO [17:01:32.593] [bbotk] yeojohnson.standardize classif.ranger.mtry.ratio classif.ranger.max.depth
#> INFO [17:01:32.593] [bbotk] NA 0.7816803 2
#> INFO [17:01:32.593] [bbotk] classif.acc warnings errors runtime_learners
#> INFO [17:01:32.593] [bbotk] 0.5555177 0 0 3.463
#> INFO [17:01:32.593] [bbotk] uhash
#> INFO [17:01:32.593] [bbotk] a06deead-52dd-41a6-8ebd-6ac0d92ee6f1
#> INFO [17:01:32.604] [bbotk] Evaluating 1 configuration(s)
#> INFO [17:01:32.664] [mlr3] Running benchmark with 3 resampling iterations
#> INFO [17:01:32.669] [mlr3] Applying learner 'removeconstants.interaction_branch.nop_filter.modelmatrix.interaction_unbranch.prep_branch.nop_prep.yeojohnson.pca.ica.prep_unbranch.classif.ranger' on task 'extreme' (iter 3/3)
#> INFO [17:01:34.955] [mlr3] Applying learner 'removeconstants.interaction_branch.nop_filter.modelmatrix.interaction_unbranch.prep_branch.nop_prep.yeojohnson.pca.ica.prep_unbranch.classif.ranger' on task 'extreme' (iter 1/3)
#> INFO [17:01:37.279] [mlr3] Applying learner 'removeconstants.interaction_branch.nop_filter.modelmatrix.interaction_unbranch.prep_branch.nop_prep.yeojohnson.pca.ica.prep_unbranch.classif.ranger' on task 'extreme' (iter 2/3)
#> INFO [17:01:39.868] [mlr3] Finished benchmark
#> INFO [17:01:39.900] [bbotk] Result of batch 2:
#> INFO [17:01:39.901] [bbotk] interaction_branch.selection prep_branch.selection pca.rank. ica.n.comp
#> INFO [17:01:39.901] [bbotk] nop_filter nop_prep NA NA
#> INFO [17:01:39.901] [bbotk] yeojohnson.standardize classif.ranger.mtry.ratio classif.ranger.max.depth
#> INFO [17:01:39.901] [bbotk] NA 0.5291319 5
#> INFO [17:01:39.901] [bbotk] classif.acc warnings errors runtime_learners
#> INFO [17:01:39.901] [bbotk] 0.5650453 0 0 7.172
#> INFO [17:01:39.901] [bbotk] uhash
#> INFO [17:01:39.901] [bbotk] cbcf181e-a15c-4712-879f-9093d423c886
#> INFO [17:01:39.934] [bbotk] Finished optimizing after 2 evaluation(s)
#> INFO [17:01:39.935] [bbotk] Result:
#> INFO [17:01:39.936] [bbotk] interaction_branch.selection prep_branch.selection pca.rank. ica.n.comp
#> INFO [17:01:39.936] [bbotk] nop_filter nop_prep NA NA
#> INFO [17:01:39.936] [bbotk] yeojohnson.standardize classif.ranger.mtry.ratio classif.ranger.max.depth
#> INFO [17:01:39.936] [bbotk] NA 0.5291319 5
#> INFO [17:01:39.936] [bbotk] learner_param_vals x_domain classif.acc
#> INFO [17:01:39.936] [bbotk] <list[12]> <list[4]> 0.5650453
Created on 2022-03-17 by the reprex package (v2.0.1)