mlr3 icon indicating copy to clipboard operation
mlr3 copied to clipboard

Assertion on 'types' failed

Open MislavSag opened this issue 2 years ago • 1 comment

I get an error inside my pipeline and I can't figure out what the problem is.

Here is a reproducible example:

# Reproducible example: tune a preprocessing + ranger pipeline with an
# AutoTuner. Packages are attached first so that fread(), as_task_classif(),
# po(), lrn(), ... exist before they are called.
library(data.table)
library(mlr3verse)

# Load the data and turn the target column into a factor for classification.
data_ <- fread("https://contentiobatch.blob.core.windows.net/qc-live/pead_test.csv")
data_$bin_extreme_ret_excess_stand_66 <- as.factor(data_$bin_extreme_ret_excess_stand_66)
# The task object must be called task_extreme: that is the name passed to
# at_ranger$train() at the end of the script.
task_extreme <- as_task_classif(data_, id = "extreme", target = "bin_extreme_ret_excess_stand_66", positive = "1")

learner <- lrn("classif.ranger", predict_type = "prob")
graph <- po("removeconstants", ratio = 0.05) %>>%
  # modelmatrix: optional pairwise-interaction features, selected via
  # interaction_branch.selection
  po("branch", options = c("nop_filter", "modelmatrix"), id = "interaction_branch") %>>%
  gunion(list(po("nop", id = "nop_filter"), po("modelmatrix", formula = ~ .^2))) %>>%
  po("unbranch", id = "interaction_unbranch") %>>%
  # scaling: one of four alternatives, selected via prep_branch.selection
  po("branch", options = c("nop_prep", "yeojohnson", "pca", "ica"), id = "prep_branch") %>>%
  gunion(list(po("nop", id = "nop_prep"), po("yeojohnson"), po("pca", scale. = TRUE), po("ica"))) %>>%
  po("unbranch", id = "prep_unbranch") %>>%
  learner
plot(graph)
graph_learner <- as_learner(graph)
# Show at most the first 65 parameters. head() stops at the last existing row,
# whereas indexing with 1:65 pads the table with all-NA rows when the
# parameter set is shorter than 65.
head(as.data.table(graph_learner$param_set)[, .(id, class, lower, upper)], 65L)
search_space <- ps(
  # preprocessing
  interaction_branch.selection = p_fct(levels = c("nop_filter", "modelmatrix")),
  prep_branch.selection = p_fct(levels = c("nop_prep", "yeojohnson", "pca", "ica")),
  # the next three are only sampled when their branch is the selected one
  pca.rank. = p_int(2, 6, depends = prep_branch.selection == "pca"),
  ica.n.comp = p_int(2, 6, depends = prep_branch.selection == "ica"),
  yeojohnson.standardize = p_lgl(depends = prep_branch.selection == "yeojohnson"),
  # model
  classif.ranger.mtry.ratio = p_dbl(0.2, 1),
  classif.ranger.max.depth = p_int(2, 6)
)
# plan("multisession", workers = 4L)
# Random search over search_space: 3-fold CV, classification accuracy,
# stopping after 20 evaluations.
at_ranger <- auto_tuner(
  method = "random_search",
  learner = graph_learner,
  resampling = rsmp("cv", folds = 3),
  measure = msr("classif.acc"),
  search_space = search_space,
  term_evals = 20
)
at_ranger$train(task_extreme)

The error I get is:

Error in selector_type(private$.feature_types) : 
  Assertion on 'types' failed: Must be a subset of {'logical','integer','numeric','character','factor','ordered','POSIXct'}, but is {'logical','integer','numeric','character','factor','ordered','POSIXct','imageuri'}.
This happened PipeOp modelmatrix's $train()

It seems like I have an imageuri class in my table, but I am not sure how that is possible.

MislavSag avatar Mar 02 '22 08:03 MislavSag

Cannot reproduce (see reprex below). imageuri is a new feature type in mlr3torch (which is currently experimental), is it possible that this somehow caused the bug?

# Reprex, part 1: attach packages, build the task, and assemble the pipeline.
library(mlr3verse)
#> Loading required package: mlr3
library(data.table)

# Download the data, convert the target column to a factor, and wrap it in a
# classification task with "1" as the positive class.
data_ = fread("https://contentiobatch.blob.core.windows.net/qc-live/pead_test.csv")
data_$bin_extreme_ret_excess_stand_66 = as.factor(data_$bin_extreme_ret_excess_stand_66)
task_extreme = as_task_classif(data_, id = "extreme", target = "bin_extreme_ret_excess_stand_66", positive = "1")

# ranger classifier that predicts probabilities; it is the final graph node.
learner = lrn("classif.ranger", predict_type = "prob")
# Pipeline: removeconstants -> (nop | modelmatrix) -> (nop | yeojohnson |
# pca | ica) -> ranger. Each branch/unbranch pair wraps a set of alternatives.
graph = po("removeconstants", ratio = 0.05) %>>%
  # modelmatrix
  po("branch", options = c("nop_filter", "modelmatrix"), id = "interaction_branch") %>>%
  gunion(list(po("nop", id = "nop_filter"), po("modelmatrix", formula = ~ .^2))) %>>%
  po("unbranch", id = "interaction_unbranch") %>>%
  # scaling
  po("branch", options = c("nop_prep", "yeojohnson", "pca", "ica"), id = "prep_branch") %>>%
  gunion(list(po("nop", id = "nop_prep"), po("yeojohnson"), po("pca", scale. = TRUE), po("ica"))) %>>%
  po("unbranch", id = "prep_unbranch") %>>%
  learner
plot(graph)

# Wrap the graph as a learner and list its hyperparameters.
graph_learner = as_learner(graph)
# Rows 1:65 are requested, but the printed table has only 58 parameters, so
# rows 59-65 come back as all-NA padding.
as.data.table(graph_learner$param_set)[1:65, .(id, class, lower, upper)]
#>                                              id    class lower upper
#>  1:                       removeconstants.ratio ParamDbl     0     1
#>  2:                     removeconstants.rel_tol ParamDbl     0   Inf
#>  3:                     removeconstants.abs_tol ParamDbl     0   Inf
#>  4:                   removeconstants.na_ignore ParamLgl    NA    NA
#>  5:              removeconstants.affect_columns ParamUty    NA    NA
#>  6:                interaction_branch.selection ParamFct    NA    NA
#>  7:                         modelmatrix.formula ParamUty    NA    NA
#>  8:                  modelmatrix.affect_columns ParamUty    NA    NA
#>  9:                       prep_branch.selection ParamFct    NA    NA
#> 10:                              yeojohnson.eps ParamDbl     0   Inf
#> 11:                      yeojohnson.standardize ParamLgl    NA    NA
#> 12:                            yeojohnson.lower ParamDbl  -Inf   Inf
#> 13:                            yeojohnson.upper ParamDbl  -Inf   Inf
#> 14:                   yeojohnson.affect_columns ParamUty    NA    NA
#> 15:                                  pca.center ParamLgl    NA    NA
#> 16:                                  pca.scale. ParamLgl    NA    NA
#> 17:                                   pca.rank. ParamInt     1   Inf
#> 18:                          pca.affect_columns ParamUty    NA    NA
#> 19:                                  ica.n.comp ParamInt     1   Inf
#> 20:                                 ica.alg.typ ParamFct    NA    NA
#> 21:                                     ica.fun ParamFct    NA    NA
#> 22:                                   ica.alpha ParamDbl     1     2
#> 23:                                  ica.method ParamFct    NA    NA
#> 24:                                ica.row.norm ParamLgl    NA    NA
#> 25:                                   ica.maxit ParamInt     1   Inf
#> 26:                                     ica.tol ParamDbl     0   Inf
#> 27:                                 ica.verbose ParamLgl    NA    NA
#> 28:                                  ica.w.init ParamUty    NA    NA
#> 29:                          ica.affect_columns ParamUty    NA    NA
#> 30:                        classif.ranger.alpha ParamDbl  -Inf   Inf
#> 31:       classif.ranger.always.split.variables ParamUty    NA    NA
#> 32:                classif.ranger.class.weights ParamUty    NA    NA
#> 33:                      classif.ranger.holdout ParamLgl    NA    NA
#> 34:                   classif.ranger.importance ParamFct    NA    NA
#> 35:                   classif.ranger.keep.inbag ParamLgl    NA    NA
#> 36:                    classif.ranger.max.depth ParamInt     0   Inf
#> 37:                classif.ranger.min.node.size ParamInt     1   Inf
#> 38:                     classif.ranger.min.prop ParamDbl  -Inf   Inf
#> 39:                      classif.ranger.minprop ParamDbl  -Inf   Inf
#> 40:                         classif.ranger.mtry ParamInt     1   Inf
#> 41:                   classif.ranger.mtry.ratio ParamDbl     0     1
#> 42:            classif.ranger.num.random.splits ParamInt     1   Inf
#> 43:                  classif.ranger.num.threads ParamInt     1   Inf
#> 44:                    classif.ranger.num.trees ParamInt     1   Inf
#> 45:                    classif.ranger.oob.error ParamLgl    NA    NA
#> 46:        classif.ranger.regularization.factor ParamUty    NA    NA
#> 47:      classif.ranger.regularization.usedepth ParamLgl    NA    NA
#> 48:                      classif.ranger.replace ParamLgl    NA    NA
#> 49:    classif.ranger.respect.unordered.factors ParamFct    NA    NA
#> 50:              classif.ranger.sample.fraction ParamDbl     0     1
#> 51:                  classif.ranger.save.memory ParamLgl    NA    NA
#> 52: classif.ranger.scale.permutation.importance ParamLgl    NA    NA
#> 53:                    classif.ranger.se.method ParamFct    NA    NA
#> 54:                         classif.ranger.seed ParamInt  -Inf   Inf
#> 55:         classif.ranger.split.select.weights ParamUty    NA    NA
#> 56:                    classif.ranger.splitrule ParamFct    NA    NA
#> 57:                      classif.ranger.verbose ParamLgl    NA    NA
#> 58:                 classif.ranger.write.forest ParamLgl    NA    NA
#> 59:                                        <NA>     <NA>    NA    NA
#> 60:                                        <NA>     <NA>    NA    NA
#> 61:                                        <NA>     <NA>    NA    NA
#> 62:                                        <NA>     <NA>    NA    NA
#> 63:                                        <NA>     <NA>    NA    NA
#> 64:                                        <NA>     <NA>    NA    NA
#> 65:                                        <NA>     <NA>    NA    NA
#>                                              id    class lower upper
# Reprex, part 2: define the tuning search space and run the AutoTuner.
search_space = ps(
  # preprocessing
  interaction_branch.selection = p_fct(levels = c("nop_filter", "modelmatrix")),
  prep_branch.selection = p_fct(levels = c("nop_prep", "yeojohnson", "pca", "ica")),
  # each of the next three is declared to depend on its own branch being the
  # value of prep_branch.selection
  pca.rank. = p_int(2, 6, depends = prep_branch.selection == "pca"),
  ica.n.comp = p_int(2, 6, depends = prep_branch.selection == "ica"),
  yeojohnson.standardize = p_lgl(depends = prep_branch.selection == "yeojohnson"),
  # model
  classif.ranger.mtry.ratio = p_dbl(0.2, 1),
  classif.ranger.max.depth = p_int(2, 6)
)
# plan("multisession", workers = 4L)
# Random search with 3-fold CV, scored by classification accuracy.
# NOTE(review): term_evals is 2 here. In the log that follows, both sampled
# configurations have interaction_branch.selection = nop_filter, so the
# modelmatrix branch (where the error was reported) was never trained in this
# run. More evaluations, or fixing the selection to "modelmatrix", would be
# needed to exercise it.
at_ranger = auto_tuner(
  method = "random_search",
  learner = graph_learner,
  resampling = rsmp("cv", folds = 3),
  measure = msr("classif.acc"),
  search_space = search_space,
  term_evals = 2
)
at_ranger$train(task_extreme)
#> INFO  [17:01:28.870] [bbotk] Starting to optimize 7 parameter(s) with '<OptimizerRandomSearch>' and '<TerminatorEvals> [n_evals=2, k=0]' 
#> INFO  [17:01:28.934] [bbotk] Evaluating 1 configuration(s) 
#> INFO  [17:01:29.007] [mlr3] Running benchmark with 3 resampling iterations 
#> INFO  [17:01:29.057] [mlr3] Applying learner 'removeconstants.interaction_branch.nop_filter.modelmatrix.interaction_unbranch.prep_branch.nop_prep.yeojohnson.pca.ica.prep_unbranch.classif.ranger' on task 'extreme' (iter 1/3) 
#> INFO  [17:01:30.263] [mlr3] Applying learner 'removeconstants.interaction_branch.nop_filter.modelmatrix.interaction_unbranch.prep_branch.nop_prep.yeojohnson.pca.ica.prep_unbranch.classif.ranger' on task 'extreme' (iter 3/3) 
#> INFO  [17:01:31.401] [mlr3] Applying learner 'removeconstants.interaction_branch.nop_filter.modelmatrix.interaction_unbranch.prep_branch.nop_prep.yeojohnson.pca.ica.prep_unbranch.classif.ranger' on task 'extreme' (iter 2/3) 
#> INFO  [17:01:32.557] [mlr3] Finished benchmark 
#> INFO  [17:01:32.591] [bbotk] Result of batch 1: 
#> INFO  [17:01:32.593] [bbotk]  interaction_branch.selection prep_branch.selection pca.rank. ica.n.comp 
#> INFO  [17:01:32.593] [bbotk]                    nop_filter              nop_prep        NA         NA 
#> INFO  [17:01:32.593] [bbotk]  yeojohnson.standardize classif.ranger.mtry.ratio classif.ranger.max.depth 
#> INFO  [17:01:32.593] [bbotk]                      NA                 0.7816803                        2 
#> INFO  [17:01:32.593] [bbotk]  classif.acc warnings errors runtime_learners 
#> INFO  [17:01:32.593] [bbotk]    0.5555177        0      0            3.463 
#> INFO  [17:01:32.593] [bbotk]                                 uhash 
#> INFO  [17:01:32.593] [bbotk]  a06deead-52dd-41a6-8ebd-6ac0d92ee6f1 
#> INFO  [17:01:32.604] [bbotk] Evaluating 1 configuration(s) 
#> INFO  [17:01:32.664] [mlr3] Running benchmark with 3 resampling iterations 
#> INFO  [17:01:32.669] [mlr3] Applying learner 'removeconstants.interaction_branch.nop_filter.modelmatrix.interaction_unbranch.prep_branch.nop_prep.yeojohnson.pca.ica.prep_unbranch.classif.ranger' on task 'extreme' (iter 3/3) 
#> INFO  [17:01:34.955] [mlr3] Applying learner 'removeconstants.interaction_branch.nop_filter.modelmatrix.interaction_unbranch.prep_branch.nop_prep.yeojohnson.pca.ica.prep_unbranch.classif.ranger' on task 'extreme' (iter 1/3) 
#> INFO  [17:01:37.279] [mlr3] Applying learner 'removeconstants.interaction_branch.nop_filter.modelmatrix.interaction_unbranch.prep_branch.nop_prep.yeojohnson.pca.ica.prep_unbranch.classif.ranger' on task 'extreme' (iter 2/3) 
#> INFO  [17:01:39.868] [mlr3] Finished benchmark 
#> INFO  [17:01:39.900] [bbotk] Result of batch 2: 
#> INFO  [17:01:39.901] [bbotk]  interaction_branch.selection prep_branch.selection pca.rank. ica.n.comp 
#> INFO  [17:01:39.901] [bbotk]                    nop_filter              nop_prep        NA         NA 
#> INFO  [17:01:39.901] [bbotk]  yeojohnson.standardize classif.ranger.mtry.ratio classif.ranger.max.depth 
#> INFO  [17:01:39.901] [bbotk]                      NA                 0.5291319                        5 
#> INFO  [17:01:39.901] [bbotk]  classif.acc warnings errors runtime_learners 
#> INFO  [17:01:39.901] [bbotk]    0.5650453        0      0            7.172 
#> INFO  [17:01:39.901] [bbotk]                                 uhash 
#> INFO  [17:01:39.901] [bbotk]  cbcf181e-a15c-4712-879f-9093d423c886 
#> INFO  [17:01:39.934] [bbotk] Finished optimizing after 2 evaluation(s) 
#> INFO  [17:01:39.935] [bbotk] Result: 
#> INFO  [17:01:39.936] [bbotk]  interaction_branch.selection prep_branch.selection pca.rank. ica.n.comp 
#> INFO  [17:01:39.936] [bbotk]                    nop_filter              nop_prep        NA         NA 
#> INFO  [17:01:39.936] [bbotk]  yeojohnson.standardize classif.ranger.mtry.ratio classif.ranger.max.depth 
#> INFO  [17:01:39.936] [bbotk]                      NA                 0.5291319                        5 
#> INFO  [17:01:39.936] [bbotk]  learner_param_vals  x_domain classif.acc 
#> INFO  [17:01:39.936] [bbotk]          <list[12]> <list[4]>   0.5650453

Created on 2022-03-17 by the reprex package (v2.0.1)

sebffischer avatar Mar 17 '22 16:03 sebffischer