Error that occurs when using the custom metric during parallel hyperparameter tuning.
I have defined a custom metric (partial ROC AUC) myself, the code is as follows:
# Load packages
library(tidyverse)
library(tidymodels)
library(modeldata)
library(finetune)
library(baguette)
library(doParallel)
ncores = round(parallel::detectCores()/3)
# Logic for `event_level`
event_col <- function(truth, event_level) {
if (identical(event_level, "first")) {
levels(truth)[1]
} else {
levels(truth)[2]
}
}
pauc_impl <- function(truth, estimate, estimator = 'binary', event_level) {
if(estimator == "binary") {
level_case = event_col(truth = truth, event_level = event_level)
level_control = setdiff(levels(truth), level_case)
result = pROC::roc(estimate,
response = truth,
levels = c(level_control, level_case),
partial.auc = c(0.8,1),
partial.auc.focus = "sensitivity")
pauc_value = as.numeric(result$auc)
}
return(pauc_value)
}
pauc_vec <- function(truth,
estimate,
estimator = NULL,
na_rm = TRUE,
case_weights = NULL,
event_level = "first",
...) {
# calls finalize_estimator_internal() internally
estimator <- finalize_estimator(truth, estimator, metric_class = "pauc")
check_prob_metric(truth, estimate, case_weights, estimator)
if (na_rm) {
result <- yardstick_remove_missing(truth, estimate, case_weights)
truth <- result$truth
estimate <- result$estimate
case_weights <- result$case_weights
} else if (yardstick_any_missing(truth, estimate, case_weights)) {
return(NA_real_)
}
pauc_impl(truth, estimate, estimator, event_level)
}
pauc <- function(data, ...) {
UseMethod("pauc")
}
pauc <- new_prob_metric(pauc, direction = "maximize")
pauc.data.frame <- function(data,
truth,
estimate,
estimator = NULL,
na_rm = TRUE,
case_weights = NULL,
event_level = "first",
options = list()) {
prob_metric_summarizer(
name = "pauc",
fn = pauc_vec,
data = data,
truth = !!enquo(truth),
!!enquo(estimate),
estimator = estimator,
na_rm = na_rm,
case_weights = !!enquo(case_weights),
event_level = event_level,
fn_options = list(options = options)
)
}
I can use defined metric function pauc on the example:
pauc(data = two_class_example,truth = truth,Class1)
The results are as follows:
> pauc(data = two_class_example,truth = truth,Class1)
Setting direction: controls < cases
# A tibble: 1 × 3
.metric .estimator .estimate
<chr> <chr> <dbl>
1 pauc binary 0.149
Then, I used tune_race_anova to tune the bag_tree model.
set.seed(123)
data("lending_club", package = "modeldata")
split <- initial_split(lending_club, strata = Class)
train <- training(split)
test <- testing(split)
fold = vfold_cv(data = train,v = 10,strata = Class)
rec <- recipe(Class ~ ., train) %>%
step_normalize(all_numeric())
mod <- bag_tree(tree_depth = tune()) %>%
set_engine("rpart") %>%
set_mode("classification")
wf_set <- workflow_set(
preproc = list(base = rec),
models = list(bag = mod),
cross = TRUE)
When not using parallel computation, using the defined pauc metric works correctly:
race_result = workflow_map(wf_set,
fn = 'tune_race_anova',
resamples = fold,
grid = 5,
metrics = metric_set(pauc))
race_result %>%
extract_workflow_set_result(id = 'base_bag') %>%
show_best(metric = 'pauc')
> race_result %>%
+ extract_workflow_set_result(id = 'base_bag') %>%
+ show_best(metric = 'pauc')
# A tibble: 1 × 7
tree_depth .metric .estimator mean n std_err .config
<int> <chr> <chr> <dbl> <int> <dbl> <chr>
1 6 pauc binary 0.0647 10 0.00621 Preprocessor1_Model2
However, when I use parallel computation, an error occurs:
cl = makePSOCKcluster(ncores)
registerDoParallel(cl)
race_result = workflow_map(wf_set,
fn = 'tune_race_anova',
resamples = fold,
grid = 5,
metrics = metric_set(pauc))
stopCluster(cl)
Warning message:
All models failed. Run `show_notes(.Last.tune.result)` for more information.
> show_notes(.Last.tune.result)
unique notes:
───────────────────────────────────────────────────────────────────────────────────
Error in `metric_set()`:
! Failed to compute `pauc()`.
Caused by error in `UseMethod()`:
! no applicable method for 'pauc' applied to an object of class "c('grouped_df', 'tbl_df', 'tbl', 'data.frame')"
When I use roc_auc as the metric for parallel hyperparameter tuning, everything works fine. Therefore, I believe the source of the error is in the parallel computation.
Just noting that I can reproduce this.
An admittedly cumbersome way around this right now is to drop pauc in a package and then supply it to control_race(pkgs), but we ought to think about how we can better support this.
@simonpcouch Sorry, I should have loaded the packages at the beginning. Now these results can be reproduced.
I'm not sure I understand this most recent reply, but we'll be coming back to this as we continue to improve our support for parallelism!
I'm not sure I understand this most recent reply, but we'll be coming back to this as we continue to improve our support for parallelism!
Anyway, thank you!
When I change "cl = makePSOCKcluster(ncores)" to "cl = makeForkCluster(ncores)", everything works fine!
Turns out we can replicate this issue with tune alone—I just filed an issue there and will address soon! Thank you!