recipes icon indicating copy to clipboard operation
recipes copied to clipboard

id fields converted to NA

Open topepo opened this issue 6 years ago • 2 comments

From this post.

# Reproducible example: a character column given the "id variable" role comes
# back from bake() on new data as a factor made up entirely of NA values.
library(dplyr, warn.conflicts = FALSE)
library(recipes, warn.conflicts = FALSE)

# Toy data set of 12 rows: a character id ("a" through "l"), a numeric outcome,
# two numeric predictors and one three-level factor predictor.
# No seed is set in this snippet, so the random values differ between runs.
data <- tibble(
  id = letters[1:12],
  output = rnorm(12, mean = 0),
  pred1 = rnorm(12, mean = 10),
  pred2 = rnorm(12, mean = 20),
  pred3 = factor(rep(c('f1', 'f2', 'f3'), 4))
)

# Inject missing values so that both slices below contain one NA in each of
# pred1 and pred2 (rows 1 and 2 land in training, rows 6 and 7 in test).
data$pred1[c(1,6)] <- NA
data$pred2[c(2,7)] <- NA
# Training rows carry ids "a"-"e" and test rows carry ids "f"-"j": no id value
# is shared between the two slices. Rows 11 and 12 are not used.
df_train <- data[1:5,]
df_test <- data[6:10,]

# Recipe specification: assign a role to every column, then dummy-encode the
# factor, center and scale the numeric predictors, and finally median-impute
# all predictors. The id column is never selected by any step.
rec_obj <- recipe(x = df_train) %>%
  update_role(output, new_role = 'outcome') %>%
  update_role(id, new_role = "id variable") %>%
  update_role(-output, -id, new_role = 'predictor') %>%
  step_dummy(pred3) %>%
  step_center(pred1, pred2) %>%
  step_scale(pred1, pred2) %>%
  step_medianimpute(all_predictors())

# Estimate the recipe on the training slice, then apply it to both slices.
rec_trained <- prep(rec_obj, training = df_train)
train_data    <- bake(rec_trained, new_data = df_train)
test_data     <- bake(rec_trained, new_data = df_test)
# Printing the baked test set exposes the problem: in the posted output the id
# column has type <fct> and every value is <NA>.
# NOTE(review): presumably prep() converted the character id to a factor whose
# levels are only the training ids (strings_as_factors default), so the unseen
# test ids map to NA -- confirm against the recipes documentation.
test_data
#> # A tibble: 5 x 6
#>   id     output  pred1  pred2 pred3_f2 pred3_f3
#>   <fct>   <dbl>  <dbl>  <dbl>    <dbl>    <dbl>
#> 1 <NA>  -1.66    0.155 -0.484        0        1
#> 2 <NA>  -0.0917 -1.63  -0.375        0        0
#> 3 <NA>   0.0398  0.143 -0.975        1        0
#> 4 <NA>   0.0514  0.612  0.110        0        1
#> 5 <NA>  -0.746  -2.95  -2.93         0        0

Created on 2019-02-10 by the reprex package (v0.2.1)

topepo avatar Feb 10 '19 21:02 topepo