recipes
recipes copied to clipboard
id fields converted to NA
From this post.
library(dplyr, warn.conflicts = FALSE)
library(recipes, warn.conflicts = FALSE)
data <- tibble(
id = letters[1:12],
output = rnorm(12, mean = 0),
pred1 = rnorm(12, mean = 10),
pred2 = rnorm(12, mean = 20),
pred3 = factor(rep(c('f1', 'f2', 'f3'), 4))
)
data$pred1[c(1,6)] <- NA
data$pred2[c(2,7)] <- NA
df_train <- data[1:5,]
df_test <- data[6:10,]
rec_obj <- recipe(x = df_train) %>%
update_role(output, new_role = 'outcome') %>%
update_role(id, new_role = "id variable") %>%
update_role(-output, -id, new_role = 'predictor') %>%
step_dummy(pred3) %>%
step_center(pred1, pred2) %>%
step_scale(pred1, pred2) %>%
step_medianimpute(all_predictors())
rec_trained <- prep(rec_obj, training = df_train)
train_data <- bake(rec_trained, new_data = df_train)
test_data <- bake(rec_trained, new_data = df_test)
test_data
#> # A tibble: 5 x 6
#> id output pred1 pred2 pred3_f2 pred3_f3
#> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 <NA> -1.66 0.155 -0.484 0 1
#> 2 <NA> -0.0917 -1.63 -0.375 0 0
#> 3 <NA> 0.0398 0.143 -0.975 1 0
#> 4 <NA> 0.0514 0.612 0.110 0 1
#> 5 <NA> -0.746 -2.95 -2.93 0 0
Created on 2019-02-10 by the reprex package (v0.2.1)