recipes
recipes copied to clipboard
`step_bs()` with fixed knots not matching `splines::bs()` output
The problem
I'm having trouble with fitting a cubic spline with fixed knots. I would expect the number of terms in the fitted model to be equal to degree + 1 + (# knots), however, in the fitted model I am only seeing degree + 1 terms. This makes me think that the knots that are passed through the options list are not being used. This is confirmed by fitting a regression spline with deg_free = degree and getting the same fitted results.
Any insight would be greatly appreciated. I have not been able to find an example with step_bs and fixed knots.
Reproducible example
library(ISLR)
library(splines)
library(tidymodels)
lm_spec <- linear_reg()
## fixed knots
bs_fixed_rec <- recipe(wage ~ age, data = Wage) |>
step_bs(age, degree = 4, options = list(knots = c(25, 40, 60)))
bs_fixed_wf <- workflow() |>
add_model(lm_spec) |>
add_recipe(bs_fixed_rec)
bs_fixed_fit <- bs_fixed_wf |>
fit(data = Wage)
bs_fixed_fit |> tidy()
#> # A tibble: 5 × 5
#> term estimate std.error statistic p.value
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 (Intercept) 51.9 5.30 9.80 2.39e-22
#> 2 age_bs_1 106. 17.0 6.21 6.19e-10
#> 3 age_bs_2 45.9 16.0 2.86 4.21e- 3
#> 4 age_bs_3 88.9 22.0 4.03 5.60e- 5
#> 5 age_bs_4 29.6 13.4 2.21 2.69e- 2
## matches df = degree
bs_rec <- recipe(wage ~ age, data = Wage) |>
step_bs(age, degree = 4, deg_free = 4)
bs_wf <- workflow() |>
add_model(lm_spec) |>
add_recipe(bs_rec)
bs_fit <- bs_wf |>
fit(data = Wage)
bs_fit |> tidy()
#> # A tibble: 5 × 5
#> term estimate std.error statistic p.value
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 (Intercept) 51.9 5.30 9.80 2.39e-22
#> 2 age_bs_1 106. 17.0 6.21 6.19e-10
#> 3 age_bs_2 45.9 16.0 2.86 4.21e- 3
#> 4 age_bs_3 88.9 22.0 4.03 5.60e- 5
#> 5 age_bs_4 29.6 13.4 2.21 2.69e- 2
## vs lm call
lm(wage ~ splines::bs(age, degree = 3, knots = c(25, 40, 60)), data = Wage)
#>
#> Call:
#> lm(formula = wage ~ splines::bs(age, degree = 3, knots = c(25,
#> 40, 60)), data = Wage)
#>
#> Coefficients:
#> (Intercept)
#> 60.49
#> splines::bs(age, degree = 3, knots = c(25, 40, 60))1
#> 3.98
#> splines::bs(age, degree = 3, knots = c(25, 40, 60))2
#> 44.63
#> splines::bs(age, degree = 3, knots = c(25, 40, 60))3
#> 62.84
#> splines::bs(age, degree = 3, knots = c(25, 40, 60))4
#> 55.99
#> splines::bs(age, degree = 3, knots = c(25, 40, 60))5
#> 50.69
#> splines::bs(age, degree = 3, knots = c(25, 40, 60))6
#> 16.61
Created on 2022-10-26 with reprex v2.0.2
Thank you, this appears to be a bug!
This issue has been fixed in https://github.com/tidymodels/recipes/pull/1338
library(ISLR)
library(splines)
library(tidymodels)
lm_spec <- linear_reg()
## fixed knots
bs_fixed_rec <- recipe(wage ~ age, data = Wage) |>
step_bs(age, degree = 4, options = list(knots = c(25, 40, 60)))
bs_fixed_wf <- workflow() |>
add_model(lm_spec) |>
add_recipe(bs_fixed_rec)
bs_fixed_fit <- bs_fixed_wf |>
fit(data = Wage)
bs_fixed_fit |> tidy()
#> # A tibble: 8 × 5
#> term estimate std.error statistic p.value
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 (Intercept) 59.2 10.4 5.70 0.0000000134
#> 2 age_bs_1 7.45 15.0 0.497 0.619
#> 3 age_bs_2 26.9 11.2 2.41 0.0162
#> 4 age_bs_3 74.9 13.9 5.39 0.0000000754
#> 5 age_bs_4 42.8 13.8 3.10 0.00199
#> 6 age_bs_5 78.5 16.7 4.71 0.00000253
#> 7 age_bs_6 21.7 18.7 1.16 0.246
#> 8 age_bs_7 30.5 21.4 1.43 0.154
lm(wage ~ splines::bs(age, degree = 4, knots = c(25, 40, 60)), data = Wage) %>%
tidy()
#> # A tibble: 8 × 5
#> term estimate std.error statistic p.value
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 (Intercept) 59.2 10.4 5.70 1.34e-8
#> 2 splines::bs(age, degree = 4, knots = c(2… 7.45 15.0 0.497 6.19e-1
#> 3 splines::bs(age, degree = 4, knots = c(2… 26.9 11.2 2.41 1.62e-2
#> 4 splines::bs(age, degree = 4, knots = c(2… 74.9 13.9 5.39 7.54e-8
#> 5 splines::bs(age, degree = 4, knots = c(2… 42.8 13.8 3.10 1.99e-3
#> 6 splines::bs(age, degree = 4, knots = c(2… 78.5 16.7 4.71 2.53e-6
#> 7 splines::bs(age, degree = 4, knots = c(2… 21.7 18.7 1.16 2.46e-1
#> 8 splines::bs(age, degree = 4, knots = c(2… 30.5 21.4 1.43 1.54e-1
Created on 2024-06-07 with reprex v2.1.0
This issue has been automatically locked. If you believe you have found a related problem, please file a new issue (with a reprex https://reprex.tidyverse.org) and link to this issue.