purrr
purrr copied to clipboard
Data masking inside pmap() when value for data mask is provided by a column
I'd like to use `pmap()` on a nested tibble and do data masking within the `.f` argument, taking the name of the masked column from another column of that tibble. Unfortunately, data masking seems to work only when the column name is supplied by a scalar object from the environment. Is this a bug?
How can this be solved, both for the `mutate()` case (example 1) and for the `group_by()` case (example 2)?
suppressPackageStartupMessages(library(dplyr))
library(tidyr)
library(purrr)
# EXAMPLE 1 (mutate). Does not work, and I did not find a solution for it.
# Goal: inside each nested tibble, copy the column whose name is stored in
# `to_duplicate` into a new column `newcol`.
iris %>%
# Nest every column except Species into the list-column `measured`.
nest(measured = -Species) %>%
# Per species, store the name of the column to copy, as a string.
mutate(to_duplicate = ifelse(Species == "setosa",
"Sepal.Length",
"Petal.Width"),
measured =
# Iterate over the nested tibbles and their column names in parallel.
map2(measured, to_duplicate, function(m, d) {
m %>%
# `d` is a string, so the column is looked up through the `.data` pronoun.
# The reprex reports "object 'd' not found"; this is the only place where
# `d` is referenced.
# NOTE(review): presumably the outer mutate() already processes
# `.data[[d]]` while capturing its own arguments, before `d` exists --
# confirm against the rlang documentation of the `.data` pronoun.
mutate(newcol = .data[[d]])
}))
#> Error in splice(dot_call(capture_dots, frame_env = frame_env, named = named, : object 'd' not found
# EXAMPLE 2 (group_by). Not working.
# Goal: summarise each nested tibble per group, where the name of the grouping
# column is read from the `groupvariable` column.
iris %>%
# Add a `subspecies` label (15 letters x 10 repeats = 150 values) and a
# constant column holding the name of the column to group by.
mutate(subspecies = rep(LETTERS[1:15], each = 10),
groupvariable = "subspecies") %>%
# Nest everything except Species and groupvariable into `measured`.
nest(measured = -c(Species, groupvariable)) %>%
mutate(measured =
# Iterate over the nested tibbles and the grouping-column names in parallel.
map2(measured, groupvariable, function(m, g) {
m %>%
# `g` is a string, so the grouping column is looked up through `.data`.
# The reprex reports "object 'g' not found"; this is the only place where
# `g` is referenced.
group_by(.data[[g]]) %>%
summarise(newcol = mean(Petal.Width))
}))
#> Error in splice(dot_call(capture_dots, frame_env = frame_env, named = named, : object 'g' not found
# Workaround for example 2.
# The deprecated group_by_() still accepts the column name as a string and
# runs without error, but it emits a deprecation warning:
iris %>%
# Same preparation as in example 2: add `subspecies` and the constant
# `groupvariable` column, then nest per Species.
mutate(subspecies = rep(LETTERS[1:15], each = 10),
groupvariable = "subspecies") %>%
nest(measured = -c(Species, groupvariable)) %>%
mutate(measured =
map2(measured, groupvariable, function(m, g) {
m %>%
# Pass the string `g` directly; no `.data` pronoun is involved here.
group_by_(g) %>%
summarise(newcol = mean(Petal.Width))
}))
#> Warning: `group_by_()` was deprecated in dplyr 0.7.0.
#> Please use `group_by()` instead.
#> See vignette('programming') for more help
#> # A tibble: 3 x 3
#> Species groupvariable measured
#> <fct> <chr> <list>
#> 1 setosa subspecies <tibble[,2] [5 × 2]>
#> 2 versicolor subspecies <tibble[,2] [5 × 2]>
#> 3 virginica subspecies <tibble[,2] [5 × 2]>
# In the example above, g has the same value for every row of iris, so
# defining it as a global variable works.
# However, I would like this to work as well when its value varies within
# iris, so this is not a solution.
g <- "subspecies"
iris %>%
mutate(subspecies = rep(LETTERS[1:15], each = 10)) %>%
nest(measured = -Species) %>%
mutate(measured =
# Only the nested tibbles are iterated over; `g` is not a function argument
# here but is found in the global environment.
map(measured, function(m) {
m %>%
# Same `.data[[g]]` lookup as in example 2; with a global `g` it succeeds.
group_by(.data[[g]]) %>%
summarise(newcol = mean(Petal.Width))
}))
#> # A tibble: 3 x 2
#> Species measured
#> <fct> <list>
#> 1 setosa <tibble[,2] [5 × 2]>
#> 2 versicolor <tibble[,2] [5 × 2]>
#> 3 virginica <tibble[,2] [5 × 2]>
Created on 2021-04-28 by the reprex package (v2.0.0)
Session info
# Print the R version, platform settings and package versions used for this
# reprex.
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.0.5 (2021-03-31)
#> os Linux Mint 20
#> system x86_64, linux-gnu
#> ui X11
#> language nl_BE:nl
#> collate nl_BE.UTF-8
#> ctype nl_BE.UTF-8
#> tz Europe/Brussels
#> date 2021-04-28
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date lib source
#> assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.0.2)
#> cli 2.4.0 2021-04-05 [1] CRAN (R 4.0.5)
#> crayon 1.4.1 2021-02-08 [1] CRAN (R 4.0.3)
#> DBI 1.1.1 2021-01-15 [1] CRAN (R 4.0.3)
#> digest 0.6.27 2020-10-24 [1] CRAN (R 4.0.3)
#> dplyr * 1.0.5 2021-03-05 [1] CRAN (R 4.0.5)
#> ellipsis 0.3.1 2020-05-15 [1] CRAN (R 4.0.2)
#> evaluate 0.14 2019-05-28 [1] CRAN (R 4.0.2)
#> fansi 0.4.2 2021-01-15 [1] CRAN (R 4.0.3)
#> fs 1.5.0 2020-07-31 [1] CRAN (R 4.0.2)
#> generics 0.1.0 2020-10-31 [1] CRAN (R 4.0.3)
#> glue 1.4.2 2020-08-27 [1] CRAN (R 4.0.2)
#> highr 0.8 2019-03-20 [1] CRAN (R 4.0.2)
#> htmltools 0.5.1.1 2021-01-22 [1] CRAN (R 4.0.3)
#> knitr 1.31 2021-01-27 [1] CRAN (R 4.0.3)
#> lifecycle 1.0.0 2021-02-15 [1] CRAN (R 4.0.4)
#> magrittr 2.0.1 2020-11-17 [1] CRAN (R 4.0.3)
#> pillar 1.5.1 2021-03-05 [1] CRAN (R 4.0.5)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.0.2)
#> ps 1.6.0 2021-02-28 [1] CRAN (R 4.0.5)
#> purrr * 0.3.4 2020-04-17 [1] CRAN (R 4.0.2)
#> R6 2.5.0 2020-10-28 [1] CRAN (R 4.0.3)
#> reprex 2.0.0 2021-04-02 [1] CRAN (R 4.0.5)
#> rlang 0.4.10 2020-12-30 [1] CRAN (R 4.0.3)
#> rmarkdown 2.7 2021-02-19 [1] CRAN (R 4.0.4)
#> rstudioapi 0.13 2020-11-12 [1] CRAN (R 4.0.3)
#> sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 4.0.2)
#> stringi 1.5.3 2020-09-09 [1] CRAN (R 4.0.2)
#> stringr 1.4.0 2019-02-10 [1] CRAN (R 4.0.2)
#> tibble 3.1.0 2021-02-25 [1] CRAN (R 4.0.5)
#> tidyr * 1.1.3 2021-03-03 [1] CRAN (R 4.0.5)
#> tidyselect 1.1.0 2020-05-11 [1] CRAN (R 4.0.2)
#> utf8 1.2.1 2021-03-12 [1] CRAN (R 4.0.5)
#> vctrs 0.3.7 2021-03-29 [1] CRAN (R 4.0.5)
#> withr 2.4.1 2021-01-26 [1] CRAN (R 4.0.3)
#> xfun 0.22 2021-03-11 [1] CRAN (R 4.0.4)
#> yaml 2.2.1 2020-02-01 [1] CRAN (R 4.0.2)
#>
#> [1] /home/floris/lib/R/library
#> [2] /usr/local/lib/R/site-library
#> [3] /usr/lib/R/site-library
#> [4] /usr/lib/R/library