purrr icon indicating copy to clipboard operation
purrr copied to clipboard

Data masking inside pmap() when value for data mask is provided by a column

Open florisvdh opened this issue 3 years ago • 0 comments

I'd like to use pmap() (map2() in the examples below) on a nested tibble and do data masking within the .f argument, taking the column name used with the .data pronoun from a column of the outer tibble. Unfortunately, the data masking seems to work only when that column name is supplied by a scalar object from the environment. Is this a bug?

How can this be solved, both for example 1 (mutate()) and for example 2 (group_by())?

suppressPackageStartupMessages(library(dplyr))
library(tidyr)
library(purrr)

# EXAMPLE 1 (mutate). Does not work, and didn't find a solution here:
# Goal: nest the iris measurements per Species, store in `to_duplicate` the
# name of the column to copy ("Sepal.Length" for setosa, "Petal.Width"
# otherwise), then walk over each nested tibble together with that name and
# copy the named column into `newcol`.
iris %>% 
    nest(measured = -Species) %>% 
    mutate(to_duplicate = ifelse(Species == "setosa",
                                 "Sepal.Length",
                                 "Petal.Width"),
           measured = 
               # `m` is one nested tibble, `d` the matching column name.
               map2(measured, to_duplicate, function(m, d) {
                   m %>% 
                       # This is the call that fails: the output below reports
                       # "object 'd' not found" even though `d` is an argument
                       # of the enclosing function.
                       # NOTE(review): presumably the index inside `.data[[ ]]`
                       # is evaluated while the OUTER mutate() captures its
                       # arguments, i.e. before `d` exists -- confirm against
                       # the rlang `.data` documentation.
                       mutate(newcol = .data[[d]])
           }))
#> Error in splice(dot_call(capture_dots, frame_env = frame_env, named = named, : object 'd' not found

# EXAMPLE 2 (group_by). Not working:
# Goal: add a `subspecies` label (15 letters, each repeated for 10
# consecutive rows) and a constant column `groupvariable` holding the name
# of the column to group by; nest everything except Species and
# groupvariable; then, per nested tibble, group by the column named in
# `groupvariable` and compute the mean of Petal.Width.
iris %>% 
    mutate(subspecies = rep(LETTERS[1:15], each = 10),
           groupvariable = "subspecies") %>% 
    nest(measured = -c(Species, groupvariable)) %>% 
    mutate(measured = 
               # `m` is one nested tibble, `g` the name of its grouping column.
               map2(measured, groupvariable, function(m, g) {
                   m %>% 
                       # Fails with "object 'g' not found" (see output below),
                       # the same failure mode as `.data[[d]]` in example 1.
                       group_by(.data[[g]]) %>% 
                       summarise(newcol = mean(Petal.Width))
               }))
#> Error in splice(dot_call(capture_dots, frame_env = frame_env, named = named, : object 'g' not found

# Workaround for example 2.
# group_by_() still works to do that, however it is deprecated:
# Same pipeline as example 2; only the grouping call differs. group_by_()
# receives the column name as the string held in `g` instead of going
# through the `.data` pronoun.
iris %>% 
    mutate(subspecies = rep(LETTERS[1:15], each = 10),
           groupvariable = "subspecies") %>% 
    nest(measured = -c(Species, groupvariable)) %>% 
    mutate(measured = 
               map2(measured, groupvariable, function(m, g) {
                   m %>% 
                       # Runs, but emits the deprecation warning shown in the
                       # output below.
                       group_by_(g) %>% 
                       summarise(newcol = mean(Petal.Width))
               }))
#> Warning: `group_by_()` was deprecated in dplyr 0.7.0.
#> Please use `group_by()` instead.
#> See vignette('programming') for more help
#> # A tibble: 3 x 3
#>   Species    groupvariable measured            
#>   <fct>      <chr>         <list>              
#> 1 setosa     subspecies    <tibble[,2] [5 × 2]>
#> 2 versicolor subspecies    <tibble[,2] [5 × 2]>
#> 3 virginica  subspecies    <tibble[,2] [5 × 2]>

# In the above example, g had a constant value throughout iris, so assigning
# it as a global variable works.
# However, I'd like it to work as well when its value varies in iris, so this
# is not a solution.
# Here `g` is a global character scalar rather than a column of the outer
# tibble, so map() only iterates over the nested tibbles.
g <- "subspecies"
iris %>% 
    mutate(subspecies = rep(LETTERS[1:15], each = 10)) %>% 
    nest(measured = -Species) %>% 
    mutate(measured = 
               map(measured, function(m) {
                   m %>% 
                       # With `g` found in the global environment this call
                       # succeeds (see the tibble printed below).
                       group_by(.data[[g]]) %>% 
                       summarise(newcol = mean(Petal.Width))
               }))
#> # A tibble: 3 x 2
#>   Species    measured            
#>   <fct>      <list>              
#> 1 setosa     <tibble[,2] [5 × 2]>
#> 2 versicolor <tibble[,2] [5 × 2]>
#> 3 virginica  <tibble[,2] [5 × 2]>

Created on 2021-04-28 by the reprex package (v2.0.0)

Session info
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#>  setting  value                       
#>  version  R version 4.0.5 (2021-03-31)
#>  os       Linux Mint 20               
#>  system   x86_64, linux-gnu           
#>  ui       X11                         
#>  language nl_BE:nl                    
#>  collate  nl_BE.UTF-8                 
#>  ctype    nl_BE.UTF-8                 
#>  tz       Europe/Brussels             
#>  date     2021-04-28                  
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  package     * version date       lib source        
#>  assertthat    0.2.1   2019-03-21 [1] CRAN (R 4.0.2)
#>  cli           2.4.0   2021-04-05 [1] CRAN (R 4.0.5)
#>  crayon        1.4.1   2021-02-08 [1] CRAN (R 4.0.3)
#>  DBI           1.1.1   2021-01-15 [1] CRAN (R 4.0.3)
#>  digest        0.6.27  2020-10-24 [1] CRAN (R 4.0.3)
#>  dplyr       * 1.0.5   2021-03-05 [1] CRAN (R 4.0.5)
#>  ellipsis      0.3.1   2020-05-15 [1] CRAN (R 4.0.2)
#>  evaluate      0.14    2019-05-28 [1] CRAN (R 4.0.2)
#>  fansi         0.4.2   2021-01-15 [1] CRAN (R 4.0.3)
#>  fs            1.5.0   2020-07-31 [1] CRAN (R 4.0.2)
#>  generics      0.1.0   2020-10-31 [1] CRAN (R 4.0.3)
#>  glue          1.4.2   2020-08-27 [1] CRAN (R 4.0.2)
#>  highr         0.8     2019-03-20 [1] CRAN (R 4.0.2)
#>  htmltools     0.5.1.1 2021-01-22 [1] CRAN (R 4.0.3)
#>  knitr         1.31    2021-01-27 [1] CRAN (R 4.0.3)
#>  lifecycle     1.0.0   2021-02-15 [1] CRAN (R 4.0.4)
#>  magrittr      2.0.1   2020-11-17 [1] CRAN (R 4.0.3)
#>  pillar        1.5.1   2021-03-05 [1] CRAN (R 4.0.5)
#>  pkgconfig     2.0.3   2019-09-22 [1] CRAN (R 4.0.2)
#>  ps            1.6.0   2021-02-28 [1] CRAN (R 4.0.5)
#>  purrr       * 0.3.4   2020-04-17 [1] CRAN (R 4.0.2)
#>  R6            2.5.0   2020-10-28 [1] CRAN (R 4.0.3)
#>  reprex        2.0.0   2021-04-02 [1] CRAN (R 4.0.5)
#>  rlang         0.4.10  2020-12-30 [1] CRAN (R 4.0.3)
#>  rmarkdown     2.7     2021-02-19 [1] CRAN (R 4.0.4)
#>  rstudioapi    0.13    2020-11-12 [1] CRAN (R 4.0.3)
#>  sessioninfo   1.1.1   2018-11-05 [1] CRAN (R 4.0.2)
#>  stringi       1.5.3   2020-09-09 [1] CRAN (R 4.0.2)
#>  stringr       1.4.0   2019-02-10 [1] CRAN (R 4.0.2)
#>  tibble        3.1.0   2021-02-25 [1] CRAN (R 4.0.5)
#>  tidyr       * 1.1.3   2021-03-03 [1] CRAN (R 4.0.5)
#>  tidyselect    1.1.0   2020-05-11 [1] CRAN (R 4.0.2)
#>  utf8          1.2.1   2021-03-12 [1] CRAN (R 4.0.5)
#>  vctrs         0.3.7   2021-03-29 [1] CRAN (R 4.0.5)
#>  withr         2.4.1   2021-01-26 [1] CRAN (R 4.0.3)
#>  xfun          0.22    2021-03-11 [1] CRAN (R 4.0.4)
#>  yaml          2.2.1   2020-02-01 [1] CRAN (R 4.0.2)
#> 
#> [1] /home/floris/lib/R/library
#> [2] /usr/local/lib/R/site-library
#> [3] /usr/lib/R/site-library
#> [4] /usr/lib/R/library

florisvdh avatar Apr 28 '21 16:04 florisvdh