purrr icon indicating copy to clipboard operation
purrr copied to clipboard

Unexpected behavior of `lmap_if` when `.f` alters structure of `.x` and a function is supplied to `.else`

Open TimTeaFan opened this issue 3 years ago • 1 comments

I was playing around with the lmap_if function and encountered an unexpected behaviour when a function is provided to .else while the function in .f is altering the structure of the object that lmap_if is applied to. This behavior might be intended, in this case please close this issue.

# setup
library(purrr)

Here is a simple function, similar to the one in the documentation. This function just duplicates a list element.

list_rep <- function(x) {
  out <- rep_len(x, 2)
  names(out) <- paste0(names(x), 1:2)
  out
}

This is the example list from the documentation:

x <- list(a = 1:4, b = letters[5:7], c = 8:9, d = letters[10])

Lets apply lmap_if and say we want to duplicate each numeric list element and we want to replace all other elements with list(z = 1).

lmap_if(x, 
        .p = is.numeric,
        .f = list_rep,
        .else = ~ list(z = 1))
#> $a1
#> [1] 1 2 3 4
#> 
#> $z
#> [1] 1
#> 
#> $b
#> [1] "e" "f" "g"
#> 
#> $z
#> [1] 1
#> 
#> $c2
#> [1] 8 9
#> 
#> $d
#> [1] "j"

The output is somewhat unexpected. Rewritting lmap_if as a loop - at least the way I thought it would work - would yield the following result.

out <- vector("list", length = length(x))

for (i in seq_along(x)) {
  out[[i]] <- if (is.numeric(x[[i]])) {
    list_rep(x[i]) 
  } else {
    list(z = 1)
  }
}
out <- flatten(out)
out
#> $a1
#> [1] 1 2 3 4
#> 
#> $a2
#> [1] 1 2 3 4
#> 
#> $z
#> [1] 1
#> 
#> $c1
#> [1] 8 9
#> 
#> $c2
#> [1] 8 9
#> 
#> $z
#> [1] 1

The reason for this unexpected behavior is that lmap_if creates an selector based on the function in .p and then applies lmap_at first on all selected elements (which might alter the structure of .x) and then applies lmap_at again at the result of the first operation using all elements which were not selected. However, since the selector was created before the first call to lmap_at it doesn’t necessarily match the structure of .x anymore. Is this behavior intended and was just my mental model of what lmap_if is doing wrong?

Created on 2021-12-12 by the reprex package (v0.3.0)

TimTeaFan avatar Dec 12 '21 21:12 TimTeaFan

Just to add another example similar to the one in test-lmap.R:

(l1 <- list(a = 1, b = "foo"))
#> $a
#> [1] 1
#> 
#> $b
#> [1] "foo"

Lets assume we want to replace all character elements with list("bar", 2) and all other elements with list("baz", 3):

library(purrr)

lmap_if(l1,
        is.character,
        ~ list("bar", 2),
        .else = ~ list("baz", 3))
#> [[1]]
#> [1] "baz"
#> 
#> [[2]]
#> [1] 3
#> 
#> [[3]]
#> [1] "bar"
#> 
#> [[4]]
#> [1] 2

If we change the order of elements in the initial list, the result is somewhat unexpected:

(l2 <-  list(a = "foo", b = 1))
#> $a
#> [1] "foo"
#> 
#> $b
#> [1] 1

lmap_if(l2,
        is.character,
        ~ list("bar", 2),
        .else = ~ list("baz", 3))
#> [[1]]
#> [1] "bar"
#> 
#> [[2]]
#> [1] "baz"
#> 
#> [[3]]
#> [1] 3
#> 
#> $b
#> [1] 1

From a pure if logic the result below would have been expected:

list("bar", 2, "baz", 3)
#> [[1]]
#> [1] "bar"
#> 
#> [[2]]
#> [1] 2
#> 
#> [[3]]
#> [1] "baz"
#> 
#> [[4]]
#> [1] 3

Which is similar to what map_if with flatten would have done:

map_if(l2,
       is.character,
       ~ list("bar", 2),
       .else = ~ list("baz", 3)) %>%
  flatten
#> [[1]]
#> [1] "bar"
#> 
#> [[2]]
#> [1] 2
#> 
#> [[3]]
#> [1] "baz"
#> 
#> [[4]]
#> [1] 3

Created on 2022-01-01 by the reprex package (v0.3.0)

TimTeaFan avatar Jan 01 '22 21:01 TimTeaFan

Could you please rework your reproducible example to use the reprex package ? That makes it easier to see both the input and the output, formatted in such a way that I can easily re-run in a local session.

hadley avatar Aug 23 '22 21:08 hadley

Apologies for splitting the original reprex into several pieces. Below is the code in one piece:

# setup
library(purrr)

# example 1
# example function and input list as simplified version of example in the docs

# function that doubles (repeats) a list element
list_rep <- function(x) {
  out <- rep_len(x, 2)
  names(out) <- paste0(names(x), 1:2)
  out
}

# input list
(x <- list(a = 1:4, b = letters[5:7], c = 8:9, d = letters[10]))

#> $a
#> [1] 1 2 3 4
#>
#> $b
#> [1] "e" "f" "g"
#>
#> $c
#> [1] 8 9
#>
#> $d
#> [1] "j"

lmap_if(x, 
        .p = is.numeric,
        .f = list_rep,
        .else = ~ list(z = 1))
#> $a1
#> [1] 1 2 3 4
#> 
#> $z
#> [1] 1
#> 
#> $b
#> [1] "e" "f" "g"
#> 
#> $z
#> [1] 1
#> 
#> $c2
#> [1] 8 9
#> 
#> $d
#> [1] "j"

# producing the expected output as a for loop

out <- vector("list", length = length(x))

for (i in seq_along(x)) {
  out[[i]] <- if (is.numeric(x[[i]])) {
    list_rep(x[i]) 
  } else {
    list(z = 1)
  }
}

out <- flatten(out)
out
#> $a1
#> [1] 1 2 3 4
#> 
#> $a2
#> [1] 1 2 3 4
#> 
#> $z
#> [1] 1
#> 
#> $c1
#> [1] 8 9
#> 
#> $c2
#> [1] 8 9
#> 
#> $z
#> [1] 1

# example 2 similar to the example in tests

(l1 <- list(a = 1, b = "foo"))
#> $a
#> [1] 1
#> 
#> $b
#> [1] "foo"

# let’s replace character elements with ‘list("bar", 2)‘ and all other elements with ‘list("baz", 3))’

lmap_if(l1,
        is.character,
        ~ list("bar", 2),
        .else = ~ list("baz", 3))
#> [[1]]
#> [1] "baz"
#> 
#> [[2]]
#> [1] 3
#> 
#> [[3]]
#> [1] "bar"
#> 
#> [[4]]
#> [1] 2

# let’s change the order of our original list 
# the single elements of the output list shouldn’t change, only their order
# but below also the elements are different 
(l2 <-  list(a = "foo", b = 1))
#> $a
#> [1] "foo"
#> 
#> $b
#> [1] 1

lmap_if(l2,
        is.character,
        ~ list("bar", 2),
        .else = ~ list("baz", 3))
#> [[1]]
#> [1] "bar"
#> 
#> [[2]]
#> [1] "baz"
#> 
#> [[3]]
#> [1] 3
#> 
#> $b
#> [1] 1

# From a pure `if` logic the result below would have been expected:

(list("bar", 2, "baz", 3))
#> [[1]]
#> [1] "bar"
#> 
#> [[2]]
#> [1] 2
#> 
#> [[3]]
#> [1] "baz"
#> 
#> [[4]]
#> [1] 3


# Which is similar to what `map_if` with `flatten` would have done:

map_if(l2,
       is.character,
       ~ list("bar", 2),
       .else = ~ list("baz", 3)) %>%
  flatten
#> [[1]]
#> [1] "bar"
#> 
#> [[2]]
#> [1] 2
#> 
#> [[3]]
#> [1] "baz"
#> 
#> [[4]]
#> [1] 3

Created on 2022-08-24 by the reprex package (v0.3.0)

TimTeaFan avatar Aug 24 '22 09:08 TimTeaFan

Would you mind having a go at simplifying it so that I can more easily see the problem at a glance?

hadley avatar Aug 24 '22 10:08 hadley

For some context how I stumbled upon this odd behavior of lmap_if: I was translating {purrr} functions as for loops using the examples in the docs and in the test folder. My reprex above is already to some extent a simplified versions of those examples, but below I'll try to make the problem more explicit.

library(purrr)

# a function that returns a list with two character elements
char_ls_el <- function(x) {
  list("a", "b")
}

# a function that returns a list with two numeric elements
num_ls_el <- function(x) {
  list(1,2)
}

# input list, with one char and one numeric element
(x <- list("z", 99))
#> [[1]]
#> [1] "z"
#> 
#> [[2]]
#> [1] 99

# let’s replace each element using the two functions above
# the output is unexpected
lmap_if(x, is.character, char_ls_el, .else = num_ls_el)
#> [[1]]
#> [1] "a"
#> 
#> [[2]]
#> [1] 1
#> 
#> [[3]]
#> [1] 2
#> 
#> [[4]]
#> [1] 99

# we’d rather expected the following output
map_if(x, is.character, char_ls_el, .else = num_ls_el) %>%
  flatten()
#> [[1]]
#> [1] "a"
#> 
#> [[2]]
#> [1] "b"
#> 
#> [[3]]
#> [1] 1
#> 
#> [[4]]
#> [1] 2

# why does the output differ?
# let’s look inside `lmap_if` to find out

# under the hood `lmap_if` uses a selector `sel` and 
# calls `lmap_at` twice 

(sel <- purrr:::probe(x, is.character))
#> [1]  TRUE FALSE

# the problem arises when the function in .f is applied, 
# since this alters the structure of `x`: it has now 3 elements 
(x <- lmap_at(x, which(sel), char_ls_el))
#> [[1]]
#> [1] "a"
#> 
#> [[2]]
#> [1] "b"
#> 
#> [[3]]
#> [1] 99

# now the function in `.else` is applied based on a selector `sel`
# which doesn't correspond to the new structure of `x` anymore
# this leads to a change of the new second element of `x`
# which is already an output of the first call to `lmap_at`
(x <- lmap_at(x, which(!sel), num_ls_el))
#> [[1]]
#> [1] "a"
#> 
#> [[2]]
#> [1] 1
#> 
#> [[3]]
#> [1] 2
#> 
#> [[4]]
#> [1] 99

TimTeaFan avatar Aug 25 '22 13:08 TimTeaFan

Somewhat more minimal reprex:

library(purrr)

x <- list("a", 99)
str(lmap_if(x, is.character, ~ list(1, 2), .else = ~ list(3, 4)))
#> List of 4
#>  $ : num 1
#>  $ : num 3
#>  $ : num 4
#>  $ : num 99

Created on 2022-08-27 by the reprex package (v2.0.1)

hadley avatar Aug 27 '22 19:08 hadley