dplyr
dplyr copied to clipboard
Request non-equal join: closest() supports dual-sided inequality <>
For example, I now have the following Table 1 and Table 2. I want to find a value2
from both sides that is closest to the value
based on matching the IDs. However, closest() only supports an inequality involving one of: >, >=, <, or <=.
library(purrr)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
df1 = tibble(
ID = c("NO1", "NO1", "NO2", "NO3", "NO3", "NO3", "NO4", "NO5",
"NO5", "NO5", "NO5", "NO6", "NO7", "NO7", "NO7", "NO7",
"NO7", "NO8", "NO9", "NO9"),
value = c(11, 25, 22, 24, 29, 1.75, 26, 27, 55, 44, 2.1, 31,
35, 33, 410, 21, 1.44, 80, 75, 52))
df1
#> # A tibble: 20 × 2
#> ID value2
#> <chr> <dbl>
#> 1 NO1 11
#> 2 NO1 25
#> 3 NO2 22
#> 4 NO3 24
#> 5 NO3 29
#> 6 NO3 1.75
#> 7 NO4 26
#> 8 NO5 27
#> 9 NO5 55
#> 10 NO5 44
#> 11 NO5 2.1
#> 12 NO6 31
#> 13 NO7 35
#> 14 NO7 33
#> 15 NO7 410
#> 16 NO7 21
#> 17 NO7 1.44
#> 18 NO8 80
#> 19 NO9 75
#> 20 NO9 52
df2 = tibble(
编号 = c("NO1", "NO1", "NO2", "NO3", "NO3", "NO4", "NO5", "NO5",
"NO5", "NO5", "NO6", "NO7", "NO7", "NO7", "NO8", "NO9", "NO9"),
数值2 = c(11.2, 24.6, 21.6, 22, 29.5, 26.1, 26.9, 26.4, 54.3,
42.1, 30.2, 34.7, 31.2, 20.59, 79.8, 74.2, 50.9))
df2
#> # A tibble: 17 × 2
#> 编号 数值2
#> <chr> <dbl>
#> 1 NO1 11.2
#> 2 NO1 24.6
#> 3 NO2 21.6
#> 4 NO3 22
#> 5 NO3 29.5
#> 6 NO4 26.1
#> 7 NO5 26.9
#> 8 NO5 26.4
#> 9 NO5 54.3
#> 10 NO5 42.1
#> 11 NO6 30.2
#> 12 NO7 34.7
#> 13 NO7 31.2
#> 14 NO7 20.6
#> 15 NO8 79.8
#> 16 NO9 74.2
#> 17 NO9 50.9
The manual method I can think of right now is:
df1 %>%
left_join(df2, join_by(编号), relationship = "many-to-many") %>%
group_nest(编号, 数值) %>%
mutate(数值2 = map2_dbl(数值, data, \(x, y) y$数值2[which.min(abs(y$数值2 - x))]),
误差 = abs(数值 - 数值2)) %>%
select(-data)
#> # A tibble: 20 × 4
#> 编号 数值 数值2 误差
#> <chr> <dbl> <dbl> <dbl>
#> 1 NO1 11 11.2 0.200
#> 2 NO1 25 24.6 0.400
#> 3 NO2 22 21.6 0.400
#> 4 NO3 1.75 22 20.2
#> 5 NO3 24 22 2
#> 6 NO3 29 29.5 0.5
#> 7 NO4 26 26.1 0.100
#> 8 NO5 2.1 26.4 24.3
#> 9 NO5 27 26.9 0.100
#> 10 NO5 44 42.1 1.90
#> 11 NO5 55 54.3 0.700
#> 12 NO6 31 30.2 0.800
#> 13 NO7 1.44 20.6 19.2
#> 14 NO7 21 20.6 0.41
#> 15 NO7 33 34.7 1.70
#> 16 NO7 35 34.7 0.300
#> 17 NO7 410 34.7 375.
#> 18 NO8 80 79.8 0.200
#> 19 NO9 52 50.9 1.10
#> 20 NO9 75 74.2 0.800