connectapi
connectapi copied to clipboard
Release connectapi
FYI, there's now a show-stopping interaction between dplyr, dbplyr and connectapi with the version of connectapi that's currently on CRAN.
It is solved by #210, but it's a big enough issue that I'm reporting it as motivation to release the latest version of connectapi.
The essence of the issue is that dplyr and dbplyr now expect new syntax for the by
argument of the *_join()
functions. connectapi's overriding of the tbl_lazy
methods get in the way and break many joins.
Currently (CRAN version)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(dbplyr)
#>
#> Attaching package: 'dbplyr'
#> The following objects are masked from 'package:dplyr':
#>
#> ident, sql
library(connectapi)
#> Registered S3 methods overwritten by 'connectapi':
#> method from
#> dim.tbl_lazy dbplyr
#> dimnames.tbl_lazy dbplyr
g <- c(1, 1, 2, 2, 2)
h <- c(1, 2, 3, 4, 5)
x <- lazy_frame(
g = g, h = h, x = 1:5,
con = simulate_postgres()
) |>
mutate(x_10 = x * 10)
#> Error in `x$"ops"`:
#> ! The `$` method of <tbl_lazy> is for internal use only.
#> ℹ Use `dplyr::pull()` to get the values in a column.
y <- lazy_frame(
g = g, h = h, y = 5:1,
con = simulate_postgres()
) |>
mutate(y_10 = y * 10)
#> Error in `x$"ops"`:
#> ! The `$` method of <tbl_lazy> is for internal use only.
#> ℹ Use `dplyr::pull()` to get the values in a column.
anti_join(x, y, by = "g") |> show_query()
#> Error in eval(expr, envir, enclos): object 'x' not found
join_vars <- intersect(colnames(x), colnames(y))
#> Error in eval(expr, envir, enclos): object 'x' not found
anti_join(x, y, by = join_vars) |> show_query()
#> Error in eval(expr, envir, enclos): object 'x' not found
join_vars <- function() "g"
anti_join(x, y, by = join_vars()) |> show_query()
#> Error in eval(expr, envir, enclos): object 'x' not found
After (dev version)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(dbplyr)
#>
#> Attaching package: 'dbplyr'
#> The following objects are masked from 'package:dplyr':
#>
#> ident, sql
library(connectapi)
g <- c(1, 1, 2, 2, 2)
h <- c(1, 2, 3, 4, 5)
x <- lazy_frame(
g = g, h = h, x = 1:5,
con = simulate_postgres()
) |>
mutate(x_10 = x * 10)
y <- lazy_frame(
g = g, h = h, y = 5:1,
con = simulate_postgres()
) |>
mutate(y_10 = y * 10)
anti_join(x, y, by = "g") |> show_query()
#> <SQL>
#> SELECT `LHS`.*
#> FROM (
#> SELECT `df`.*, `x` * 10.0 AS `x_10`
#> FROM `df`
#> ) AS `LHS`
#> WHERE NOT EXISTS (
#> SELECT 1 FROM (
#> SELECT `df`.*, `y` * 10.0 AS `y_10`
#> FROM `df`
#> ) AS `RHS`
#> WHERE (`LHS`.`g` = `RHS`.`g`)
#> )
join_vars <- intersect(colnames(x), colnames(y))
anti_join(x, y, by = join_vars) |> show_query()
#> <SQL>
#> SELECT `LHS`.*
#> FROM (
#> SELECT `df`.*, `x` * 10.0 AS `x_10`
#> FROM `df`
#> ) AS `LHS`
#> WHERE NOT EXISTS (
#> SELECT 1 FROM (
#> SELECT `df`.*, `y` * 10.0 AS `y_10`
#> FROM `df`
#> ) AS `RHS`
#> WHERE (`LHS`.`g` = `RHS`.`g`) AND (`LHS`.`h` = `RHS`.`h`)
#> )
join_vars <- function() "g"
anti_join(x, y, by = join_vars()) |> show_query()
#> <SQL>
#> SELECT `LHS`.*
#> FROM (
#> SELECT `df`.*, `x` * 10.0 AS `x_10`
#> FROM `df`
#> ) AS `LHS`
#> WHERE NOT EXISTS (
#> SELECT 1 FROM (
#> SELECT `df`.*, `y` * 10.0 AS `y_10`
#> FROM `df`
#> ) AS `RHS`
#> WHERE (`LHS`.`g` = `RHS`.`g`)
#> )