connectapi icon indicating copy to clipboard operation
connectapi copied to clipboard

Release connectapi

Open gadenbuie opened this issue 11 months ago • 0 comments

FYI, there's now a show-stopping interaction between dplyr, dbplyr and connectapi with the version of connectapi that's currently on CRAN.

It is solved by #210, but it's a big enough issue that I'm reporting it as motivation to release the latest version of connectapi.

The essence of the issue is that dplyr and dbplyr now expect new syntax for the by argument of the *_join() functions. connectapi's overriding of the tbl_lazy methods gets in the way and breaks many joins.

Currently (CRAN version)

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(dbplyr)
#> 
#> Attaching package: 'dbplyr'
#> The following objects are masked from 'package:dplyr':
#> 
#>     ident, sql
library(connectapi)
#> Registered S3 methods overwritten by 'connectapi':
#>   method            from  
#>   dim.tbl_lazy      dbplyr
#>   dimnames.tbl_lazy dbplyr

g <- c(1, 1, 2, 2, 2)
h <- c(1, 2, 3, 4, 5)

x <- lazy_frame(
  g = g, h = h, x = 1:5,
  con = simulate_postgres()
) |>
  mutate(x_10 = x * 10)
#> Error in `x$"ops"`:
#> ! The `$` method of <tbl_lazy> is for internal use only.
#> ℹ Use `dplyr::pull()` to get the values in a column.

y <- lazy_frame(
  g = g, h = h, y = 5:1,
  con = simulate_postgres()
) |>
  mutate(y_10 = y * 10)
#> Error in `x$"ops"`:
#> ! The `$` method of <tbl_lazy> is for internal use only.
#> ℹ Use `dplyr::pull()` to get the values in a column.


anti_join(x, y, by = "g") |> show_query()
#> Error in eval(expr, envir, enclos): object 'x' not found

join_vars <- intersect(colnames(x), colnames(y))
#> Error in eval(expr, envir, enclos): object 'x' not found
anti_join(x, y, by = join_vars) |> show_query()
#> Error in eval(expr, envir, enclos): object 'x' not found

join_vars <- function() "g"
anti_join(x, y, by = join_vars()) |> show_query()
#> Error in eval(expr, envir, enclos): object 'x' not found

After (dev version)

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(dbplyr)
#> 
#> Attaching package: 'dbplyr'
#> The following objects are masked from 'package:dplyr':
#> 
#>     ident, sql
library(connectapi)

g <- c(1, 1, 2, 2, 2)
h <- c(1, 2, 3, 4, 5)

x <- lazy_frame(
  g = g, h = h, x = 1:5,
  con = simulate_postgres()
) |>
  mutate(x_10 = x * 10)

y <- lazy_frame(
  g = g, h = h, y = 5:1,
  con = simulate_postgres()
) |>
  mutate(y_10 = y * 10)


anti_join(x, y, by = "g") |> show_query()
#> <SQL>
#> SELECT `LHS`.*
#> FROM (
#>   SELECT `df`.*, `x` * 10.0 AS `x_10`
#>   FROM `df`
#> ) AS `LHS`
#> WHERE NOT EXISTS (
#>   SELECT 1 FROM (
#>   SELECT `df`.*, `y` * 10.0 AS `y_10`
#>   FROM `df`
#> ) AS `RHS`
#>   WHERE (`LHS`.`g` = `RHS`.`g`)
#> )

join_vars <- intersect(colnames(x), colnames(y))
anti_join(x, y, by = join_vars) |> show_query()
#> <SQL>
#> SELECT `LHS`.*
#> FROM (
#>   SELECT `df`.*, `x` * 10.0 AS `x_10`
#>   FROM `df`
#> ) AS `LHS`
#> WHERE NOT EXISTS (
#>   SELECT 1 FROM (
#>   SELECT `df`.*, `y` * 10.0 AS `y_10`
#>   FROM `df`
#> ) AS `RHS`
#>   WHERE (`LHS`.`g` = `RHS`.`g`) AND (`LHS`.`h` = `RHS`.`h`)
#> )

join_vars <- function() "g"
anti_join(x, y, by = join_vars()) |> show_query()
#> <SQL>
#> SELECT `LHS`.*
#> FROM (
#>   SELECT `df`.*, `x` * 10.0 AS `x_10`
#>   FROM `df`
#> ) AS `LHS`
#> WHERE NOT EXISTS (
#>   SELECT 1 FROM (
#>   SELECT `df`.*, `y` * 10.0 AS `y_10`
#>   FROM `df`
#> ) AS `RHS`
#>   WHERE (`LHS`.`g` = `RHS`.`g`)
#> )

gadenbuie avatar Mar 21 '24 21:03 gadenbuie