funneljoin
funneljoin copied to clipboard
Issue with remote tables
When the funnel join is run on remote tables it returns 0 rows, whereas the same join run on collected (local) tables returns more than 0 rows.
library(tidyverse)
library(datacampr)
library(funneljoin)
# Funnel-join experiment starts to registration events on domain_userid,
# matching root_tstamp against registered_at with type "withingap" and a
# 30-day `dt`, then keep rows whose root_tstamp is later than "2018-07-15".
# No collect() is called here, so `regs` is whatever the tbl_* helpers and
# the join return (presumably a remote table -- confirm).
registration_events <- tbl_views_snowplow_registration_events()
max_gap <- as.difftime(30, units = "days")

regs <- tbl_views_snowplow_experiment_starts() %>%
  after_inner_join(
    registration_events,
    by_user = "domain_userid",
    by_time = c("root_tstamp" = "registered_at"),
    type = "withingap",
    dt = max_gap
  ) %>%
  filter(root_tstamp > "2018-07-15")
# Collected variant: both inputs are collect()ed before the second funnel
# join, and user_id is cast to integer on both sides.

# Exercises completed after "2018-08-01"; the cast happens before collect().
exercises_local <- tbl_main_user_exercises() %>%
  filter(completed_at > "2018-08-01") %>%
  mutate(user_id = as.integer(user_id)) %>%
  collect()

regs %>%
  collect() %>%
  mutate(user_id = as.integer(user_id)) %>%
  after_join(
    exercises_local,
    by_user = "user_id",
    by_time = c("root_tstamp" = "completed_at"),
    type = "any-firstafter",
    mode = "inner"
  ) %>%
  count()
# Non-collected variant of the second funnel join: neither `regs` nor the
# exercises table is collect()ed before after_join().
# NOTE(review): unlike the collected variant, user_id is not cast with
# as.integer() on either side here.

# Exercises completed after "2018-08-01", left uncollected.
exercises_remote <- tbl_main_user_exercises() %>%
  filter(completed_at > "2018-08-01")

regs %>%
  after_join(
    exercises_remote,
    by_user = "user_id",
    by_time = c("root_tstamp" = "completed_at"),
    type = "any-firstafter",
    mode = "inner"
  ) %>%
  count()
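The following two queries, which each perform only a single funnel join, give the same answer whether run remotely or on collected tables. So the problem seems to be with performing multiple funnel joins in a row: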
# Single funnel join without collect(): registration events after
# "2018-07-15" joined to exercises completed after "2018-08-01" on user_id,
# matching registered_at against completed_at.

# Right-hand table of the join, left uncollected.
exercises_after_aug <- tbl_main_user_exercises() %>%
  filter(completed_at > "2018-08-01")

tbl_views_snowplow_registration_events() %>%
  filter(registered_at > "2018-07-15") %>%
  after_join(
    exercises_after_aug,
    by_user = "user_id",
    by_time = c("registered_at" = "completed_at"),
    type = "any-firstafter",
    mode = "inner"
  ) %>%
  count()
# Single funnel join on collected data: same filters and join arguments as
# the non-collected single-join query, but both inputs are collect()ed and
# user_id is cast to integer after collecting.

# Right-hand table of the join, collected and then cast.
exercises_after_aug_local <- tbl_main_user_exercises() %>%
  filter(completed_at > "2018-08-01") %>%
  collect() %>%
  mutate(user_id = as.integer(user_id))

tbl_views_snowplow_registration_events() %>%
  filter(registered_at > "2018-07-15") %>%
  collect() %>%
  mutate(user_id = as.integer(user_id)) %>%
  after_join(
    exercises_after_aug_local,
    by_user = "user_id",
    by_time = c("registered_at" = "completed_at"),
    type = "any-firstafter",
    mode = "inner"
  ) %>%
  count()
The current workaround is to disallow multiple funnel joins in a row on remote tables; the underlying issue still needs to be figured out.