Tibble Trickiness - $ operator is invalid for atomic vectors Debugging and check_calibration updates
https://github.com/facebookexperimental/Robyn/blob/5915aa9d5d31d8947a274aa00fdf30b8947823e5/R/R/checks.R#L628
Hey there, I kept running into something odd in my Robyn code: the error kept suggesting I had the wrong data type and format. It looks like the real issue is an incompatibility between the tibble class that the object inherits and the `$` column accessor, which is typically used on a data frame.
InputCollect <- robyn_inputs(InputCollect = InputCollect, calibration_input = calibration_input)
> Error in `filter()`:
> ℹ In argument: `get(date_var) >= temp$liftStartDate`.
> Caused by error in `temp$liftStartDate`:
> ! $ operator is invalid for atomic vectors
> Run `rlang::last_trace()` to see where the error occurred.
I was able to overcome this by rewriting the calibration function locally, but wanted to flag this issue in case it is more widespread. I'm on Robyn 3.12.1 and R version 4.5.2.
Also, it looks like the end of this check function looks for a field called "scope", whereas the example .Rmd and the expected InputCollect use "calibration_scope"! Example Notebook here. It seems like this check would never actually run, right?
Thanks for your time and helping maintain this helpful resource!
# Validate the calibration experiments in `calibration_input` against the
# modelling data and return the table with a `channel_selected` column added.
#
# Arguments:
#   dt_input             Data frame with the modelling data. The column named by
#                        `date_var` and the columns named in each experiment's
#                        `channel` are read from it.
#   date_var             Name (string) of the date column in `dt_input`.
#   calibration_input    Table of experiments, or NULL. NULL skips every check.
#   dayInterval          Not used inside this function; kept so the signature
#                        stays compatible with existing callers.
#   dep_var              Value every `metric` entry must equal.
#   window_start,
#   window_end           Bounds each experiment's liftStartDate/liftEndDate must
#                        lie within.
#   paid_media_spends    Paid media spend column names (valid channel names).
#   organic_vars         Organic variable names (valid channel names).
#   paid_media_selected  Names used to translate a spend name into its selected
#                        counterpart; matched by position to `paid_media_spends`.
#
# Returns:
#   NULL when `calibration_input` is NULL; otherwise `calibration_input` as a
#   tibble with the extra column `channel_selected`.
#
# Errors (stop): missing required columns, non-numeric/NA `liftAbs`, unknown
#   channels, dates outside the window or start > end, `metric` != `dep_var`,
#   `calibration_scope` not in c("immediate", "total").
# Warnings: experiment spend differs by more than 10% from the spend found in
#   `dt_input` for the same dates; `confidence` below 0.8.
check_calibration <- function(dt_input,
                              date_var, calibration_input, dayInterval, dep_var,
                              window_start, window_end, paid_media_spends, organic_vars, paid_media_selected) {
  if (is.null(calibration_input)) {
    return(calibration_input)
  }

  calibration_input <- as_tibble(as.data.frame(calibration_input))

  required_cols <- c(
    "channel", "liftStartDate", "liftEndDate", "liftAbs",
    "spend", "confidence", "metric", "calibration_scope"
  )
  if (!all(required_cols %in% names(calibration_input))) {
    stop("Input 'calibration_input' must contain columns: ", v2t(required_cols), ". Check the demo script for instruction.")
  }
  if (!is.numeric(calibration_input$liftAbs) || any(is.na(calibration_input$liftAbs))) {
    stop("Check 'calibration_input$liftAbs': all lift values must be valid numerical numbers")
  }

  # One experiment may name several channels separated by "+", ",", ";" or
  # whitespace; cal_media holds one character vector per experiment.
  all_media <- c(paid_media_spends, organic_vars)
  cal_media <- str_split(calibration_input$channel, "\\+|,|;|\\s")
  cal_media_flat <- unlist(cal_media)
  if (!all(cal_media_flat %in% all_media)) {
    unknown <- unique(cal_media_flat[!cal_media_flat %in% all_media])
    stop(sprintf(
      "All channels from 'calibration_input' must be any of: %s.\n Check: %s",
      v2t(all_media), v2t(unknown)
    ))
  }

  # Translate a spend name into the matching selected name; names that are
  # already selected or organic pass through unchanged.
  map_channel <- function(y) {
    if (y %in% c(paid_media_selected, organic_vars)) {
      return(y)
    }
    hit <- paid_media_selected[paid_media_spends == y]
    if (length(hit) == 0) NA_character_ else as.character(hit[[1]])
  }
  calibration_input$channel_selected <- vapply(
    cal_media,
    function(x) paste0(vapply(x, map_channel, character(1)), collapse = "+"),
    character(1)
  )

  rows <- seq_len(nrow(calibration_input))
  # Plain data.frame row, so `$` always returns the scalar cell value.
  row_at <- function(i) as.data.frame(calibration_input[i, ])

  # Date sanity: inside the modelling window and start <= end.
  for (i in rows) {
    temp <- row_at(i)
    if (temp$liftStartDate < (window_start) || temp$liftEndDate > (window_end)) {
      stop(sprintf(
        paste(
          "Your calibration's date range for %s between %s and %s is not within modeling window (%s to %s).",
          "Please, remove this experiment from 'calibration_input'."
        ),
        temp$channel, temp$liftStartDate, temp$liftEndDate, window_start, window_end
      ))
    }
    if (temp$liftStartDate > temp$liftEndDate) {
      stop(sprintf(
        paste(
          "Your calibration's date range for %s between %s and %s should respect liftStartDate <= liftEndDate.",
          "Please, correct this experiment from 'calibration_input'."
        ),
        temp$channel, temp$liftStartDate, temp$liftEndDate
      ))
    }
  }

  # Spend sanity: compare the reported spend with the spend in dt_input.
  # dt_input is indexed with base R on purpose: inside dplyr::filter() data
  # masking lets a dt_input column (e.g. one named `temp`) shadow a local
  # variable, which produced "$ operator is invalid for atomic vectors".
  input_dates <- dt_input[[date_var]]
  for (i in rows) {
    temp <- row_at(i)
    channels <- cal_media[[i]]
    if (all(channels %in% organic_vars)) next
    # which() drops NA comparisons, mirroring filter()'s treatment of NA rows.
    in_window <- which(input_dates >= temp$liftStartDate & input_dates <= temp$liftEndDate)
    dt_input_spend <- round(sum(dt_input[in_window, channels, drop = FALSE]), 0)
    if (dt_input_spend > temp$spend * 1.1 || dt_input_spend < temp$spend * 0.9) {
      warning(sprintf(
        paste(
          "Your calibration's spend (%s) for %s between %s and %s does not match your dt_input spend (~%s).",
          "Please, check again your dates or split your media inputs into separate media channels."
        ),
        formatNum(temp$spend, 0), temp$channel, temp$liftStartDate, temp$liftEndDate,
        formatNum(dt_input_spend, 3, abbr = TRUE)
      ))
    }
  }

  # Low-confidence experiments are allowed but flagged.
  for (i in rows) {
    temp <- row_at(i)
    if (temp$confidence < 0.8) {
      warning(sprintf(
        paste(
          "Your calibration's confidence for %s between %s and %s is lower than 80%%, thus low-confidence.",
          "Consider getting rid of this experiment and running it again."
        ),
        temp$channel, temp$liftStartDate, temp$liftEndDate
      ))
    }
  }

  # Every experiment must measure the dependent variable.
  for (i in rows) {
    temp <- row_at(i)
    if (temp$metric != dep_var) {
      stop(sprintf(
        paste(
          "Your calibration's metric for %s between %s and %s is not '%s'.",
          "Please, remove this experiment from 'calibration_input'."
        ),
        temp$channel, temp$liftStartDate, temp$liftEndDate, dep_var
      ))
    }
  }

  # The required column is `calibration_scope`; checking a column called
  # `scope` (as before) meant this validation was silently skipped.
  valid_scopes <- c("immediate", "total")
  if (!all(calibration_input$calibration_scope %in% valid_scopes)) {
    stop("Inputs in 'calibration_input$calibration_scope' must be any of: ", v2t(valid_scopes))
  }

  calibration_input
}
# Swap the locally defined check_calibration() into the Robyn namespace so
# that Robyn's own calls pick up the patched version for this session.
utils::assignInNamespace(
  x = "check_calibration",
  value = check_calibration,
  ns = "Robyn"
)
This seems to be a recent change in tibble. This used to work a couple of months ago, and the same script now no longer works. Reverting to an older version of tibble might work as well, but it would be better to just patch this in.
Notably, when debugging, the call to `temp$liftStartDate` works on its own and only fails inside `filter()`. Maybe that function is what changed?