trendbreaker icon indicating copy to clipboard operation
trendbreaker copied to clipboard

model output for unquoted family argument (cosmetic)

Open TimTaylor opened this issue 4 years ago • 3 comments

Currently if an unquoted argument is given for the family parameter the formula in the resulting model is the entire function call. Borrowing from the glm code we can get a better input. There may be a better but this will work:

library(trendbreaker)

# current implementation
glm_poisson = glm_model(hp ~ 1 + cyl + drat + wt + qsec + am, poisson)
res <- glm_poisson$train(mtcars)
get_model(res)
#> 
#> Call:  glm(formula = hp ~ 1 + cyl + drat + wt + qsec + am, family = function (link = "log") 
#> {
#>     linktemp <- substitute(link)
#>     if (!is.character(linktemp)) 
#>         linktemp <- deparse(linktemp)
#>     okLinks <- c("log", "identity", "sqrt")
#>     family <- "poisson"
#>     if (linktemp %in% okLinks) 
#>         stats <- make.link(linktemp)
#>     else if (is.character(link)) {
#>         stats <- make.link(link)
#>         linktemp <- link
#>     }
#>     else {
#>         if (inherits(link, "link-glm")) {
#>             stats <- link
#>             if (!is.null(stats$name)) 
#>                 linktemp <- stats$name
#>         }
#>         else {
#>             stop(gettextf("link \"%s\" not available for %s family; available links are %s", 
#>                 linktemp, family, paste(sQuote(okLinks), collapse = ", ")), 
#>                 domain = NA)
#>         }
#>     }
#>     variance <- function(mu) mu
#>     validmu <- function(mu) all(is.finite(mu)) && all(mu > 0)
#>     dev.resids <- function(y, mu, wt) {
#>         r <- mu * wt
#>         p <- which(y > 0)
#>         r[p] <- (wt * (y * log(y/mu) - (y - mu)))[p]
#>         2 * r
#>     }
#>     aic <- function(y, n, mu, wt, dev) -2 * sum(dpois(y, mu, 
#>         log = TRUE) * wt)
#>     initialize <- expression({
#>         if (any(y < 0)) stop("negative values not allowed for the 'Poisson' family")
#>         n <- rep.int(1, nobs)
#>         mustart <- y + 0.1
#>     })
#>     simfun <- function(object, nsim) {
#>         wts <- object$prior.weights
#>         if (any(wts != 1)) 
#>             warning("ignoring prior weights")
#>         ftd <- fitted(object)
#>         rpois(nsim * length(ftd), ftd)
#>     }
#>     structure(list(family = family, link = linktemp, linkfun = stats$linkfun, 
#>         linkinv = stats$linkinv, variance = variance, dev.resids = dev.resids, 
#>         aic = aic, mu.eta = stats$mu.eta, initialize = initialize, 
#>         validmu = validmu, valideta = stats$valideta, simulate = simfun), 
#>         class = "family")
#> }, data = data)
#> 
#> Coefficients:
#> (Intercept)          cyl         drat           wt         qsec           am  
#>    6.256607     0.085193    -0.009274     0.176041    -0.136688     0.036604  
#> 
#> Degrees of Freedom: 31 Total (i.e. Null);  26 Residual
#> Null Deviance:       958.3 
#> Residual Deviance: 116.5     AIC: 343.6

# add family check / conversion from glm

glm_model <- function(formula, family, ...) {
  if (is.character(family)) 
    family <- get(family, mode = "function", envir = parent.frame())
  if (is.function(family)) 
    family <- family()
  if (is.null(family$family)) {
    print(family)
    stop("'family' not recognized")
  }
  
  structure(
    eval(bquote(list(
      model_class = "glm",
      train = function(data) {
        model <- glm(formula = .(formula), family = .(family$family), data = data, ...)
        trendbreaker:::model_fit(model, formula)
      }
    ))),
    class = c("trendbreaker_glm", "trendbreaker_model")
  )
}

# better formula in model output
glm_poisson = glm_model(hp ~ 1 + cyl + drat + wt + qsec + am, family = poisson)
res <- glm_poisson$train(mtcars)
get_model(res)
#> 
#> Call:  glm(formula = hp ~ 1 + cyl + drat + wt + qsec + am, family = "poisson", 
#>     data = data)
#> 
#> Coefficients:
#> (Intercept)          cyl         drat           wt         qsec           am  
#>    6.256607     0.085193    -0.009274     0.176041    -0.136688     0.036604  
#> 
#> Degrees of Freedom: 31 Total (i.e. Null);  26 Residual
#> Null Deviance:       958.3 
#> Residual Deviance: 116.5     AIC: 343.6

# still works with character input for family
glm_poisson = glm_model(hp ~ 1 + cyl + drat + wt + qsec + am, family = "poisson")
res <- glm_poisson$train(mtcars)
get_model(res)
#> 
#> Call:  glm(formula = hp ~ 1 + cyl + drat + wt + qsec + am, family = "poisson", 
#>     data = data)
#> 
#> Coefficients:
#> (Intercept)          cyl         drat           wt         qsec           am  
#>    6.256607     0.085193    -0.009274     0.176041    -0.136688     0.036604  
#> 
#> Degrees of Freedom: 31 Total (i.e. Null);  26 Residual
#> Null Deviance:       958.3 
#> Residual Deviance: 116.5     AIC: 343.6

Created on 2020-08-04 by the reprex package (v0.3.0)

TimTaylor avatar Aug 04 '20 10:08 TimTaylor

actually, probably better to convert to character and let glm do the failing...

TimTaylor avatar Aug 04 '20 10:08 TimTaylor

library(trendbreaker)

glm_model <- function(formula, family, ...) {
  if (!is.character(family)) {
    family <- deparse(substitute(family))
  }
  
  structure(
    eval(bquote(list(
      model_class = "glm",
      train = function(data) {
        model <- glm(formula = .(formula), family = .(family), data = data, ...)
        trendbreaker:::model_fit(model, formula)
      }
    ))),
    class = c("trendbreaker_glm", "trendbreaker_model")
  )
}


# better formula in model output
glm_poisson = glm_model(hp ~ 1 + cyl + drat + wt + qsec + am, family = poisson)
res <- glm_poisson$train(mtcars)
get_model(res)
#> 
#> Call:  glm(formula = hp ~ 1 + cyl + drat + wt + qsec + am, family = "poisson", 
#>     data = data)
#> 
#> Coefficients:
#> (Intercept)          cyl         drat           wt         qsec           am  
#>    6.256607     0.085193    -0.009274     0.176041    -0.136688     0.036604  
#> 
#> Degrees of Freedom: 31 Total (i.e. Null);  26 Residual
#> Null Deviance:       958.3 
#> Residual Deviance: 116.5     AIC: 343.6

# still works with character input for family
glm_poisson = glm_model(hp ~ 1 + cyl + drat + wt + qsec + am, family = "poisson")
res <- glm_poisson$train(mtcars)
get_model(res)
#> 
#> Call:  glm(formula = hp ~ 1 + cyl + drat + wt + qsec + am, family = "poisson", 
#>     data = data)
#> 
#> Coefficients:
#> (Intercept)          cyl         drat           wt         qsec           am  
#>    6.256607     0.085193    -0.009274     0.176041    -0.136688     0.036604  
#> 
#> Degrees of Freedom: 31 Total (i.e. Null);  26 Residual
#> Null Deviance:       958.3 
#> Residual Deviance: 116.5     AIC: 343.6

Created on 2020-08-04 by the reprex package (v0.3.0)

TimTaylor avatar Aug 04 '20 11:08 TimTaylor

will be closed by trending

TimTaylor avatar Aug 12 '20 10:08 TimTaylor