insight icon indicating copy to clipboard operation
insight copied to clipboard

`get_variance()` and `null_model()` issues when called from inside functions

Open strengejacke opened this issue 3 years ago • 6 comments

library(glmmTMB)
library(insight)

my_fun0 <- function(y) {
  y <- transform(y, dudes = mpg / 100)
  glmmTMB(dudes ~ cyl, data = y, family = beta_family())
}

my_fun1 <- function(y) {
  y <- transform(y, dudes = mpg / 100)
  model <- glmmTMB(dudes ~ cyl, data = y, family = beta_family())
  null_model(model)
}

null_model(my_fun0(mtcars))
#> Error in eval(offsub, data, enclos = environment(formula)): object 'y' not found
my_fun1(mtcars)
#> Error in eval(offsub, data, enclos = environment(formula)): object 'y' not found

d <- transform(mtcars, dudes = mpg / 100)
model <- glmmTMB(dudes ~ cyl, data = d, family = beta_family())
null_model(model)
#> Formula:          dudes ~ 1
#> Data: d
#>       AIC       BIC    logLik  df.resid 
#> -88.53589 -85.60442  46.26794        30 
#> 
#> Number of obs: 32
#> 
#> Dispersion parameter for beta family (): 46.8 
#> 
#> Fixed Effects:
#> 
#> Conditional model:
#> (Intercept)  
#>       -1.38

Created on 2022-05-18 by the reprex package (v2.0.1)

See discussion https://github.com/glmmTMB/glmmTMB/issues/169

strengejacke avatar May 18 '22 06:05 strengejacke

@bwiernik @bbolker In insight::null_model(), changing

stats::update(model, ~1)

to

out <- stats::update(model, ~1, evaluate = FALSE)
eval(out, envir = parent.frame())

at least helps for the 2nd case.

library(glmmTMB)
library(insight)

my_fun0 <- function(y) {
  y <- transform(y, dudes = mpg / 100)
  glmmTMB(dudes ~ cyl, data = y, family = beta_family())
}

my_fun1 <- function(y) {
  y <- transform(y, dudes = mpg / 100)
  model <- glmmTMB(dudes ~ cyl, data = y, family = beta_family())
  null_model(model)
}

null_model(my_fun0(mtcars))
#> Error in eval(offsub, data, enclos = environment(formula)): object 'y' not found
my_fun1(mtcars)
#> Formula:          dudes ~ 1
#> Data: y
#>       AIC       BIC    logLik  df.resid 
#> -88.53589 -85.60442  46.26794        30 
#> 
#> Number of obs: 32
#> 
#> Dispersion parameter for beta family (): 46.8 
#> 
#> Fixed Effects:
#> 
#> Conditional model:
#> (Intercept)  
#>       -1.38

d <- transform(mtcars, dudes = mpg / 100)
model <- glmmTMB(dudes ~ cyl, data = d, family = beta_family())
null_model(model)
#> Formula:          dudes ~ 1
#> Data: d
#>       AIC       BIC    logLik  df.resid 
#> -88.53589 -85.60442  46.26794        30 
#> 
#> Number of obs: 32
#> 
#> Dispersion parameter for beta family (): 46.8 
#> 
#> Fixed Effects:
#> 
#> Conditional model:
#> (Intercept)  
#>       -1.38

Created on 2022-05-18 by the reprex package (v2.0.1)

strengejacke avatar May 18 '22 06:05 strengejacke

Yes, the second case being fixed makes sense. I'm not sure how fixable the first case is given that the object y only exists within the enclosure of the function which no longer exists. What's the real context where this is a problem?

bwiernik avatar May 18 '22 13:05 bwiernik

One not-ridiculous use case is: generate a list of model fits, simulating the data within the model-fitting function each time. Then map over the list of model fits to compute summaries etc.. (After all, the OP presumably came up with the original version of this example in some real workflow.) It can often be useful to look in the environment of the formula:

m <- my_fun0(mtcars)
> ls(environment(formula(m)))
[1] "y"
> str(environment(formula(m))$y)
'data.frame':	32 obs. of  12 variables:

There is some pretty nightmarish stuff in lme4 that hunts through a variety of possible environments to find the required stuff ... but you could try to evaluate in the environment of the formula, then in the parent environment, then give up. (This solution can of course be defeated - just define the formula in one environment, define the data and fit the model with the predefined formula in another environment, then try to update the model in a third environment ...

bbolker avatar May 18 '22 13:05 bbolker

Checking if .variance_family_beta(x, mu, sig) can be replaced by stats::family(x)$variance(sig)

Using .variance_family_beta(x, mu, sig)

library(insight)
library(glmmTMB)
library(betareg)
library(datawizard)
library(performance)

data(iris)
iris$y <- normalize(iris$Sepal.Length, include_bounds  = FALSE)

m1 <- betareg(y ~ Petal.Width + Petal.Length + Species, data = iris)
m2 <- glmmTMB(y ~ Petal.Width + Petal.Length + Species, data = iris, family = beta_family())

r2(m1)
#> # R2 for Beta Regression
#>   Pseudo R2: 0.734
r2(m2)
#> Warning: mu of 0.8 is too close to zero, estimate of random effect variances may
#>   be unreliable.
#> Random effect variances not available. Returned R2 does not account for random effects.
#> # R2 for Mixed Models
#> 
#>   Conditional R2: NA
#>      Marginal R2: 0.883

m1 <- glmmTMB(y ~ Petal.Width + Petal.Length + (1 | Species), data = iris, family = beta_family())
m2 <- glmmTMB(y ~ Petal.Width + Petal.Length, data = iris, family = beta_family())

r2(m1)
#> Warning: mu of 0.7 is too close to zero, estimate of random effect variances may
#>   be unreliable.
#> # R2 for Mixed Models
#> 
#>   Conditional R2: 0.997
#>      Marginal R2: 0.766
r2(m2)
#> Warning: mu of 0.8 is too close to zero, estimate of random effect variances may
#>   be unreliable.
#> Random effect variances not available. Returned R2 does not account for random effects.
#> # R2 for Mixed Models
#> 
#>   Conditional R2: NA
#>      Marginal R2: 0.874

Using stats::family(x)$variance(sig)

library(insight)
library(glmmTMB)
library(betareg)
library(datawizard)
library(performance)

data(iris)
iris$y <- normalize(iris$Sepal.Length, include_bounds  = FALSE)

m1 <- betareg(y ~ Petal.Width + Petal.Length + Species, data = iris)
m2 <- glmmTMB(y ~ Petal.Width + Petal.Length + Species, data = iris, family = beta_family())

r2(m1)
#> # R2 for Beta Regression
#>   Pseudo R2: 0.734
r2(m2)
#> Warning: mu of 0.8 is too close to zero, estimate of random effect variances may
#>   be unreliable.
#> Random effect variances not available. Returned R2 does not account for random effects.
#> # R2 for Mixed Models
#> 
#>   Conditional R2: NA
#>      Marginal R2: 1.000

m1 <- glmmTMB(y ~ Petal.Width + Petal.Length + (1 | Species), data = iris, family = beta_family())
m2 <- glmmTMB(y ~ Petal.Width + Petal.Length, data = iris, family = beta_family())

r2(m1)
#> Warning: mu of 0.7 is too close to zero, estimate of random effect variances may
#>   be unreliable.
#> Warning: Model's distribution-specific variance is negative. Results are not
#>   reliable.
#> Warning in log1p(cvsquared): NaNs produced
#> # R2 for Mixed Models
#> 
#>   Conditional R2: NaN
#>      Marginal R2: NaN
r2(m2)
#> Warning: mu of 0.8 is too close to zero, estimate of random effect variances may
#>   be unreliable.
#> Random effect variances not available. Returned R2 does not account for random effects.
#> # R2 for Mixed Models
#> 
#>   Conditional R2: NA
#>      Marginal R2: 1.000

Using stats::family(x)$variance(mu)

library(insight)
library(glmmTMB)
library(betareg)
library(datawizard)
library(performance)

data(iris)
iris$y <- normalize(iris$Sepal.Length, include_bounds  = FALSE)

m1 <- betareg(y ~ Petal.Width + Petal.Length + Species, data = iris)
m2 <- glmmTMB(y ~ Petal.Width + Petal.Length + Species, data = iris, family = beta_family())

r2(m1)
#> # R2 for Beta Regression
#>   Pseudo R2: 0.734
r2(m2)
#> Warning: mu of 0.8 is too close to zero, estimate of random effect variances may
#>   be unreliable.
#> Random effect variances not available. Returned R2 does not account for random effects.
#> # R2 for Mixed Models
#> 
#>   Conditional R2: NA
#>      Marginal R2: 0.800

m1 <- glmmTMB(y ~ Petal.Width + Petal.Length + (1 | Species), data = iris, family = beta_family())
m2 <- glmmTMB(y ~ Petal.Width + Petal.Length, data = iris, family = beta_family())

r2(m1)
#> Warning: mu of 0.7 is too close to zero, estimate of random effect variances may
#>   be unreliable.
#> # R2 for Mixed Models
#> 
#>   Conditional R2: 0.950
#>      Marginal R2: 0.730
r2(m2)
#> Warning: mu of 0.8 is too close to zero, estimate of random effect variances may
#>   be unreliable.
#> Random effect variances not available. Returned R2 does not account for random effects.
#> # R2 for Mixed Models
#> 
#>   Conditional R2: NA
#>      Marginal R2: 0.787

Created on 2022-05-19 by the reprex package (v2.0.1)

strengejacke avatar May 19 '22 06:05 strengejacke

To summarize, it looks like using mu seems to give the best results. However, retrieving mu requires to compute the null-model. But I wonder why exp(mu) is needed? See https://github.com/easystats/insight/blob/e104d8a95c59c7092b5712e29da0118b05ced215/R/compute_variances.R#L590 (I think this was from the initial implementation from @bbolker). Why not use link_inverse(), which comes closer to the mean value of the outcome?

library(insight)
library(glmmTMB)
library(datawizard)

data(iris)
iris$y <- normalize(iris$Sepal.Length, include_bounds  = FALSE)

m1 <- glmmTMB(y ~ Petal.Width + Petal.Length + (1 | Species), data = iris, family = beta_family())
m2 <- glmmTMB(y ~ Petal.Width + Petal.Length, data = iris, family = beta_family())


exp(fixef(null_model(m1))[[1]])
#> (Intercept) 
#>   0.7473273
exp(fixef(null_model(m2))[[1]])
#> (Intercept) 
#>   0.7716764

mean(iris$y)
#> [1] 0.429179

link_inverse(m1)(fixef(null_model(m1))[[1]])
#> (Intercept) 
#>   0.4276974

strengejacke avatar May 19 '22 07:05 strengejacke

Using stats::family(x)$variance(mu), with mu = link_inverse(null_model) (instead of exp)

library(insight)
library(glmmTMB)
library(betareg)
library(datawizard)
library(performance)

data(iris)
iris$y <- normalize(iris$Sepal.Length, include_bounds  = FALSE)

m1 <- betareg(y ~ Petal.Width + Petal.Length + Species, data = iris)
m2 <- glmmTMB(y ~ Petal.Width + Petal.Length + Species, data = iris, family = beta_family())

r2(m1)
#> # R2 for Beta Regression
#>   Pseudo R2: 0.734
r2(m2)
#> Warning: mu of 0.4 is too close to zero, estimate of random effect variances may
#>   be unreliable.
#> Random effect variances not available. Returned R2 does not account for random effects.
#> # R2 for Mixed Models
#> 
#>   Conditional R2: NA
#>      Marginal R2: 0.555

m1 <- glmmTMB(y ~ Petal.Width + Petal.Length + (1 | Species), data = iris, family = beta_family())
m2 <- glmmTMB(y ~ Petal.Width + Petal.Length, data = iris, family = beta_family())

r2(m1)
#> Warning: mu of 0.4 is too close to zero, estimate of random effect variances may
#>   be unreliable.
#> # R2 for Mixed Models
#> 
#>   Conditional R2: 0.868
#>      Marginal R2: 0.667
r2(m2)
#> Warning: mu of 0.4 is too close to zero, estimate of random effect variances may
#>   be unreliable.
#> Random effect variances not available. Returned R2 does not account for random effects.
#> # R2 for Mixed Models
#> 
#>   Conditional R2: NA
#>      Marginal R2: 0.536

Created on 2022-05-19 by the reprex package (v2.0.1)

strengejacke avatar May 19 '22 07:05 strengejacke