broom P-value incorrect when using ddf argument in tidy.svyglm()

The problem

The p-value reported by tidy.svyglm() does not match the confidence interval when the ddf argument is provided (and is different from the default denominator degrees of freedom, which uses the df.residual of the svyglm object).

A quick search revealed that the ddf argument was implemented for the confidence interval following issue #839 with commit https://github.com/tidymodels/broom/commit/b7f690a056867cda5625f813b7e7418b4b85a7e5, but the current development version still seems to need it implemented for the p-value here: https://github.com/tidymodels/broom/blob/5d77eb743b3be3eca2df3c1228664925c4a481ce/R/survey-tidiers.R#L65

Since summary.svyglm() uses df.resid for the argument name and confint.svyglm() uses ddf for the argument name, it may require a dedicated argument in the tidy.svyglm() function.

Reproducible example

library(broom)
#> Warning: package 'broom' was built under R version 4.1.3
library(survey)
#> Loading required package: grid
#> Loading required package: Matrix
#> Loading required package: survival
#> 
#> Attaching package: 'survey'
#> The following object is masked from 'package:graphics':
#> 
#>     dotchart

data(api)

dstrat <- svydesign(id = ~ 1,
                    strata = ~ stype, 
                    weights = ~ pw, 
                    data = apistrat, 
                    fpc = ~fpc)

model <- svyglm(sch.wide ~ yr.rnd, design = dstrat, family = binomial)
#> Warning in eval(family$initialize): non-integer #successes in a binomial glm!


## Default ddf
## confint.svyglm(., ddf = NULL)
## summary.svyglm(., df.resid = NULL)
tidy(model,
     conf.int = TRUE,
     exponentiate = TRUE)
#> # A tibble: 2 x 7
#>   term        estimate std.error statistic  p.value conf.low conf.high
#>   <chr>          <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
#> 1 (Intercept)     4.42     0.179      8.28 1.89e-14    3.10       6.29
#> 2 yr.rndYes       2.19     0.702      1.12 2.64e- 1    0.550      8.75

## Same p-value in summary.svyglm
summary(model)
#> 
#> Call:
#> svyglm(formula = sch.wide ~ yr.rnd, design = dstrat, family = binomial)
#> 
#> Survey design:
#> svydesign(id = ~1, strata = ~stype, weights = ~pw, data = apistrat, 
#>     fpc = ~fpc)
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)   1.4857     0.1794   8.280 1.89e-14 ***
#> yr.rndYes     0.7853     0.7017   1.119    0.264    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> (Dispersion parameter for binomial family taken to be 1.005025)
#> 
#> Number of Fisher Scoring iterations: 4


## Changed ddf (but no change in p-value)
## confint.svyglm(., ddf = degf(dstrat))
## summary.svyglm(., df.resid = NULL)
tidy(model,
     conf.int = TRUE,
     exponentiate = TRUE,
     ddf = 5) ## extreme ddf to exaggerate difference
#> # A tibble: 2 x 7
#>   term        estimate std.error statistic  p.value conf.low conf.high
#>   <chr>          <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
#> 1 (Intercept)     4.42     0.179      8.28 1.89e-14    2.79       7.01
#> 2 yr.rndYes       2.19     0.702      1.12 2.64e- 1    0.361     13.3

## Correct p-value for ddf = 5 (df.resid = 5)
summary(model, df.resid = 5)
#> 
#> Call:
#> svyglm(formula = sch.wide ~ yr.rnd, design = dstrat, family = binomial)
#> 
#> Survey design:
#> svydesign(id = ~1, strata = ~stype, weights = ~pw, data = apistrat, 
#>     fpc = ~fpc)
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)   1.4857     0.1794   8.280 0.000419 ***
#> yr.rndYes     0.7853     0.7017   1.119 0.313952    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> (Dispersion parameter for binomial family taken to be 1.005025)
#> 
#> Number of Fisher Scoring iterations: 4

^{Created on 2022-04-07 by the reprex package (v2.0.1)}

Session info

sessioninfo::session_info()
#> - Session info ---------------------------------------------------------------
#>  setting  value
#>  version  R version 4.1.2 (2021-11-01)
#>  os       Windows 10 x64 (build 19044)
#>  system   x86_64, mingw32
#>  ui       RTerm
#>  language (EN)
#>  collate  English_United States.1252
#>  ctype    English_United States.1252
#>  tz       America/Denver
#>  date     2022-04-07
#>  pandoc   2.17.1.1 @ C:/Program Files/RStudio/bin/quarto/bin/ (via rmarkdown)
#> 
#> - Packages -------------------------------------------------------------------
#>  package     * version date (UTC) lib source
#>  assertthat    0.2.1   2019-03-21 [2] CRAN (R 4.1.2)
#>  backports     1.4.1   2021-12-13 [1] CRAN (R 4.1.2)
#>  broom       * 0.7.12  2022-01-28 [1] CRAN (R 4.1.3)
#>  cli           3.1.0   2021-10-27 [1] CRAN (R 4.1.2)
#>  crayon        1.4.2   2021-10-29 [1] CRAN (R 4.1.2)
#>  DBI           1.1.1   2021-01-15 [2] CRAN (R 4.1.2)
#>  digest        0.6.29  2021-12-01 [1] CRAN (R 4.1.2)
#>  dplyr         1.0.7   2021-06-18 [1] CRAN (R 4.1.2)
#>  ellipsis      0.3.2   2021-04-29 [1] CRAN (R 4.1.2)
#>  evaluate      0.14    2019-05-28 [2] CRAN (R 4.1.2)
#>  fansi         0.5.0   2021-05-25 [1] CRAN (R 4.1.2)
#>  fastmap       1.1.0   2021-01-25 [2] CRAN (R 4.1.2)
#>  fs            1.5.2   2021-12-08 [2] CRAN (R 4.1.2)
#>  generics      0.1.1   2021-10-25 [1] CRAN (R 4.1.2)
#>  glue          1.6.0   2021-12-17 [1] CRAN (R 4.1.2)
#>  highr         0.9     2021-04-16 [2] CRAN (R 4.1.2)
#>  htmltools     0.5.2   2021-08-25 [2] CRAN (R 4.1.2)
#>  knitr         1.37    2021-12-16 [1] CRAN (R 4.1.2)
#>  lattice       0.20-45 2021-09-22 [1] CRAN (R 4.1.2)
#>  lifecycle     1.0.1   2021-09-24 [1] CRAN (R 4.1.2)
#>  magrittr      2.0.1   2020-11-17 [1] CRAN (R 4.1.2)
#>  Matrix      * 1.4-0   2021-12-08 [1] CRAN (R 4.1.2)
#>  mitools       2.4     2019-04-26 [1] CRAN (R 4.1.2)
#>  pillar        1.6.4   2021-10-18 [1] CRAN (R 4.1.2)
#>  pkgconfig     2.0.3   2019-09-22 [1] CRAN (R 4.1.2)
#>  purrr         0.3.4   2020-04-17 [1] CRAN (R 4.1.2)
#>  R.cache       0.15.0  2021-04-30 [2] CRAN (R 4.1.3)
#>  R.methodsS3   1.8.1   2020-08-26 [2] CRAN (R 4.1.1)
#>  R.oo          1.24.0  2020-08-26 [2] CRAN (R 4.1.1)
#>  R.utils       2.11.0  2021-09-26 [2] CRAN (R 4.1.3)
#>  R6            2.5.1   2021-08-19 [1] CRAN (R 4.1.2)
#>  reprex        2.0.1   2021-08-05 [2] CRAN (R 4.1.2)
#>  rlang         0.4.12  2021-10-18 [1] CRAN (R 4.1.2)
#>  rmarkdown     2.11    2021-09-14 [2] CRAN (R 4.1.2)
#>  rstudioapi    0.13    2020-11-12 [2] CRAN (R 4.1.2)
#>  sessioninfo   1.2.2   2021-12-06 [2] CRAN (R 4.1.2)
#>  stringi       1.7.6   2021-11-29 [1] CRAN (R 4.1.2)
#>  stringr       1.4.0   2019-02-10 [1] CRAN (R 4.1.2)
#>  styler        1.7.0   2022-03-13 [2] CRAN (R 4.1.3)
#>  survey      * 4.1-1   2021-07-19 [1] CRAN (R 4.1.2)
#>  survival    * 3.2-13  2021-08-24 [2] CRAN (R 4.1.2)
#>  tibble        3.1.6   2021-11-07 [1] CRAN (R 4.1.2)
#>  tidyr         1.1.4   2021-09-27 [1] CRAN (R 4.1.2)
#>  tidyselect    1.1.1   2021-04-30 [1] CRAN (R 4.1.2)
#>  utf8          1.2.2   2021-07-24 [1] CRAN (R 4.1.2)
#>  vctrs         0.3.8   2021-04-29 [1] CRAN (R 4.1.2)
#>  withr         2.4.3   2021-11-30 [1] CRAN (R 4.1.2)
#>  xfun          0.29    2021-12-14 [2] CRAN (R 4.1.2)
#>  yaml          2.2.1   2020-02-01 [2] CRAN (R 4.1.1)
#> 
#>  [1] C:/..../renv/library/R-4.1/x86_64-w64-mingw32
#>  [2] C:/.../R/R-4.1.2/library
#> 
#> ------------------------------------------------------------------------------

Apr 08 '22 03:04 ray-p144

Here is a potential fix to this issue. If you would like, I can open a pull request to add this in.

library(broom)
#> Warning: package 'broom' was built under R version 4.1.3
library(survey)
#> Loading required package: grid
#> Loading required package: Matrix
#> Loading required package: survival
#> 
#> Attaching package: 'survey'
#> The following object is masked from 'package:graphics':
#> 
#>     dotchart

data(api)

dstrat <- svydesign(id = ~ 1,
                    strata = ~ stype, 
                    weights = ~ pw, 
                    data = apistrat, 
                    fpc = ~fpc)

model <- svyglm(sch.wide ~ yr.rnd, design = dstrat, family = binomial)
#> Warning in eval(family$initialize): non-integer #successes in a binomial glm!

## Changed ddf (but no change in p-value)
tidy(model,
     conf.int = TRUE,
     exponentiate = TRUE,
     ddf = 5)
#> # A tibble: 2 x 7
#>   term        estimate std.error statistic  p.value conf.low conf.high
#>   <chr>          <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
#> 1 (Intercept)     4.42     0.179      8.28 1.89e-14    2.79       7.01
#> 2 yr.rndYes       2.19     0.702      1.12 2.64e- 1    0.361     13.3

## Correct p-value for ddf = 5 (df.resid = 5)
summary(model, df.resid = 5)
#> 
#> Call:
#> svyglm(formula = sch.wide ~ yr.rnd, design = dstrat, family = binomial)
#> 
#> Survey design:
#> svydesign(id = ~1, strata = ~stype, weights = ~pw, data = apistrat, 
#>     fpc = ~fpc)
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)   1.4857     0.1794   8.280 0.000419 ***
#> yr.rndYes     0.7853     0.7017   1.119 0.313952    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> (Dispersion parameter for binomial family taken to be 1.005025)
#> 
#> Number of Fisher Scoring iterations: 4

## Updated function
tidy_svyglm <- function(x, conf.int = FALSE, conf.level = 0.95,
                        exponentiate = FALSE, ddf = NULL, ...) {
    ret <- as_tibble(summary(x, df.resid = ddf)$coefficients, rownames = "term")
    colnames(ret) <- c("term", "estimate", "std.error", "statistic", "p.value")
    
    coefs <- tibble::enframe(stats::coef(x), name = "term", value = "estimate")
    ret <- left_join(coefs, ret, by = c("term", "estimate"))
    
    if (conf.int) {
        ci <- broom_confint_terms(x, level = conf.level, ddf = ddf, ...)
        ret <- left_join(ret, ci, by = "term")
    }
    
    if (exponentiate) {
        ret <- exponentiate(ret)
    }
    
    ret
}

environment(tidy_svyglm) <- asNamespace("broom")
assignInNamespace("tidy.svyglm",
                  value = tidy_svyglm,
                  ns = "broom")

## Correct p-value for ddf = 5 (df.resid = 5)
tidy(model,
     conf.int = TRUE,
     exponentiate = TRUE,
     ddf = 5)
#> # A tibble: 2 x 7
#>   term        estimate std.error statistic  p.value conf.low conf.high
#>   <chr>          <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
#> 1 (Intercept)     4.42     0.179      8.28 0.000419    2.79       7.01
#> 2 yr.rndYes       2.19     0.702      1.12 0.314       0.361     13.3

^{Created on 2022-05-03 by the reprex package (v2.0.1)}

May 04 '22 04:05 ray-p144

This issue has been automatically closed due to inactivity.

May 05 '23 00:05 github-actions[bot]

This issue has been automatically locked. If you believe you have found a related problem, please file a new issue (with a reprex: https://reprex.tidyverse.org) and link to this issue.

May 20 '23 00:05 github-actions[bot]

broom broom copied to clipboard

P-value incorrect when using ddf argument in tidy.svyglm()

The problem

Reproducible example

broom
broom copied to clipboard