Support for svystat/svyby objects
Hello,
Thanks for the wonderful package. At the moment, svystat and svyby objects from the survey package don't print as expected with pander. The default print method for these objects includes standard errors, and the pander output probably should too.
library(survey)
library(pander)
# Load the example data sets used below (provides `apistrat`)
data(api)

# Build the survey design object from the stratified sample
dstrat <- svydesign(
  id = ~1,
  strata = ~stype,
  weights = ~pw,
  data = apistrat,
  fpc = ~fpc
)

# Estimate totals for `sch.wide`, then show the result class and how
# pander currently renders it
x <- svytotal(~sch.wide, dstrat)
class(x)
pander(x)
# ideally would want something like this
# pander method for `svystat` objects: coerce the object to a data frame
# and hand it to pandoc.table(), forwarding any extra arguments in `...`.
pander.svystat <- function(x = NULL, ...) {
  pandoc.table(as.data.frame(x), ...)
}

# `svyby` objects reuse exactly the same method.
pander.svyby <- pander.svystat
# With the methods above defined, both object types render as tables
pander(x)
pander(
  svyby(~api00, ~sch.wide, dstrat, svymean)
)
It's not that helpful obviously, since the user could always call as.data.frame themselves before calling pander, but I feel like the purpose of pander is to make printing objects work as-is.
Thanks
Same issue and same request here. Right now I use my own dirty hack to collect results from multiple svyby(), svymean(), svyquantile() calls, and then tabular() to pretty format. It would be nice if someone could build a generic method for survey objects.
#' Collect survey-weighted means and quantiles into one long table
#'
#' For every formula in `formula`, runs `svyby()` once with `svymean` and once
#' with `svyquantile`, flattens each result through `ftable()` into a
#' data.table, and joins the means to the quantiles. The result is meant to be
#' formatted afterwards with `tables::tabular()`.
#'
#' @param formula A list of one-sided formulas, e.g. `list(~var1, ~var2)`.
#' @param by Grouping formula passed to `svyby()`.
#' @param design Survey design object passed to `svyby()`.
#' @param quantiles Numeric vector of quantiles passed to `svyquantile`.
#' @param labels Optional character vector of display names, matched by
#'   position to `formula`. Variables without a label keep their name.
#' @return A data.table whose first three columns are `Variable`, `By` and
#'   `Stat`, followed by `Mean` and one `Q<pct>` column per quantile.
svyCrossTab <- function(formula, by, design, quantiles=c(.25,.5,.75), labels=NULL) {
  # library() rather than require(): fail immediately if a package is missing
  # instead of continuing and failing later with an unrelated error.
  library(survey)
  library(data.table)
  library(stringr)

  # Reshape means
  x <- lapply(formula, function(f) {
    m <- svyby(f, by, design, svymean, na.rm = TRUE)
    as.data.table(ftable(m))
  })
  x <- rbindlist(x)
  setnames(x, c("by", "stat", "var", "Mean"))
  x[stat == "svymean", stat := "est."]
  x[stat == "SE", stat := "std. err."]

  # Reshape quantiles
  y <- lapply(formula, function(f) {
    m <- svyby(f, by, design, svyquantile, quantiles, ci = TRUE, na.rm = TRUE)
    m <- as.data.table(ftable(m))
    # The quantile table does not carry the variable name, so add it from the
    # right-hand side of the formula.
    m[, var := as.character(f)[2]]
    m
  })
  y <- rbindlist(y)
  setnames(y, c("by", "stat", "qtl", "value", "var"))
  y[stat == "svyquantile", stat := "est."]
  y[stat == "SE", stat := "std. err."]
  y[, qtl := paste0("Q", as.numeric(as.character(qtl)) * 100)]
  # One column per quantile; value.var is spelled out rather than guessed.
  y <- dcast(y, var + by + stat ~ qtl, value.var = "value")

  # Join means and quantiles on variable / group / statistic
  setkey(x, var, by, stat)
  setkey(y, var, by, stat)
  x <- x[y]

  # Add optional labels.
  # The loop must run over the formulas: the previous `seq_along(var)` resolved
  # `var` to the function stats::var (length 1), so only the first label was
  # ever applied. is.null() also guards an explicit `labels = NULL`, which
  # would otherwise delete the `var` column via `var := NULL`.
  if (!is.null(labels)) {
    for (i in seq_len(min(length(formula), length(labels)))) {
      target <- as.character(formula[[i]])[2]
      x[var == target, var := labels[i]]
    }
  }

  x[, var := factor(str_replace(var, fixed("I("), "("), ordered = TRUE)]
  x[, stat := factor(as.character(stat), ordered = TRUE)]

  # Move `var` to the front, then give the three id columns display names
  setcolorder(x, c(3, 1, 2, 4:ncol(x)))
  setnames(x, 1:3, c("Variable", "By", "Stat"))
  x
}
# Format the long table with tabular() and render it through pander.
# NOTE(review): `dt` is not defined in this snippet; presumably it holds the
# result of svyCrossTab() - confirm.
pander(
  tabular(
    Variable * (Mean + Q25 + Q50 + Q75) ~
      Heading() * By *
      Heading() * Stat *
      Heading() * Justify(c, r) *
      Format(digits = 0, big.mark = ",", nsmall = 2) * identity,
    data = dt
  ),
  emphasize.italics.cols = c(4, 6, 8),
  split.table = Inf,
  caption = "Tab. My Caption"
)
------------------------------------------------------------------------------------------
\ \ SSHF\ \ TSHF\ \ CSHF\ \
Variable est. std. err. est. std. err. est. std. err.
------------------- ------ -------- ----------- -------- ----------- --------- -----------
(aggross/1000) Mean 26,344 *1,452* 27,179 *1,120* 514 *95*
Q25 139 *699* 8,187 *878* 44 *6*
Q50 18,266 *2,255* 20,195 *1,273* 145 *29*
Q75 41,643 *3,258* 38,530 *1,997* 417 *59*
(agsales/1000) Mean 349 *158* 313 *28* 477 *95*
Q25 0 *0* 16 *1* 35 *5*
Q50 0 *0* 57 *5* 108 *18*
Q75 33 *8* 180 *20* 331 *60*
------------------------------------------------------------------------------------------
Table: Tab. My Caption