Survey.jl
Survey.jl copied to clipboard
`jackknife_variance` hardcoded for binary `func`
In jackknife.jl
line 152,
θhj = func(design.data[!, x], rep_weights)
The `func` will only be applied to a single data vector `x`. This is fine for `mean` and `total`, but won't work for `ratio`, which also needs `y`.
In the general case, we need a type system here so that a `func` taking a different number of arguments can work.
This issue is still relevant, but this PR changes the `jackknife_variance` function and renames it to `variance`: https://github.com/xKDR/Survey.jl/pull/297
We can do the following:
"""
    variance(x::Vector{Symbol}, func::Function,
             design::ReplicateDesign{BootstrapReplicates}, args...; kwargs...)

Compute the estimate of `func` over the columns `x` of `design.data` together with
its replicate-based standard error.

`func` is called as `func(design.data, x, weights, args...; kwargs...)`: once with
`design.weights`, and once per replicate with the column name `"replicate_i"` for
`i` in `1:design.replicates`. Any extra `args` and `kwargs` are forwarded unchanged.

`func` may return a scalar or a vector (one entry per estimated quantity); the
squared deviations are accumulated elementwise across replicates.

# Returns
A `DataFrame` with columns `estimator` and `SE`.
"""
function variance(
    x::Vector{Symbol},
    func::Function,
    design::ReplicateDesign{BootstrapReplicates},
    args...;
    kwargs...
)
    θ̂ = func(design.data, x, design.weights, args...; kwargs...)
    θ̂t = [
        func(design.data, x, "replicate_" * string(i), args...; kwargs...) for
        i in 1:design.replicates
    ]
    # Reduce ACROSS replicates (not within each one): every replicate contributes
    # its elementwise squared deviation from the full-sample estimate.
    sqdev = sum(t -> (t .- θ̂) .^ 2, θ̂t)
    # Dotted ops keep this valid for both scalar and vector-valued estimators.
    se = sqrt.(sqdev ./ design.replicates)
    return DataFrame(estimator = θ̂, SE = se)
end
"""
    ratio(df::DataFrame, columns, weights)

Return the weighted sum of column `columns[1]` divided by the weighted sum of
column `columns[2]`, where both sums use column `weights` of `df` as weights.
"""
function ratio(df::DataFrame, columns, weights)
    w = StatsBase.weights(df[!, weights])
    numerator = sum(df[!, columns[1]], w)
    denominator = sum(df[!, columns[2]], w)
    return numerator / denominator
end
In the case of GLM,
"""
    glm_tmp(df::DataFrame, columns, weights, link, family)

Fit a generalized linear model of `columns[1]` (response) on `columns[2:end]`
(predictors) using the rows of `df`, with column `weights` of `df` supplied as the
observation weights, and return the fitted coefficients.

# Arguments
- `df`: data to fit on.
- `columns`: column names; the first is the response, the rest are predictors.
- `weights`: name of the weight column in `df`.
- `link`, `family`: passed to `glm` as the link and distribution respectively.

# Throws
- `ArgumentError` if `columns` does not contain a response and at least one predictor.
"""
function glm_tmp(df::DataFrame, columns, weights, link, family)
    length(columns) >= 2 || throw(
        ArgumentError("columns must hold a response and at least one predictor"),
    )
    # Build the formula from terms instead of `eval(Meta.parse(...))`: no runtime
    # code evaluation, and column names are never interpreted as Julia syntax.
    response = term(Symbol(columns[1]))
    predictors = foldl(+, term.(Symbol.(columns[2:end])))
    formula = response ~ predictors
    # `glm` takes the distribution before the link. The weight column must be
    # passed so that each replicate weight column yields its own fit.
    model = glm(formula, df, family, link; wts = df[!, weights])
    return coef(model)
end
@nadiaenh can you try this?
Can you also accept `args` and `kwargs` in the `variance` functions and pass them through to `func`? @nadiaenh