CorrectMatch.jl
CorrectMatch.jl copied to clipboard
Handling missing data?
I'm trying to follow the examples with my data, which is incomplete, but the function `uniqueness` doesn't handle `Union{Int, Missing}`. According to your paper, your method is able to handle missing data, so I'm wondering whether this has been implemented.
Here's a minimal working example of the code throwing an error:
using StatsBase
using DataFrames
using CorrectMatch: Copula, Uniqueness, Individual
using Distributions
"""
    checkrows(df::DataFrame)

Verify that no row of `df` consists solely of `missing` values.

Returns `nothing` when every row has at least one non-missing entry.

# Throws
- `AssertionError`: if some row contains only `missing` values.
"""
function checkrows(df::DataFrame)
    for (i, row) in enumerate(eachrow(df))
        # Raise explicitly instead of via `@assert`, which may be disabled at some
        # optimization levels; the exception type is kept the same for callers.
        # `all(ismissing, row)` short-circuits and avoids a temporary array per row.
        if all(ismissing, row)
            throw(AssertionError("row $i contains only missing values"))
        end
    end
    return nothing
end
"""
    extract_marginal_ordered(row::AbstractVector)

Build the empirical marginal distribution of the values in `row`.

The distinct values are sorted in ascending order and the returned `Categorical`
holds their relative frequencies in that order, so probability `k` belongs to the
`k`-th smallest distinct value. `missing` entries, if any, are counted like any
other value and sort after all non-missing values.
"""
function extract_marginal_ordered(row::AbstractVector)
    counts = countmap(row; alg=:dict)
    # A `Dict` iterates in unspecified order, so taking `values(counts)` directly
    # would not line the probabilities up with the sorted categories. Walk the
    # sorted keys instead.
    categories = sort!(collect(keys(counts)))
    freqs = [counts[c] for c in categories]
    return Categorical(freqs ./ sum(freqs))
end
# Reproduction script: build a small random integer table, knock out some entries
# as `missing`, fit a Gaussian copula, and query the uniqueness of every row.
# N is the number of rows and M the number of columns of the generated data.
N = 100; M = 3
# Three integer columns drawn uniformly from 1:2, 1:10 and 1:5 respectively.
df = DataFrame(a = rand(1:2, N), b = rand(1:10, N), c = rand(1:5, N))
# Hopefully you won't get an invalid row with all missing values
# Each mask entry is a Bernoulli(p) draw; entries equal to 0 are replaced by
# `missing`, the remaining entries stay 1.
p = 0.95
mask = convert(Matrix{Union{Int, Missing}}, rand(Bernoulli(p), N, M))
replace!(mask, 0 => missing)
# Element-wise product: a value times 1 is unchanged, a value times `missing`
# becomes `missing`.
df = df .* mask
checkrows(df) # If assertion error, run again
# Work on a plain matrix from here on.
data = convert(Matrix, df)
# One marginal distribution per column.
marginals = [extract_marginal_ordered(data[:, i]) for i=1:M];
G = fit_mle(Copula.GaussianCopula, marginals, data);
# For every row, shift each entry so that its column's smallest non-missing value
# maps to 1, then ask for the individual uniqueness. Entries that are `missing`
# stay `missing`, so `shifted_indiv` has element type Union{Missing, Int64}; this
# is the call that raises the MethodError quoted in this report.
for indiv in eachrow(data)
shifted_indiv = indiv - [minimum(collect(skipmissing(col))) for col in eachcol(data)] .+ 1
println(Individual.individual_uniqueness(G, shifted_indiv, N))
end
which throws the following error:
ERROR: LoadError: MethodError: no method matching individual_uniqueness(::CorrectMatch.Copula.GaussianCopula, ::Array{Union{Missing, Int64},1}, ::Int64)
Closest candidates are:
individual_uniqueness(::CorrectMatch.Copula.GaussianCopula, ::AbstractArray{Int64,1}, ::Int64; iter) at /Users/karel/.julia/packages/CorrectMatch/Hf9Rq/src/Individual.jl:49
Stacktrace:
[1] top-level scope at /Users/karel/.julia/dev/CorrectMatch/examples/missing_data.jl:36
[2] include(::String) at ./client.jl:439
[3] top-level scope at REPL[76]:1
[4] eval(::Module, ::Any) at ./boot.jl:331
[5] eval_user_input(::Any, ::REPL.REPLBackend) at /Users/julia/buildbot/worker/package_macos64/build/usr/share/julia/stdlib/v1.4/REPL/src/REPL.jl:86
[6] run_backend(::REPL.REPLBackend) at /Users/karel/.julia/packages/Revise/AMRie/src/Revise.jl:1023
[7] top-level scope at none:0
in expression starting at /Users/karel/.julia/dev/CorrectMatch/examples/missing_data.jl:34
I've installed the latest version using `] add CorrectMatch`.
Thanks for your help!