IndexedTables.jl
IndexedTables.jl copied to clipboard
Error joining tables with CategoricalValues
To reproduce:
julia> using CategoricalArrays, IndexedTables
julia> l = table(CategoricalArray([1, 1, 2, 2]), CategoricalArray([1, 2, 1, 2]), CategoricalArray([1, 2, 3, 4]), names=[:a, :b, :c], pkey=(:a, :b))
julia> r = table(CategoricalArray([0, 1, 1, 3]), CategoricalArray([1, 1, 2, 2]), CategoricalArray([1, 2, 3, 4]), names=[:a, :b, :d], pkey=(:a, :b))
julia> join(l, r)
ERROR: ArgumentError: CategoricalValue objects with different pools cannot be tested for order
Stacktrace:
[1] isless(::CategoricalArrays.CategoricalValue{Int64,UInt32}, ::CategoricalArrays.CategoricalValue{Int64,UInt32}) at /Users/username/.julia/v0.6/CategoricalArrays/src/value.jl:139
[2] cmp at ./operators.jl:303 [inlined]
[3] macro expansion at /Users/username/.julia/v0.6/IndexedTables/src/columns.jl:420 [inlined]
[4] rowcmp(::IndexedTables.Columns{NamedTuples._NT_a_b{CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_a_b{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}, ::Int64, ::IndexedTables.Columns{NamedTuples._NT_a_b{CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_a_b{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}, ::Int64) at /Users/username/.julia/v0.6/IndexedTables/src/columns.jl:416
[5] _join!(::Val{:inner}, ::Val{false}, ::Val{false}, ::IndexedTables.#concat_tup, ::IndexedTables.Columns{NamedTuples._NT_a_b{CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_a_b{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}, ::IndexedTables.Columns{NamedTuples._NT_c_d{CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_c_d{Array{CategoricalArrays.CategoricalValue{Int64,UInt32},1},Array{CategoricalArrays.CategoricalValue{Int64,UInt32},1}}}, ::IndexedTables.Columns{NamedTuples._NT_a_b{CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_a_b{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}, ::IndexedTables.Columns{NamedTuples._NT_c{CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_c{Array{CategoricalArrays.CategoricalValue{Int64,UInt32},1}}}, ::IndexedTables.Columns{NamedTuples._NT_d{CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_d{Array{CategoricalArrays.CategoricalValue{Int64,UInt32},1}}}, ::Void, ::Void, ::IndexedTables.Columns{NamedTuples._NT_a_b{CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_a_b{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}, 
::IndexedTables.Columns{NamedTuples._NT_a_b{CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_a_b{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}, ::IndexedTables.Columns{NamedTuples._NT_c{CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_c{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}, ::IndexedTables.Columns{NamedTuples._NT_d{CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_d{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}, ::Base.OneTo{Int64}, ::Base.OneTo{Int64}, ::Void, ::Void) at /Users/username/.julia/v0.6/IndexedTables/src/join.jl:67
[6] #join#262(::Symbol, ::Bool, ::Tuple{Symbol,Symbol}, ::Tuple{Symbol,Symbol}, ::Tuple{Int64}, ::Tuple{Int64}, ::Void, ::Bool, ::Void, ::Void, ::Bool, ::Base.#join, ::IndexedTables.#concat_tup, ::IndexedTables.NextTable{IndexedTables.Columns{NamedTuples._NT_a_b_c{CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_a_b_c{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}}, ::IndexedTables.NextTable{IndexedTables.Columns{NamedTuples._NT_a_b_d{CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_a_b_d{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}}) at /Users/username/.julia/v0.6/IndexedTables/src/join.jl:449
[7] (::Base.#kw##join)(::Array{Any,1}, ::Base.#join, ::Function, ::IndexedTables.NextTable{IndexedTables.Columns{NamedTuples._NT_a_b_c{CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_a_b_c{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}}, ::IndexedTables.NextTable{IndexedTables.Columns{NamedTuples._NT_a_b_d{CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_a_b_d{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}}) at ./<missing>:0
[8] #join#267(::Symbol, ::Array{Any,1}, ::Function, ::IndexedTables.NextTable{IndexedTables.Columns{NamedTuples._NT_a_b_c{CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_a_b_c{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}}, ::IndexedTables.NextTable{IndexedTables.Columns{NamedTuples._NT_a_b_d{CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_a_b_d{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}}) at /Users/username/.julia/v0.6/IndexedTables/src/join.jl:504
[9] join(::IndexedTables.NextTable{IndexedTables.Columns{NamedTuples._NT_a_b_c{CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_a_b_c{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}}, ::IndexedTables.NextTable{IndexedTables.Columns{NamedTuples._NT_a_b_d{CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_a_b_d{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}}) at /Users/username/.julia/v0.6/IndexedTables/src/join.jl:503
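Currently in IndexedTables, categorical-like data is represented by PooledArrays. They are simpler than CategoricalArrays, mainly because their elements aren't special CategoricalValue objects. (The error above arises because the join compares key values from the two tables with `isless`, and CategoricalValues that belong to different pools cannot be tested for order.) You might want to try PooledArrays and see whether they are sufficient for your application.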
So there is no plan to support CategoricalValues?
Not that I'm aware of.
Slightly off-topic: we may need to think about this for integration with StatsModels, though, as it uses categorical values to determine whether to treat a variable as categorical or continuous.