IndexedTables.jl

Error joining tables with CategoricalValues

Open nicoleepp opened this issue 7 years ago • 4 comments

To reproduce:

julia> using CategoricalArrays, IndexedTables
julia> l = table(CategoricalArray([1, 1, 2, 2]), CategoricalArray([1, 2, 1, 2]), CategoricalArray([1, 2, 3, 4]), names=[:a, :b, :c], pkey=(:a, :b))
julia> r = table(CategoricalArray([0, 1, 1, 3]), CategoricalArray([1, 1, 2, 2]), CategoricalArray([1, 2, 3, 4]), names=[:a, :b, :d], pkey=(:a, :b))
julia> join(l, r)
ERROR: ArgumentError: CategoricalValue objects with different pools cannot be tested for order
Stacktrace:
 [1] isless(::CategoricalArrays.CategoricalValue{Int64,UInt32}, ::CategoricalArrays.CategoricalValue{Int64,UInt32}) at /Users/username/.julia/v0.6/CategoricalArrays/src/value.jl:139
 [2] cmp at ./operators.jl:303 [inlined]
 [3] macro expansion at /Users/username/.julia/v0.6/IndexedTables/src/columns.jl:420 [inlined]
 [4] rowcmp(::IndexedTables.Columns{NamedTuples._NT_a_b{CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_a_b{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}, ::Int64, ::IndexedTables.Columns{NamedTuples._NT_a_b{CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_a_b{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}, ::Int64) at /Users/username/.julia/v0.6/IndexedTables/src/columns.jl:416
 [5] _join!(::Val{:inner}, ::Val{false}, ::Val{false}, ::IndexedTables.#concat_tup, ::IndexedTables.Columns{NamedTuples._NT_a_b{CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_a_b{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}, ::IndexedTables.Columns{NamedTuples._NT_c_d{CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_c_d{Array{CategoricalArrays.CategoricalValue{Int64,UInt32},1},Array{CategoricalArrays.CategoricalValue{Int64,UInt32},1}}}, ::IndexedTables.Columns{NamedTuples._NT_a_b{CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_a_b{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}, ::IndexedTables.Columns{NamedTuples._NT_c{CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_c{Array{CategoricalArrays.CategoricalValue{Int64,UInt32},1}}}, ::IndexedTables.Columns{NamedTuples._NT_d{CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_d{Array{CategoricalArrays.CategoricalValue{Int64,UInt32},1}}}, ::Void, ::Void, ::IndexedTables.Columns{NamedTuples._NT_a_b{CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_a_b{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}, 
::IndexedTables.Columns{NamedTuples._NT_a_b{CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_a_b{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}, ::IndexedTables.Columns{NamedTuples._NT_c{CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_c{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}, ::IndexedTables.Columns{NamedTuples._NT_d{CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_d{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}, ::Base.OneTo{Int64}, ::Base.OneTo{Int64}, ::Void, ::Void) at /Users/username/.julia/v0.6/IndexedTables/src/join.jl:67
 [6] #join#262(::Symbol, ::Bool, ::Tuple{Symbol,Symbol}, ::Tuple{Symbol,Symbol}, ::Tuple{Int64}, ::Tuple{Int64}, ::Void, ::Bool, ::Void, ::Void, ::Bool, ::Base.#join, ::IndexedTables.#concat_tup, ::IndexedTables.NextTable{IndexedTables.Columns{NamedTuples._NT_a_b_c{CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_a_b_c{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}}, ::IndexedTables.NextTable{IndexedTables.Columns{NamedTuples._NT_a_b_d{CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_a_b_d{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}}) at /Users/username/.julia/v0.6/IndexedTables/src/join.jl:449
 [7] (::Base.#kw##join)(::Array{Any,1}, ::Base.#join, ::Function, ::IndexedTables.NextTable{IndexedTables.Columns{NamedTuples._NT_a_b_c{CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_a_b_c{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}}, ::IndexedTables.NextTable{IndexedTables.Columns{NamedTuples._NT_a_b_d{CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_a_b_d{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}}) at ./<missing>:0
 [8] #join#267(::Symbol, ::Array{Any,1}, ::Function, ::IndexedTables.NextTable{IndexedTables.Columns{NamedTuples._NT_a_b_c{CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_a_b_c{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}}, ::IndexedTables.NextTable{IndexedTables.Columns{NamedTuples._NT_a_b_d{CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_a_b_d{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}}) at /Users/username/.julia/v0.6/IndexedTables/src/join.jl:504
 [9] join(::IndexedTables.NextTable{IndexedTables.Columns{NamedTuples._NT_a_b_c{CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_a_b_c{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}}, ::IndexedTables.NextTable{IndexedTables.Columns{NamedTuples._NT_a_b_d{CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32},CategoricalArrays.CategoricalValue{Int64,UInt32}},NamedTuples._NT_a_b_d{CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}},CategoricalArrays.CategoricalArray{Int64,1,UInt32,Int64,CategoricalArrays.CategoricalValue{Int64,UInt32},Union{}}}}}) at /Users/username/.julia/v0.6/IndexedTables/src/join.jl:503

nicoleepp avatar May 08 '18 19:05 nicoleepp

Currently in IndexedTables, categorical-like data is represented by PooledArrays. They are simpler than CategoricalArrays mainly because the elements aren't special CategoricalValues. You might want to try and see if PooledArrays are sufficient for your application.

andreasnoack avatar May 08 '18 19:05 andreasnoack

So there is no plan to support CategoricalValues?

nicoleepp avatar May 08 '18 19:05 nicoleepp

Not that I'm aware of.

andreasnoack avatar May 08 '18 20:05 andreasnoack

Slightly off-topic: we may need to think about this for integration with StatsModels, though, as it uses categorical values to determine whether to treat a variable as categorical or continuous.

piever avatar May 08 '18 20:05 piever