JSON3.jl icon indicating copy to clipboard operation
JSON3.jl copied to clipboard

Issue deserializing json within arrow table

Open ericphanson opened this issue 1 year ago • 1 comment

Here I have some JSON data embedded inside a struct which itself is inside a column of an arrow table, and I deserialize the JSON at fromarrowstruct-time. This gives errors like the following (also with `Symbol`, not just `String`):

conversion to pointer not defined for Arrow.Primitive{UInt8, Vector{UInt8}}
Stacktrace:
  [1] error(s::String)
    @ Base ./error.jl:35
  [2] unsafe_convert(::Type{Ptr{UInt8}}, a::Arrow.Primitive{UInt8, Vector{UInt8}})
    @ Base ./pointer.jl:67
  [3] pointer
    @ ./abstractarray.jl:1240 [inlined]
  [4] pointer(V::SubArray{UInt8, 1, Arrow.Primitive{UInt8, Vector{UInt8}}, Tuple{UnitRange{Int64}}, true}, i::Int64)
    @ Base ./subarray.jl:476
  [5] getvalue(::Type{String}, buf::SubArray{UInt8, 1, Arrow.Primitive{UInt8, Vector{UInt8}}, Tuple{UnitRange{Int64}}, true}, tape::SubArray{UInt64, 1, Arrow.Primitive{UInt64, Vector{UInt64}}, Tuple{UnitRange{Int64}}, true}, tapeidx::Int64, t::UInt64)
    @ JSON3 ~/.julia/packages/JSON3/jSAdy/src/utils.jl:0
  [6] getindex
    @ ~/.julia/packages/JSON3/jSAdy/src/JSON3.jl:163 [inlined]
  [7] copyto_unaliased!
    @ ./abstractarray.jl:1088 [inlined]
  [8] copyto!(dest::Vector{String}, src::JSON3.Array{String, SubArray{UInt8, 1, Arrow.Primitive{UInt8, Vector{UInt8}}, Tuple{UnitRange{Int64}}, true}, SubArray{UInt64, 1, Arrow.Primitive{UInt64, Vector{UInt64}}, Tuple{UnitRange{Int64}}, true}})
    @ Base ./abstractarray.jl:1068
  [9] _collect_indices
    @ ./array.jl:777 [inlined]
 [10] collect
    @ ./array.jl:761 [inlined]
 [11] _show(io::IOBuffer, arr::JSON3.Array{String, SubArray{UInt8, 1, Arrow.Primitive{UInt8, Vector{UInt8}}, Tuple{UnitRange{Int64}}, true}, SubArray{UInt64, 1, Arrow.Primitive{UInt64, Vector{UInt64}}, Tuple{UnitRange{Int64}}, true}}, indent::Int64, offset::Int64)
    @ JSON3 ~/.julia/packages/JSON3/jSAdy/src/show.jl:39
 [12] _show(io::IOBuffer, obj::JSON3.Object{SubArray{UInt8, 1, Arrow.Primitive{UInt8, Vector{UInt8}}, Tuple{UnitRange{Int64}}, true}, SubArray{UInt64, 1, Arrow.Primitive{UInt64, Vector{UInt64}}, Tuple{UnitRange{Int64}}, true}}, indent::Int64, offset::Int64)
    @ JSON3 ~/.julia/packages/JSON3/jSAdy/src/show.jl:21
 [13] _show
    @ ~/.julia/packages/JSON3/jSAdy/src/show.jl:7 [inlined]
 [14] show(io::IOBuffer, j::JSON3.Object{SubArray{UInt8, 1, Arrow.Primitive{UInt8, Vector{UInt8}}, Tuple{UnitRange{Int64}}, true}, SubArray{UInt64, 1, Arrow.Primitive{UInt64, Vector{UInt64}}, Tuple{UnitRange{Int64}}, true}})
    @ JSON3 ~/.julia/packages/JSON3/jSAdy/src/show.jl:1
 [15] sprint(f::Function, args::JSON3.Object{SubArray{UInt8, 1, Arrow.Primitive{UInt8, Vector{UInt8}}, Tuple{UnitRange{Int64}}, true}, SubArray{UInt64, 1, Arrow.Primitive{UInt64, Vector{UInt64}}, Tuple{UnitRange{Int64}}, true}}; context::Nothing, sizehint::Int64)
    @ Base ./strings/io.jl:114
 [16] sprint
    @ ./strings/io.jl:107 [inlined]
 [17] repr(x::JSON3.Object{SubArray{UInt8, 1, Arrow.Primitive{UInt8, Vector{UInt8}}, Tuple{UnitRange{Int64}}, true}, SubArray{UInt64, 1, Arrow.Primitive{UInt64, Vector{UInt64}}, Tuple{UnitRange{Int64}}, true}})
    @ Base ./strings/io.jl:286

I was able to work around it with:

# Workaround for `JSON3.getvalue` when the byte buffer is a view into an
# `Arrow.Primitive`: copy the view first, then delegate to whichever method
# dispatch selects for the copied buffer. Reported to avoid the
# "conversion to pointer not defined" error raised when `pointer` is taken on
# the Arrow-backed view.
# NOTE(review): this adds a method to a JSON3 function for Base/Arrow types from
# outside both packages — presumably best confined to user code or an extension.
function JSON3.getvalue(
    ::Type{T}, buf::SubArray{UInt8,1,<:Arrow.Primitive}, tape, tapeidx, t
) where {T}
    owned = copy(buf)
    return JSON3.getvalue(T, owned, tape, tapeidx, t)
end

I guess something like that could be added to a JSON3<>Arrow package extension, but it might be better to have a more generic fallback for getvalue that doesn't need pointers.

ericphanson avatar Aug 19 '24 11:08 ericphanson

This should be fixed in the JSON.jl 1.0 release, where we ensure that `pointer(buf, pos)` is supported by the JSON source; otherwise, the input is copied into an appropriate container that does support it.

quinnj avatar Apr 24 '25 16:04 quinnj