JSONTables.jl
JSONTables.jl copied to clipboard
Recovering the input table from a JSONified table
Hi there,
I have been using some code for transforming a DataFrame to JSON and back again, with the requirement that the de-JSONified DataFrame is an exact copy of the input DataFrame, eltypes and all. I'd like to make this code public, and see that this package has the same purpose but doesn't preserve types. Can we combine our efforts?
My code is below (...credit where it's due, this was written by Josh Bode).
Cheers, Jock
#=
Given `data::DataFrame`:
- Convert it to JSON: `x = JSON.json(data)`
- Parse it back out again: `data2 = convert(DataFrame, JSON.parse(x))`
- data2 is element-wise equal to data
=#
################################################################################
# Convert a DataFrame to JSON
JSON.lower(x::Enum) = string(x)
JSON.lower(::Missing) = Vector{Union{Missing,Any}}()
JSON.lower(x::Complex) = [real(x), imag(x)]
JSON.lower(x::Set) = collect(x)
JSON.lower(x::DataFrames.DataFrame) = Dict{String, Vector{Any}}(
"names" => DataFrames.names(x),
"types" => DataFrames.eltypes(x),
"columns" => DataFrames.columns(x)
)
JSON.lower(x::DataFrames.SubDataFrame) = JSON.lower(x[:])
################################################################################
# Convert data to a DataFrame, where data is parsed from JSON.
# Some data types need an explicit converter
function Base.convert(::Type{T}, x::AbstractString) where {T <: Union{Date, DateTime}}
T(x)
end
Base.convert(::Type{Char}, x::AbstractString) = x[1]
function Base.convert(::Type{Set{T}}, x::AbstractVector) where T
Set{T}(x)
end
function Base.convert(::Type{DataFrame}, x::Dict{String, Any})
names, types, columns = try
x["names"], x["types"], x["columns"]
catch e
error("Missing data: $(e.key)")
end
result = DataFrame()
for (name, typename, coldata) in zip(names, types, columns)
T1 = eval(Meta.parse(typename)) # E.g., Union{Missing, Int64}.
T2 = Missings.T(T1) # E.g., Int64
@assert isconcretetype(T2) || T2 === Any "Not a concrete type"
n = length(coldata)
colname = Symbol(name)
result[colname] = Vector{T1}(undef, n)
for i = 1:n
val = coldata[i]
result[i, colname] = val == nothing ? missing : convert(T2, val)
end
end
result
end
Parsers for custom types can be added. For example, here's one for ZonedDateTime
.
using TimeZones
function Base.convert(::Type{TimeZones.ZonedDateTime}, x::AbstractString)
x, tz = x[1:end-6], x[end-5:end]
ZonedDateTime(DateTime(x), TimeZones.FixedTimeZone(tz))
end
...and the test code
using Utils.jsontables # the code above
using DataFrames
using Dates
using JSON
using TimeZones
d = DataFrame(
a = [1,2,3,missing],
b = ['a', missing, 'c', 'd'],
c = rand(4),
d = Date.(["2018-10-01", "2018-10-02", "2018-10-03", "2010-10-04"])
)
d[:zdt] = [ZonedDateTime(DateTime(x), TimeZone("Australia/Melbourne")) for x in d[:d]]
s = JSON.json(d)
j = JSON.parse(s)
d2 = convert(DataFrame, j)
@test size(d2) == size(d)
@test names(d2) == names(d)
@test eltypes(d2) == eltypes(d)
function cell_values_are_all_equal(d1::DataFrame, d2::DataFrame)
good = true
n = size(d1, 1)
for colname in names(d1)
for i = 1:n
val1 = d1[i, colname]
val2 = d2[i, colname]
ismissing(val1) && ismissing(val2) && continue
bad = (ismissing(val1) && !ismissing(val2)) || (!ismissing(val1) && ismissing(val2)) || (val1 != val2)
if bad == true
good = false
break
end
end
good == false && break
end
good
end
@test cell_values_are_all_equal(d, d2)