JuliaDB.jl
JuliaDB.jl copied to clipboard
CSV Parsing Error in JuliaDB Tutorial
It seems the CSV parsing of the file in the JuliaDB tutorial (hflights.csv) is not working at present. Upon downloading that CSV file and trying to load it with loadtable, I get the following error:
Error parsing hflights.csv
ERROR: MethodError: no method matching pointer(::SubString{TextParse.VectorBackedUTF8String}, ::Int64)
Closest candidates are:
pointer(::String, ::Integer) at strings/string.jl:82
pointer(::SubString{String}, ::Integer) at strings/substring.jl:105
pointer(::TextParse.VectorBackedUTF8String, ::Integer) at /home/ubuntu/.julia/packages/TextParse/IAMBB/src/VectorBackedStrings.jl:16
...
_substring at /home/ubuntu/.julia/packages/TextParse/IAMBB/src/field.jl:397 [inlined]
tryparsenext(::TextParse.StringToken{String}, ::SubString{TextParse.VectorBackedUTF8String}, ::Int64, ::Int64, ::TextParse.LocalOpts{UInt8,UInt8,UInt8}) at /home/ubuntu/.julia/packages/TextParse/IAMBB/src/field.jl:368
macro expansion at /home/ubuntu/.julia/packages/TextParse/IAMBB/src/util.jl:27 [inlined]
tryparsenext(::TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}, ::SubString{TextParse.VectorBackedUTF8String}, ::Int64, ::Int64, ::TextParse.LocalOpts{UInt8,UInt8,UInt8}) at /home/ubuntu/.julia/packages/TextParse/IAMBB/src/field.jl:493
macro expansion at /home/ubuntu/.julia/packages/TextParse/IAMBB/src/util.jl:27 [inlined]
tryparsenext(::TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}}, ::SubString{TextParse.VectorBackedUTF8String}, ::Int64, ::Int64, ::TextParse.LocalOpts{UInt8,UInt8,UInt8}) at /home/ubuntu/.julia/packages/TextParse/IAMBB/src/field.jl:682
macro expansion at /home/ubuntu/.julia/packages/TextParse/IAMBB/src/util.jl:27 [inlined]
quotedsplit(::SubString{TextParse.VectorBackedUTF8String}, ::TextParse.LocalOpts{UInt8,UInt8,UInt8}, ::Bool, ::Int64, ::Int64) at /home/ubuntu/.julia/packages/TextParse/IAMBB/src/csv.jl:671
quotedsplit(::SubString{TextParse.VectorBackedUTF8String}, ::TextParse.LocalOpts{UInt8,UInt8,UInt8}, ::Bool) at /home/ubuntu/.julia/packages/TextParse/IAMBB/src/csv.jl:662
#_csvread_internal#26(::Bool, ::Char, ::Char, ::Nothing, ::Type, ::Type, ::Bool, ::Int64, ::Nothing, ::Nothing, ::Int64, ::Array{Any,1}, ::Bool, ::Array{String,1}, ::Array{String,1}, ::OrderedCollections.OrderedDict{Union{Int64, String},AbstractArray{T,1} where T}, ::Int64, ::Nothing, ::Array{Any,1}, ::String, ::Int64, ::typeof(TextParse._csvread_internal), ::TextParse.VectorBackedUTF8String, ::Char) at /home/ubuntu/.julia/packages/TextParse/IAMBB/src/csv.jl:367
(::getfield(TextParse, Symbol("#kw##_csvread_internal")))(::NamedTuple{(:filename, :noresize, :colspool, :samecols),Tuple{String,Bool,OrderedCollections.OrderedDict{Union{Int64, String},AbstractArray{T,1} where T},Array{Any,1}}}, ::typeof(TextParse._csvread_internal), ::TextParse.VectorBackedUTF8String, ::Char) at ./none:0
(::getfield(TextParse, Symbol("##22#24")){Base.Iterators.Pairs{Symbol,Any,Tuple{Symbol,Symbol,Symbol},NamedTuple{(:noresize, :colspool, :samecols),Tuple{Bool,OrderedCollections.OrderedDict{Union{Int64, String},AbstractArray{T,1} where T},Array{Any,1}}}},String,Char})(::IOStream) at /home/ubuntu/.julia/packages/TextParse/IAMBB/src/csv.jl:110
#open#310(::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::Function, ::getfield(TextParse, Symbol("##22#24")){Base.Iterators.Pairs{Symbol,Any,Tuple{Symbol,Symbol,Symbol},NamedTuple{(:noresize, :colspool, :samecols),Tuple{Bool,OrderedCollections.OrderedDict{Union{Int64, String},AbstractArray{T,1} where T},Array{Any,1}}}},String,Char}, ::String, ::Vararg{String,N} where N) at ./iostream.jl:369
open at ./iostream.jl:367 [inlined]
#_csvread_f#20 at /home/ubuntu/.julia/packages/TextParse/IAMBB/src/csv.jl:107 [inlined]
#_csvread_f at ./none:0 [inlined]
#csvread#25(::Base.Iterators.Pairs{Symbol,Array{Any,1},Tuple{Symbol},NamedTuple{(:samecols,),Tuple{Array{Any,1}}}}, ::Function, ::Array{String,1}, ::Char) at /home/ubuntu/.julia/packages/TextParse/IAMBB/src/csv.jl:125
(::getfield(TextParse, Symbol("#kw##csvread")))(::NamedTuple{(:samecols,),Tuple{Array{Any,1}}}, ::typeof(csvread), ::Array{String,1}, ::Char) at ./none:0
#_loadtable_serial#3(::Char, ::Array{Any,1}, ::Nothing, ::Nothing, ::Nothing, ::Bool, ::Bool, ::typeof(csvread), ::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::typeof(JuliaDB._loadtable_serial), ::UnionAll, ::Array{String,1}) at /home/ubuntu/.julia/packages/JuliaDB/ZXPIx/src/util.jl:83
#_loadtable_serial at ./none:0 [inlined]
(::getfield(JuliaDB, Symbol("##190#193")){Array{Any,1},Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}},UnionAll})(::Array{String,1}) at /home/ubuntu/.julia/packages/JuliaDB/ZXPIx/src/io.jl:131
do_task(::Dagger.Context, ::Dagger.OSProc, ::Int64, ::Function, ::Tuple{Array{String,1}}, ::Bool, ::Bool, ::Bool) at /home/ubuntu/.julia/packages/Dagger/sdZXi/src/scheduler.jl:259
#143 at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.1/Distributed/src/remotecall.jl:339 [inlined]
run_work_thunk(::getfield(Distributed, Symbol("##143#144")){typeof(Dagger.Sch.do_task),Tuple{Dagger.Context,Dagger.OSProc,Int64,getfield(JuliaDB, Symbol("##190#193")){Array{Any,1},Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}},UnionAll},Tuple{Array{String,1}},Bool,Bool,Bool},Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}}, ::Bool) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.1/Distributed/src/process_messages.jl:56
#remotecall_fetch#148(::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::Function, ::Function, ::Distributed.LocalProcess, ::Dagger.Context, ::Vararg{Any,N} where N) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.1/Distributed/src/remotecall.jl:364
remotecall_fetch(::Function, ::Distributed.LocalProcess, ::Dagger.Context, ::Vararg{Any,N} where N) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.1/Distributed/src/remotecall.jl:364
#remotecall_fetch#152(::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::Function, ::Function, ::Int64, ::Dagger.Context, ::Vararg{Any,N} where N) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.1/Distributed/src/remotecall.jl:406
remotecall_fetch at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.1/Distributed/src/remotecall.jl:406 [inlined]
macro expansion at /home/ubuntu/.julia/packages/Dagger/sdZXi/src/scheduler.jl:272 [inlined]
(::getfield(Dagger.Sch, Symbol("##13#14")){Dagger.Context,Dagger.OSProc,Int64,getfield(JuliaDB, Symbol("##190#193")){Array{Any,1},Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}},UnionAll},Tuple{Array{String,1}},Channel{Any},Bool,Bool,Bool})() at ./task.jl:259
Stacktrace:
[1] compute_dag(::Dagger.Context, ::Dagger.Thunk) at /home/ubuntu/.julia/packages/Dagger/sdZXi/src/scheduler.jl:62
[2] compute(::Dagger.Context, ::Dagger.Thunk) at /home/ubuntu/.julia/packages/Dagger/sdZXi/src/compute.jl:25
[3] #fromchunks#47(::Nothing, ::Int64, ::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::Function, ::Array{Dagger.Thunk,1}) at /home/ubuntu/.julia/packages/JuliaDB/ZXPIx/src/table.jl:148
[4] (::getfield(JuliaDB, Symbol("#kw##fromchunks")))(::NamedTuple{(:output, :fnoffset),Tuple{Nothing,Int64}}, ::typeof(JuliaDB.fromchunks), ::Array{Dagger.Thunk,1}) at ./none:0
[5] #_loadtable#188(::Nothing, ::Nothing, ::Bool, ::Array{Any,1}, ::Bool, ::Bool, ::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::Function, ::Type, ::String) at /home/ubuntu/.julia/packages/JuliaDB/ZXPIx/src/io.jl:140
[6] _loadtable at /home/ubuntu/.julia/packages/JuliaDB/ZXPIx/src/io.jl:95 [inlined]
[7] #loadtable#186 at /home/ubuntu/.julia/packages/JuliaDB/ZXPIx/src/io.jl:63 [inlined]
[8] loadtable(::String) at /home/ubuntu/.julia/packages/JuliaDB/ZXPIx/src/io.jl:63
[9] top-level scope at none:0
I cannot tell which specific column is failing to parse.
See also:
- https://github.com/queryverse/CSVFiles.jl/issues/46
- https://github.com/JuliaComputing/TextParse.jl/pull/127
I started looking into this an hour ago. I still haven't figured out what is going wrong, but I won't give up :)
Thank you!!
I would assume that somewhere something like this is acting upon the `VectorBackedUTF8String`:
"Several standard functions like chop, chomp or strip return a SubString."
From the Julia docs: https://docs.julialang.org/en/v1/manual/strings/index.html
So the specific error message regarding `pointer` had been fixed on the master branch of TextParse.jl for a while, but then something went wrong with the column type detection logic, and I just proposed a new fix for that in the linked PR.