Impute.jl icon indicating copy to clipboard operation
Impute.jl copied to clipboard

ERROR: UndefVarError: T not defined

Open ufechner7 opened this issue 3 years ago • 5 comments
trafficstars

Code:

using CSV, DataFrames, Impute

"""
    logfiles()

Return the names of the entries in the `data` directory whose name contains
`log_` and whose size is larger than 2000 bytes.

The names are returned without the `data/` prefix, as a `Vector{String}`.
"""
function logfiles()
    # Keep only the directory entries that look like log files.
    candidates = filter(name -> occursin(r"log_", name), readdir("data"))
    # Drop the (nearly) empty ones: anything of 2000 bytes or less.
    return String[name for name in candidates if stat("data/" * name).size > 2000]
end

"""
    read_log(logfiles)

Read every CSV file named in `logfiles` from the `data/` directory, combine
them into a single `DataFrame`, and interpolate missing values with
`Impute.interp`.

When the first `TIME` value of a file is more than 60 larger than the last
`TIME` value accumulated so far, filler rows are appended before joining: one
row per 60-wide step, holding the stepped time in the first position and
`missing` everywhere else (this assumes `TIME` is the first column).

# Arguments
- `logfiles`: iterable of file names, relative to `data/`.

# Returns
The result of `Impute.interp` applied to the combined table.
"""
function read_log(logfiles)
    df = nothing
    t_end = 0
    for logfile in logfiles
        df_new = CSV.read("data/" * logfile, DataFrame)
        if isnothing(df)
            # First file: it becomes the accumulator as-is.
            df = df_new
            t_end = last(df.TIME)
        else
            t_start = first(df_new.TIME)
            if (t_start - t_end) > 60
                # Number of 60-wide steps in the gap, rounded to the nearest step.
                n = div(t_start - t_end + 30, 60)
                # Placeholder values for every column except the first one.
                v = fill(missing, size(df)[2] - 1)
                println("Missing: ", n, " minutes")
                # Columns must accept `missing` before the filler rows are pushed.
                allowmissing!(df)
                for i in 1:n
                    v1 = vcat([i * 60 + t_end], v)
                    push!(df, v1)
                end
            end
            df = outerjoin(
                df,
                df_new;
                matchmissing=:equal,
                on=intersect(names(df), names(df_new)),
            )
            t_end = last(df.TIME)
        end
    end

    # Return the interpolated table; the pasted version ended in a bare
    # `return` (yielding `nothing`) and lacked the closing `end`.
    return Impute.interp(df)
end

# Pass the file names selected by `logfiles()` to `read_log` and bind the result to `df`.
df = read_log(logfiles())

Error message:

ERROR: UndefVarError: T not defined
Stacktrace:
  [1] _impute!(data::Vector{Any}, imp::Impute.Interpolate)
    @ Impute ~/.julia/packages/Impute/AB7zS/src/imputors/interp.jl:55
  [2] impute!(data::Vector{Any}, imp::Impute.Interpolate; dims::Function, kwargs::Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ Impute ~/.julia/packages/Impute/AB7zS/src/imputors.jl:127
  [3] impute!(data::Vector{Any}, imp::Impute.Interpolate)
    @ Impute ~/.julia/packages/Impute/AB7zS/src/imputors.jl:127
  [4] impute!(table::DataFrame, imp::Impute.Interpolate; cols::Nothing)
    @ Impute ~/.julia/packages/Impute/AB7zS/src/imputors.jl:232
  [5] impute!
    @ ~/.julia/packages/Impute/AB7zS/src/imputors.jl:224 [inlined]
  [6] #impute#8
    @ ~/.julia/packages/Impute/AB7zS/src/imputors.jl:80 [inlined]
  [7] impute
    @ ~/.julia/packages/Impute/AB7zS/src/imputors.jl:80 [inlined]
  [8] interp(data::DataFrame; kwargs::Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ Impute ~/.julia/packages/Impute/AB7zS/src/functional.jl:76
  [9] interp(data::DataFrame)
    @ Impute ~/.julia/packages/Impute/AB7zS/src/functional.jl:75
 [10] read_log(logfiles::Vector{String})
    @ Main ~/repos/trader/src/logging.jl:45
 [11] top-level scope
    @ REPL[2]:1

Installed packages:

pkg> st
      Status `~/repos/trader/Project.toml`
  [336ed68f] CSV v0.9.11
  [8f4d0f93] Conda v1.5.2
  [a93c6f00] DataFrames v1.2.2
  [587475ba] Flux v0.12.8
  [53d20848] FluxArchitectures v0.1.3
  [38e38edf] GLM v1.5.1
  [cd3eb016] HTTP v0.9.17
  [f7bf1975] Impute v0.6.7
  [682c06a0] JSON v0.21.2
  [0f8b85d8] JSON3 v1.9.2
  [9b87118b] PackageCompiler v2.0.2
  [d96e819e] Parameters v0.12.3
  [58dd65bb] Plotly v0.4.1
  [f0f68f2c] PlotlyJS v0.18.8
  [91a5bcdd] Plots v1.24.2
  [438e738f] PyCall v1.92.5
  [d330b81b] PyPlot v2.10.0
  [f269a46b] TimeZones v1.6.2
  [de0858da] Printf
  [10745b16] Statistics

Julia version 1.6.3 on Ubuntu Linux 18.04.

Any idea?

ufechner7 avatar Nov 28 '21 17:11 ufechner7

The data to reproduce the bug can be downloaded here: https://gist.github.com/ufechner7/812ad4770ca7b2bea83429161b02b11a

ufechner7 avatar Nov 28 '21 18:11 ufechner7

One more finding: The error does NOT happen when I launch Julia with the command `julia --project`. It DOES happen when I launch it with the command `julia -t auto --project`.

Any idea what is going on here?

ufechner7 avatar Nov 28 '21 18:11 ufechner7

The bug is in https://github.com/invenia/Impute.jl/blob/d08c5069f71507e4fee5e49e90f97c9527586a69/src/imputors/interp.jl#L55, which assumes that data has an eltype of Union{T,Missing}, and not just Missing. The latter case will cause T to be undefined, and throw the observed error.

jpsamaroo avatar Nov 28 '21 20:11 jpsamaroo

Nice, thanks for all the debugging details!

Hmm, I'm not sure if that's exactly a bug. If you try to interpolate among only missing values, then I'd expect that to fail. I suppose we could throw a more explicit error instead of falling back to the UndefVarError. What I find interesting is that this only happens with `-t auto`, which makes me think there's something weird going on with the multithreaded CSV file parsing. I'll try reproducing your example to see how the input types differ in those two cases.

rofinn avatar Nov 29 '21 17:11 rofinn

I'm afraid I was unable to reproduce your example on my local mac (1.6.1) or in a julia:1.6.4-buster docker container, using a range of 6-12 threads. Would you mind calling @show Tables.schema(df) right before calling Impute.interp(df) in your example? That should produce output that looks like:

Tables.schema(df) = Tables.Schema:
 :TIME                 Union{Missing, Int64}
 Symbol("1INCH-EUR")   Union{Missing, Float64}
 Symbol("AAVE-EUR")    Union{Missing, Float64}
 Symbol("ADA-EUR")     Union{Missing, Float64}
 ...

In case I missed something, this is the environment I'm using:

https://gist.github.com/rofinn/c4dbe2a293a1a07463c895c09cdca7cd

rofinn avatar Nov 29 '21 21:11 rofinn