FileIO.jl icon indicating copy to clipboard operation
FileIO.jl copied to clipboard

FileIO is not threadsafe

Open c42f opened this issue 3 years ago • 4 comments

Debugging some concurrency issues, I ended up inside the source of FileIO and saw that it maintains global state in sym2loader and sym2safer (and maybe other places) which is not protected by locks.

Presumably this can cause problems when these dictionaries are mutated during load, etc.

I don't have time to make an MWE right now, but I thought I'd leave a note about this nevertheless.

Here's a sample stacktrace from a loop which essentially does the following for a bunch of JPEG files:

@sync for f in files
    @spawn open(f) do io
        load(io)
    end
end
ERROR: TaskFailedException
    nested task error: concurrency violation detected
    Stacktrace:
      [1] error(s::String)
        @ Base ./error.jl:33
      [2] concurrency_violation()
        @ Base ./condition.jl:8
      [3] assert_havelock
        @ ./condition.jl:25 [inlined]
      [4] assert_havelock
        @ ./condition.jl:48 [inlined]
      [5] assert_havelock
        @ ./condition.jl:72 [inlined]
      [6] wait(c::Condition)
        @ Base ./condition.jl:102
      [7] _require(pkg::Base.PkgId)
        @ Base ./loading.jl:978
      [8] require(uuidkey::Base.PkgId)
        @ Base ./loading.jl:914
      [9] action(::Symbol, ::Vector{Union{Base.PkgId, Module}}, ::Formatted; options::Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
        @ FileIO ~/data/.julia/packages/FileIO/3jBq2/src/loadsave.jl:202
     [10] action
        @ ~/data/.julia/packages/FileIO/3jBq2/src/loadsave.jl:196 [inlined]
     [11] #load#15
        @ ~/data/.julia/packages/FileIO/3jBq2/src/loadsave.jl:120 [inlined]
     [12] load
        @ ~/data/.julia/packages/FileIO/3jBq2/src/loadsave.jl:117 [inlined]
     [13] #7
        @ /mnt/data/code/matt_test.jl:13 [inlined]
     [14] open(f::var"#7#10", args::String; kwargs::Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
        @ Base ./io.jl:330
     [15] open(f::Function, args::String)
        @ Base ./io.jl:328
     [16] open(f::Function, ::Type{IO}, cache::JuliaHubDataRepos.DataRepoCache, path::DataSets.RelPath; kws::Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
        @ JuliaHubDataRepos ~/data/.julia/dev/JuliaHubDataRepos/src/datasets_integration.jl:25
     [17] open(f::Function, ::Type{IO}, cache::JuliaHubDataRepos.DataRepoCache, path::DataSets.RelPath)
        @ JuliaHubDataRepos ~/data/.julia/dev/JuliaHubDataRepos/src/datasets_integration.jl:23
     [18] #open#35
        @ ~/data/.julia/packages/DataSets/kkNQy/src/BlobTree.jl:165 [inlined]
     [19] open(f::Function, file::Blob{JuliaHubDataRepos.DataRepoCache})
        @ DataSets ~/data/.julia/packages/DataSets/kkNQy/src/BlobTree.jl:165
     [20] (::var"#6#9"{BlobTree{JuliaHubDataRepos.DataRepoCache}, DataSets.RelPath})()
        @ Main ./threadingconstructs.jl:169
    Stacktrace:
      [1] handle_error(e::ErrorException, q::Base.PkgId, bt::Vector{Union{Ptr{Nothing}, Base.InterpreterIP}})
        @ FileIO ~/data/.julia/packages/FileIO/3jBq2/src/error_handling.jl:61
      [2] handle_exceptions(exceptions::Vector{Tuple{Any, Union{Base.PkgId, Module}, Vector{T} where T}}, action::String)
        @ FileIO ~/data/.julia/packages/FileIO/3jBq2/src/error_handling.jl:56
      [3] action(::Symbol, ::Vector{Union{Base.PkgId, Module}}, ::Formatted; options::Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
        @ FileIO ~/data/.julia/packages/FileIO/3jBq2/src/loadsave.jl:225
      [4] action
        @ ~/data/.julia/packages/FileIO/3jBq2/src/loadsave.jl:196 [inlined]
      [5] #load#15
        @ ~/data/.julia/packages/FileIO/3jBq2/src/loadsave.jl:120 [inlined]
      [6] load
        @ ~/data/.julia/packages/FileIO/3jBq2/src/loadsave.jl:117 [inlined]
      [7] #7
        @ /mnt/data/code/matt_test.jl:13 [inlined]
      [8] open(f::var"#7#10", args::String; kwargs::Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
        @ Base ./io.jl:330
      [9] open(f::Function, args::String)
        @ Base ./io.jl:328
     [10] open(f::Function, ::Type{IO}, cache::JuliaHubDataRepos.DataRepoCache, path::DataSets.RelPath; kws::Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
        @ JuliaHubDataRepos ~/data/.julia/dev/JuliaHubDataRepos/src/datasets_integration.jl:25
     [11] open(f::Function, ::Type{IO}, cache::JuliaHubDataRepos.DataRepoCache, path::DataSets.RelPath)
        @ JuliaHubDataRepos ~/data/.julia/dev/JuliaHubDataRepos/src/datasets_integration.jl:23
     [12] #open#35
        @ ~/data/.julia/packages/DataSets/kkNQy/src/BlobTree.jl:165 [inlined]
     [13] open(f::Function, file::Blob{JuliaHubDataRepos.DataRepoCache})
        @ DataSets ~/data/.julia/packages/DataSets/kkNQy/src/BlobTree.jl:165
     [14] (::var"#6#9"{BlobTree{JuliaHubDataRepos.DataRepoCache}, DataSets.RelPath})()
        @ Main ./threadingconstructs.jl:169
...and 12 more exceptions.

c42f avatar May 26 '21 01:05 c42f

I see the require(uuidkey::Base.PkgId) is called in the error callstack, so this thread issue perhaps happens during the io backend loading time.

@IanButterworth is this the exact reason that we have checked_import in ImageIO.jl? https://github.com/JuliaIO/ImageIO.jl/blob/bae08524621a7b14c1d1d9e9d77c9117e3e85807/src/ImageIO.jl#L25-L32

johnnychen94 avatar May 26 '21 02:05 johnnychen94

Yes, the fact that the crash happens during a call to require() was concerning.

Perhaps this indicates that Base.require itself is not threadsafe, because it appears to use plain Base.Condition, rather than Threads.Condition.

c42f avatar May 26 '21 23:05 c42f

Calls to require should very likely be behind a lock.

KristofferC avatar May 27 '21 07:05 KristofferC

For the record, #339 fixes this for Julia >= 1.3. For old Julia versions, it's still broken.

johnnychen94 avatar Jun 03 '21 18:06 johnnychen94