YAXArrays.jl
YAXArrays.jl copied to clipboard
Using a larger-than-memory NetCDF file should not lead to a getindex error from the backend list
If the output is larger than memory and Zarr is not loaded, we currently get the following error. We should either fall back to other backends that might be available or give a better error message.
# Dataset "lst"
# Select the "t2m" variable from the dataset `ds`; the result is the cube used below.
lst = ds[Variable="t2m"]
YAXArray with the following dimensions
longitude Axis with 101 Elements from -80.0 to -55.0
latitude Axis with 101 Elements from 65.0 to 40.0
number Axis with 50 Elements from 1 to 50
time Axis with 721 Elements from 2019-05-01T00:00:00 to 2019-10-28T00:00:00
units: K
Total size: 2.74 GB
# Time handling
# Look up the "time" axis of the cube.
time_to_index = getAxis("time", lst)
# One (year, month, day) tuple per time step.
time_index = yearmonthday.(time_to_index)
# Distinct calendar days, in order of first appearance.
new_dates = unique(time_index)
# For every distinct day, the positions of all time steps that fall on that day.
index_in_cube = map(day -> findall(==(day), time_index), new_dates)
# Functions
"""
    maximum_by_index(xout, xin; index_list = time_to_index)

Fill `xout` in place with the NaN-ignoring maximum of each index group of `xin`.

`xout` is first set to `NaN` everywhere. Then, for every position `k` of `index_list`,
the elements `xin[index_list[k]]` are inspected: if at least one of them is not `NaN`,
`xout[k]` receives the maximum of the non-`NaN` elements; otherwise `xout[k]` stays `NaN`.

# Keywords
- `index_list`: collection whose `k`-th entry is used to index `xin`. It defaults to the
  global `time_to_index`. NOTE(review): callers in this script pass `index_in_cube`
  explicitly; confirm that the default is ever meant to be used.

# Returns
`nothing`; the result is written into `xout`.
"""
function maximum_by_index(xout, xin; index_list = time_to_index)
    xout .= NaN
    # An all-NaN input cannot produce any maximum, so leave `xout` as all-NaN.
    all(isnan, xin) && return nothing
    for k in eachindex(index_list)
        group = xin[index_list[k]]
        # Skip groups without any valid value; their output slot stays NaN.
        all(isnan, group) && continue
        xout[k] = maximum(filter(!isnan, group))
    end
    return nothing
end
"""
    dates_builder(x)

Convert a collection of date-component tuples into a `Vector{Date}`.

Up to the first three components of every element of `x` are passed to the `Date`
constructor. `(year, month, day)` tuples, such as those returned by `yearmonthday`,
therefore keep their day, while `(year, month)` tuples still map to the first day of
that month.

# Returns
A `Vector{Date}` with one entry per element of `x` (empty when `x` is empty).
"""
function dates_builder(x)
    # Passing only year and month would collapse every day of a month onto its first
    # day and yield duplicated dates, so the day component is forwarded when present.
    return Date[Date(parts[1:min(end, 3)]...) for parts in x]
end
# The user function works along the "time" dimension of the input cube.
Indims = InDims("time")
# The output carries a new "time" axis built from the distinct dates.
outdims = OutDims(RangeAxis("time", dates_builder(new_dates)))
# Apply `maximum_by_index` over the cube, forwarding the per-day index groups to it.
t2m_daily_high = mapCube(
    maximum_by_index,
    lst;
    indims = Indims,
    outdims = outdims,
    index_list = index_in_cube,
    showprog = true,
)
The call fails with an error about a missing `:zarr` key, even though the input file is NetCDF.
┌ Warning: There are still cache misses
└ @ YAXArrays.DAT ~/.julia/packages/YAXArrays/au5n4/src/DAT/DAT.jl:1070
KeyError: key :zarr not found
Stacktrace:
[1] getindex(h::OrderedCollections.OrderedDict{Symbol, Any}, key::Symbol)
@ OrderedCollections ~/.julia/packages/OrderedCollections/PRayh/src/ordered_dict.jl:380
[2] getbackend(oc::YAXArrays.DAT.OutputCube, ispar::Base.RefValue{Bool}, max_cache::Float64)
@ YAXArrays.DAT ~/.julia/packages/YAXArrays/au5n4/src/DAT/DAT.jl:784
[3] generateOutCube(oc::YAXArrays.DAT.OutputCube, ispar::Base.RefValue{Bool}, max_cache::Float64, loopcachesize::Tuple{Int64, Int64, Int64}, co::Tuple{Int64, Int64, Int64})
@ YAXArrays.DAT ~/.julia/packages/YAXArrays/au5n4/src/DAT/DAT.jl:846
[4] (::YAXArrays.DAT.var"#131#132"{YAXArrays.DAT.DATConfig{1, 1}, Tuple{Int64, Int64, Int64}, Tuple{Int64, Int64, Int64}})(c::YAXArrays.DAT.OutputCube)
@ YAXArrays.DAT ~/.julia/packages/YAXArrays/au5n4/src/DAT/DAT.jl:842
[5] foreach(f::YAXArrays.DAT.var"#131#132"{YAXArrays.DAT.DATConfig{1, 1}, Tuple{Int64, Int64, Int64}, Tuple{Int64, Int64, Int64}}, itr::Tuple{YAXArrays.DAT.OutputCube})
@ Base ./abstractarray.jl:2694
[6] generateOutCubes(dc::YAXArrays.DAT.DATConfig{1, 1})
@ YAXArrays.DAT ~/.julia/packages/YAXArrays/au5n4/src/DAT/DAT.jl:841
[7] mapCube(::typeof(maximum_by_index), ::Tuple{YAXArray{Union{Missing, Float64}, 4, DiskArrays.SubDiskArray{Union{Missing, Float64}, 4}, Vector{CubeAxis}}}; max_cache::Float64, indims::InDims, outdims::OutDims, inplace::Bool, ispar::Bool, debug::Bool, include_loopvars::Bool, showprog::Bool, irregular_loopranges::Bool, nthreads::Vector{Int64}, loopchunksize::Dict{Any, Any}, kwargs::Base.Pairs{Symbol, Vector{Vector{Int64}}, Tuple{Symbol}, NamedTuple{(:index_list,), Tuple{Vector{Vector{Int64}}}}})
@ YAXArrays.DAT ~/.julia/packages/YAXArrays/au5n4/src/DAT/DAT.jl:472
[8] #mapCube#36
@ ~/.julia/packages/YAXArrays/au5n4/src/DAT/DAT.jl:303 [inlined]
[9] top-level scope
@ In[13]:1
[10] eval
@ ./boot.jl:373 [inlined]
[11] include_string(mapexpr::typeof(REPL.softscope), mod::Module, code::String, filename::String)
@ Base ./loading.jl:1196
Edit: the code works if I extract a single member of the ensemble (e.g. number=21):
# Same computation, restricted to the single ensemble member number=21.
t2m_daily_high = mapCube(
    maximum_by_index,
    ds[Variable="t2m", number=21];
    indims = Indims,
    outdims = outdims,
    index_list = index_in_cube,
    showprog = true,
)
YAXArray with the following dimensions
time Axis with 181 Elements from 2019-05-01 to 2019-10-01
longitude Axis with 101 Elements from -80.0 to -55.0
latitude Axis with 101 Elements from 65.0 to 40.0
Total size: 42.26 MB
Originally posted by @Balinus in https://github.com/JuliaDataCubes/YAXArrays.jl/issues/217#issuecomment-1471778989