YAXArrays.jl
YAXArrays.jl copied to clipboard
Cube from map of cubes with different chunk sizes can't be mapcubed
When I compute the difference of two cubes with map, where the input cubes have different chunk sizes, the computation is done correctly and I can access the data, but I can no longer use mapCube on the result. I tried both saving the cube and constructing a CubeTable.
wcordiff = map(wcor, wcorann) do x,y
x - y
end
julia> YAXArrays.Cubes.cubechunks(wcor)
(5820, 4290, 1)
julia> YAXArrays.Cubes.cubechunks(wcorann)
(1000, 1000, 1)
I get the following error when I try to save the resulting cube:
julia> savecube(wcordiff, "data/cubes/wcordiff.zarr")
ERROR: Chunks do not align in dimension 1
Stacktrace:
[1] error(s::String)
@ Base ./error.jl:33
[2] (::DiskArrays.var"#52#60"{Vector{Tuple{Tuple{Int64, Int64, Int64}, Tuple{Int64, Int64, Int64}}}})(n::Int64)
@ DiskArrays ~/.julia/packages/DiskArrays/tD6J0/src/ops.jl:59
[3] ntuple
@ ./ntuple.jl:19 [inlined]
[4] common_chunks(::Tuple{Int64, Int64, Int64}, ::ZArray{Union{Missing, Float32}, 3, Zarr.BloscCompressor, DirectoryStore}, ::Vararg{ZArray{Union{Missing, Float32}, 3, Zarr.BloscCompressor, DirectoryStore}})
@ DiskArrays ~/.julia/packages/DiskArrays/tD6J0/src/ops.jl:55
[5] eachchunk
@ ~/.julia/packages/DiskArrays/tD6J0/src/ops.jl:26 [inlined]
[6] cubechunks(c::YAXArray{Union{Missing, Float32}, 3, DiskArrays.BroadcastDiskArray{Union{Missing, Float32}, 3, Base.Broadcast.Broadcasted{DiskArrays.ChunkStyle{3}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}, Base.OneTo{Int64}}, var"#6#7", Tuple{ZArray{Union{Missing, Float32}, 3, Zarr.BloscCompressor, DirectoryStore}, ZArray{Union{Missing, Float32}, 3, Zarr.BloscCompressor, DirectoryStore}}}}, Vector{CubeAxis}})
@ YAXArrays.Cubes ~/Documents/papers_wip/EMDAmazonas/dev/YAXArrays/src/Cubes/Cubes.jl:150
[7] (::YAXArrays.DAT.var"#137#140"{YAXArrays.DAT.DATConfig{1, 1}, Dict{Int64, Int64}, Vector{NamedTuple{(:iloopax, :cs, :iscompressed, :innerleap, :preventpar), Tuple{Int64, Int64, Bool, Int64, Bool}}}})(lax::RangeAxis{Float64, :X, StepRangeLen{Float64, Base.TwicePrecision{Float64}, Base.TwicePrecision{Float64}, Int64}}, ilax::Int64)
@ YAXArrays.DAT ~/Documents/papers_wip/EMDAmazonas/dev/YAXArrays/src/DAT/DAT.jl:883
[8] foreach(::Function, ::Vector{CubeAxis}, ::UnitRange{Int64})
@ Base ./abstractarray.jl:2695
[9] getCacheSizes(dc::YAXArrays.DAT.DATConfig{1, 1}, loopchunksizes::Dict{Any, Any})
@ YAXArrays.DAT ~/Documents/papers_wip/EMDAmazonas/dev/YAXArrays/src/DAT/DAT.jl:875
[10] mapCube(::YAXArrays.DAT.var"#cop#269", ::Tuple{YAXArray{Union{Missing, Float32}, 3, DiskArrays.BroadcastDiskArray{Union{Missing, Float32}, 3, Base.Broadcast.Broadcasted{DiskArrays.ChunkStyle{3}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}, Base.OneTo{Int64}}, var"#6#7", Tuple{ZArray{Union{Missing, Float32}, 3, Zarr.BloscCompressor, DirectoryStore}, ZArray{Union{Missing, Float32}, 3, Zarr.BloscCompressor, DirectoryStore}}}}, Vector{CubeAxis}}}; max_cache::Float64, indims::InDims, outdims::OutDims, inplace::Bool, ispar::Bool, debug::Bool, include_loopvars::Bool, showprog::Bool, nthreads::Vector{Int64}, loopchunksize::Dict{Any, Any}, kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ YAXArrays.DAT ~/Documents/papers_wip/EMDAmazonas/dev/YAXArrays/src/DAT/DAT.jl:431
[11] #mapCube#38
@ ~/Documents/papers_wip/EMDAmazonas/dev/YAXArrays/src/DAT/DAT.jl:281 [inlined]
[12] savecube(c::YAXArray{Union{Missing, Float32}, 3, DiskArrays.BroadcastDiskArray{Union{Missing, Float32}, 3, Base.Broadcast.Broadcasted{DiskArrays.ChunkStyle{3}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}, Base.OneTo{Int64}}, var"#6#7", Tuple{ZArray{Union{Missing, Float32}, 3, Zarr.BloscCompressor, DirectoryStore}, ZArray{Union{Missing, Float32}, 3, Zarr.BloscCompressor, DirectoryStore}}}}, Vector{CubeAxis}}, name::String; chunksize::Dict{Any, Any}, max_cache::Float64, backend::Symbol, backendargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ YAXArrays.DAT ~/Documents/papers_wip/EMDAmazonas/dev/YAXArrays/src/DAT/CubeIO.jl:53
[13] savecube(c::YAXArray{Union{Missing, Float32}, 3, DiskArrays.BroadcastDiskArray{Union{Missing, Float32}, 3, Base.Broadcast.Broadcasted{DiskArrays.ChunkStyle{3}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}, Base.OneTo{Int64}}, var"#6#7", Tuple{ZArray{Union{Missing, Float32}, 3, Zarr.BloscCompressor, DirectoryStore}, ZArray{Union{Missing, Float32}, 3, Zarr.BloscCompressor, DirectoryStore}}}}, Vector{CubeAxis}}, name::String)
@ YAXArrays.DAT ~/Documents/papers_wip/EMDAmazonas/dev/YAXArrays/src/DAT/CubeIO.jl:17
[14] top-level scope
@ REPL[29]:1
For now I can circumvent the problem by constructing a new cube, but I am not sure whether this is the best approach:
julia> newdiff = YAXArray(wcordiff.axes, wcordiff[:,:,:])
I have done a lot of work over the last few days on operations on chunked cubes, so the issue might already be resolved. You can try this set of branches: https://github.com/noralinscheid/xaida/issues/1
There must have been a copy/paste error in your example. What is the output of YAXArrays.Cubes.cubechunks(wcor)?
Yes, there was a copy/paste mishap. These are the cubechunks of wcor:
julia> YAXArrays.Cubes.cubechunks(wcor)
(5820, 4290, 1)
I also edited the original post. I am going to check the other branches next week.
This seems to be related to https://github.com/meggart/DiskArrays.jl/issues/35
This is still a problem in 0.4.4. I run into it when I try to save a cube after a map operation between two cubes, where I use setchunks to make the chunks of both cubes match. The following code, together with the attached files, reproduces the error.
using YAXArrays
wcorbinned = Cube("smallwcor.zarr")
signifmask = Cube("smallmask.zarr")
cormaskedbin = map((x,y)-> x*y, setchunks(permutedims(signifmask, YAXArrays.Axes.findAxis.(caxes(signifmask), (wcorbinned,))), wcorbinned.chunks), wcorbinned)
savecube(cormaskedbin, "data/s1cube_jurua_freqbin_watercorrelation_significance.zarr", overwrite=true)
ERROR: Chunks do not align in dimension 1
Stacktrace:
[1] error(s::String)
@ Base ./error.jl:35
[2] (::DiskArrays.var"#75#79"{Int64, Int64})(i::Int64, ch::DiskArrays.GridChunks{3})
@ DiskArrays ~/.julia/packages/DiskArrays/f8PI0/src/broadcast.jl:96
[3] #4
@ ./generator.jl:36 [inlined]
[4] iterate
@ ./generator.jl:47 [inlined]
[5] collect_to!
@ ./array.jl:845 [inlined]
[6] collect_to_with_first!
@ ./array.jl:823 [inlined]
[7] collect(itr::Base.Generator{Base.Iterators.Zip{Tuple{Vector{Int64}, Vector{DiskArrays.GridChunks{3}}}}, Base.var"#4#5"{DiskArrays.var"#75#79"{Int64, Int64}}})
@ Base ./array.jl:797
[8] map
@ ./abstractarray.jl:3055 [inlined]
[9] merge_chunks(csnow::Vector{DiskArrays.GridChunks{3}}, n::Int64)
@ DiskArrays ~/.julia/packages/DiskArrays/f8PI0/src/broadcast.jl:94
[10] #65
@ ~/.julia/packages/DiskArrays/f8PI0/src/broadcast.jl:72 [inlined]
[11] ntuple(f::DiskArrays.var"#65#70"{Tuple{Int64, Int64, Int64}, Vector{DiskArrays.GridChunks{3}}}, n::Int64)
@ Base ./ntuple.jl:19
[12] common_chunks(::Tuple{Int64, Int64, Int64}, ::DiskArrays.PermutedDiskArray{Union{Missing, Bool}, 3, PermutedDimsArray{Union{Missing, Bool}, 3, (2, 1, 3), (2, 1, 3), DiskArrays.BroadcastDiskArray{Union{Missing, Bool}, 3, Base.Broadcast.Broadcasted{DiskArrays.ChunkStyle{3}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}, Base.OneTo{Int64}}, Base.var"#97#98"{typeof(iszero)}, Tuple{DiskArrayTools.CFDiskArray{Int64, 3, Int64, Zarr.ZArray{Int64, 3, Zarr.BloscCompressor, Zarr.DirectoryStore}}}}}}}, ::Vararg{Any})
@ DiskArrays ~/.julia/packages/DiskArrays/f8PI0/src/broadcast.jl:63
[13] eachchunk
@ ~/.julia/packages/DiskArrays/f8PI0/src/broadcast.jl:51 [inlined]
[14] get_copy_buffer_size(incube::DiskArrays.BroadcastDiskArray{Union{Missing, Float32}, 3, Base.Broadcast.Broadcasted{DiskArrays.ChunkStyle{3}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}, Base.OneTo{Int64}}, var"#41#42", Tuple{DiskArrays.PermutedDiskArray{Union{Missing, Bool}, 3, PermutedDimsArray{Union{Missing, Bool}, 3, (2, 1, 3), (2, 1, 3), DiskArrays.BroadcastDiskArray{Union{Missing, Bool}, 3, Base.Broadcast.Broadcasted{DiskArrays.ChunkStyle{3}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}, Base.OneTo{Int64}}, Base.var"#97#98"{typeof(iszero)}, Tuple{DiskArrayTools.CFDiskArray{Int64, 3, Int64, Zarr.ZArray{Int64, 3, Zarr.BloscCompressor, Zarr.DirectoryStore}}}}}}}, DiskArrays.SubDiskArray{Union{Missing, Float32}, 3}}}}, outcube::DiskArrayTools.CFDiskArray{Float32, 3, Float32, Zarr.ZArray{Float32, 3, Zarr.BloscCompressor, Zarr.DirectoryStore}}; writefac::Float64, maxbuf::Float64, align_output::Bool)
@ YAXArrays.Cubes ~/.julia/packages/YAXArrays/Fe7F8/src/Cubes/Rechunker.jl:61
[15] copy_diskarray(incube::DiskArrays.BroadcastDiskArray{Union{Missing, Float32}, 3, Base.Broadcast.Broadcasted{DiskArrays.ChunkStyle{3}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}, Base.OneTo{Int64}}, var"#41#42", Tuple{DiskArrays.PermutedDiskArray{Union{Missing, Bool}, 3, PermutedDimsArray{Union{Missing, Bool}, 3, (2, 1, 3), (2, 1, 3), DiskArrays.BroadcastDiskArray{Union{Missing, Bool}, 3, Base.Broadcast.Broadcasted{DiskArrays.ChunkStyle{3}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}, Base.OneTo{Int64}}, Base.var"#97#98"{typeof(iszero)}, Tuple{DiskArrayTools.CFDiskArray{Int64, 3, Int64, Zarr.ZArray{Int64, 3, Zarr.BloscCompressor, Zarr.DirectoryStore}}}}}}}, DiskArrays.SubDiskArray{Union{Missing, Float32}, 3}}}}, outcube::DiskArrayTools.CFDiskArray{Float32, 3, Float32, Zarr.ZArray{Float32, 3, Zarr.BloscCompressor, Zarr.DirectoryStore}}; writefac::Float64, maxbuf::Float64, align_output::Bool)
@ YAXArrays.Cubes ~/.julia/packages/YAXArrays/Fe7F8/src/Cubes/Rechunker.jl:93
[16] copydataset!(diskds::Dataset, ds::Dataset; writefac::Float64, maxbuf::Float64)
@ YAXArrays.Datasets ~/.julia/packages/YAXArrays/Fe7F8/src/DatasetAPI/Datasets.jl:401
[17] savedataset(ds::Dataset; path::String, persist::Nothing, overwrite::Bool, append::Bool, skeleton::Bool, backend::Symbol, driver::Symbol, max_cache::Float64, writefac::Float64)
@ YAXArrays.Datasets ~/.julia/packages/YAXArrays/Fe7F8/src/DatasetAPI/Datasets.jl:525
[18] savecube(c::YAXArray{Union{Missing, Float32}, 3, DiskArrays.BroadcastDiskArray{Union{Missing, Float32}, 3, Base.Broadcast.Broadcasted{DiskArrays.ChunkStyle{3}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}, Base.OneTo{Int64}}, var"#41#42", Tuple{DiskArrays.PermutedDiskArray{Union{Missing, Bool}, 3, PermutedDimsArray{Union{Missing, Bool}, 3, (2, 1, 3), (2, 1, 3), DiskArrays.BroadcastDiskArray{Union{Missing, Bool}, 3, Base.Broadcast.Broadcasted{DiskArrays.ChunkStyle{3}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}, Base.OneTo{Int64}}, Base.var"#97#98"{typeof(iszero)}, Tuple{DiskArrayTools.CFDiskArray{Int64, 3, Int64, Zarr.ZArray{Int64, 3, Zarr.BloscCompressor, Zarr.DirectoryStore}}}}}}}, DiskArrays.SubDiskArray{Union{Missing, Float32}, 3}}}}, Vector{CubeAxis}}, path::String; name::String, datasetaxis::String, max_cache::Float64, backend::Symbol, driver::Symbol, chunks::Nothing, overwrite::Bool, append::Bool, skeleton::Bool, writefac::Float64)
@ YAXArrays.Datasets ~/.julia/packages/YAXArrays/Fe7F8/src/DatasetAPI/Datasets.jl:566
[19] top-level scope
@ REPL[364]:1