YAXArrays.jl
YAXArrays.jl copied to clipboard
`mapCube` in Distributed mode is many times slower
Serial: 8 seconds
using YAXArrays, Statistics, Zarr
using DimensionalData
using Dates
# Axes of a synthetic test cube: 365 daily time steps (all of 2022),
# 1000 lon points, 1500 lat points, and two named variables.
axlist = (
Dim{:time}(Date("2022-01-01"):Day(1):Date("2022-12-31")),
Dim{:lon}(range(1, 10, length=1000)),
Dim{:lat}(range(1, 5, length=1500)),
Dim{:Variable}(["var1", "var2"])
)
# Random data generated from the axis definitions.
# NOTE(review): 365 * 1000 * 1500 * 2 elements is ~8.8 GB if the element type
# is Float64 -- confirm the eltype and that this fits in memory.
data = rand(axlist...)
# Wrap the data and its axes in a YAXArray.
ds = YAXArray(axlist, data)
# Keep only the "var1" entry of the Variable axis.
# NOTE(review): the trailing comment mentions a file, but no file is opened
# anywhere in this snippet -- it may be a leftover from another example.
c = ds[Variable = At("var1")] # see OpenNetCDF to get the file
# Request chunks of size (365, 100, 150); 365 equals the full length of the time axis.
# NOTE(review): presumably the order is (time, lon, lat) -- confirm.
c2 = setchunks(c, (365, 100, 150))
"""
    mymean(output, xin)

Compute the mean of `xin` and store it in every element of `output`, in place.

`output` is a preallocated, mutable array; `xin` is any collection accepted by
`Statistics.mean`.
"""
function mymean(output, xin)
    avg = mean(xin)
    # Broadcast-assign so the existing `output` buffer is overwritten, not rebound.
    output[:] .= avg
end
# Declare "time" as the input dimension passed to the mapped function.
indims = InDims("time")
# No output dimensions are declared.
# NOTE(review): presumably this means one scalar result per input slice -- confirm against the OutDims docs.
outdims = OutDims()
# Apply `mymean` over `c2` and time the call (serial run).
# NOTE(review): a first call measured with `@time` also includes compilation;
# run it twice for a representative timing.
@time resultcube = mapCube(mymean, c2; indims, outdims)
Parallel (4 worker processes): 660 seconds
using Distributed
# Launch 4 additional worker processes.
addprocs(4)
# Optional project activation on all processes (currently disabled):
# @everywhere using Pkg
# @everywhere Pkg.activate(".")
# Load the packages needed by the computation on every process.
@everywhere begin
# using NetCDF
using YAXArrays
using Statistics
using Zarr
end
# Define `mymean` on every process so it is available wherever it is called.
# It computes the mean of `xin` and stores it in every element of `output`, in place.
@everywhere function mymean(output, xin)
    avg = mean(xin)
    # Broadcast-assign so the existing `output` buffer is overwritten, not rebound.
    output[:] .= avg
end
# Declare "time" as the input dimension passed to the mapped function.
indims = InDims("time")
# No output dimensions are declared.
# NOTE(review): presumably this means one scalar result per input slice -- confirm against the OutDims docs.
outdims = OutDims()
# This run, after the worker processes were added, turns out to be much slower
# than the serial run -- why?
# NOTE(review): a first call measured with `@time` also includes compilation;
# run it twice for a representative timing.
@time resultcube2 = mapCube(mymean, c2; indims, outdims)