DelayDiffEq.jl icon indicating copy to clipboard operation
DelayDiffEq.jl copied to clipboard

Significant allocations in DDE interpolation for multiple time points

Open helengracehuang opened this issue 3 years ago • 5 comments

Originally posted in JuliaLang: Here’re two MWEs (one implemented with idxs and the other with in-place history function):

# Example 1
using DifferentialEquations;
using BenchmarkTools;

function computeNFkBFluxes!(nettFlux, concentration, delay, (birthday), time)
    @inbounds begin
        p = (birthday);
        hist_1_075 = delay(p, time-0.75; idxs=1);
        hist_2_075 = delay(p, time-0.75; idxs=2);
        hist_3_075 = delay(p, time-0.75; idxs=3);

        hist_2_1 = delay(p, time-1.0; idxs=2);
        hist_3_1 = delay(p, time-1.0; idxs=3);

        hist_1_15 = delay(p, time-1.5; idxs=1);
        hist_2_15 = delay(p, time-1.5; idxs=2);

        nettFlux[1] = (1/(1+1*(hist_2_075^2))) * (0.2 - 0.3)*concentration[1] - 0.5*concentration[1];
        nettFlux[2] = (1/(1+1*(hist_2_1^2))) * (1 - 0.2 + 0.3)*concentration[1] +
                (1/(1+1*(hist_1_15^2))) * (0.2 - 0.3)*concentration[2] - 1*concentration[2];
        nettFlux[3] = (1/(1+1*(hist_2_15^2))) * (1 - 0.2 + 0.3)*concentration[2] - 1*concentration[3];
        nettFlux[4] = (1/(1+1*(hist_1_075^2))) * (1 - 0.2 + 0.3)*concentration[3] - 1*concentration[4];
        nettFlux[5] = (1/(1+1*(hist_3_075^2))) * (1 - 0.2 + 0.3)*concentration[4] - 1*concentration[5];
        nettFlux[6] = (1/(1+1*(hist_3_1^2))) * (1 - 0.2 + 0.3)*concentration[5] - 1*concentration[6];
    end
end

const delay(p, t; idxs=nothing) = typeof(idxs) <: Number ? 0.0 : ones(6);
Bcell = DDEProblem(computeNFkBFluxes!, [1.0,1.0,1.0,1.0,1.0,1.0], delay, (0.0, 120.0), (0); constant_lags=[0.75, 1.0, 1.5]);

@btime solution = solve(Bcell, MethodOfSteps(Tsit5()), save_everystep=false, maxiters=1e10);
# Example 2
function computeNFkBFluxes!(nettFlux, concentration, delay, (historicFlux), time)
    @inbounds begin
        # get historic values for nuclear NFkB concentrations (for delayed transcription)
        p = (historicFlux);
        delay(historicFlux, p, time-0.75);
        hist_1_075 = historicFlux[1];
        hist_2_075 = historicFlux[2];
        hist_3_075 = historicFlux[3];

        delay(historicFlux, p, time-1.0);
        hist_2_1 = historicFlux[2];
        hist_3_1 = historicFlux[3];

        delay(historicFlux, p, time-1.5);
        hist_1_15 = historicFlux[1];
        hist_2_15 = historicFlux[2];

        nettFlux[1] = (1/(1+1*(hist_2_075^2))) * (0.2 - 0.3)*concentration[1] - 0.5*concentration[1];
        nettFlux[2] = (1/(1+1*(hist_2_1^2))) * (1 - 0.2 + 0.3)*concentration[1] +
                (1/(1+1*(hist_1_15^2))) * (0.2 - 0.3)*concentration[2] - 1*concentration[2];
        nettFlux[3] = (1/(1+1*(hist_2_15^2))) * (1 - 0.2 + 0.3)*concentration[2] - 1*concentration[3];
        nettFlux[4] = (1/(1+1*(hist_1_075^2))) * (1 - 0.2 + 0.3)*concentration[3] - 1*concentration[4];
        nettFlux[5] = (1/(1+1*(hist_3_075^2))) * (1 - 0.2 + 0.3)*concentration[4] - 1*concentration[5];
        nettFlux[6] = (1/(1+1*(hist_3_1^2))) * (1 - 0.2 + 0.3)*concentration[5] - 1*concentration[6];
    end
end
historicFlux = ones(Float64, 6);
const delay(historicFlux, p, t) = (historicFlux .= 1.0);
Bcell = DDEProblem(computeNFkBFluxes!, [1.0,1.0,1.0,1.0,1.0,1.0], delay, (0.0, 120.0), (historicFlux); constant_lags=[0.75, 1.0, 1.5]);

@btime solution = solve(Bcell, MethodOfSteps(Tsit5()), save_everystep=false, maxiters=1e10);

The 1st example (implemented with idxs) runs in 1.778 ms (76287 allocations: 1.29 MiB), and the 2nd example (implemented using in-place history function) runs in 326.035 μs (7462 allocations: 1.39 MiB). I thought the idxs implementation is supposed to save me some allocations, but it’s 10 times more. Also, if I run the first example with save_everystep=true, then it runs in 1.813 ms (77228 allocations: 1.41 MiB), which is 941 more allocations than save_everystep=false, so I’m assuming that saving every step of the solution only takes 1/100 of what the interpolation function in the DDE solver takes, so I might as well just pre-allocate a cache to store every steps?

helengracehuang avatar Sep 14 '21 16:09 helengracehuang

I tried to track this down and I think this is an instance of https://github.com/JuliaLang/julia/issues/35800 . The first case that I saw interpolations having weird allocations that couldn't be fixed was https://github.com/SciML/OrdinaryDiffEq.jl/pull/1473 . There's something weird going on with inference and I think this might need a Julia compiler bugfix, so it'll have to wait for now.

ChrisRackauckas avatar Sep 14 '21 17:09 ChrisRackauckas

I came across the same problem. Is there an immediate workaround when mutation is not an option?

lindnemi avatar Oct 25 '21 16:10 lindnemi

Non-mutation is always going to allocate if it's working with arrays. The issue instead is that scalar operations allocate due to a bug in inference.

ChrisRackauckas avatar Oct 25 '21 20:10 ChrisRackauckas

fyi, using the original example

Show script
# Example 1
using Pkg
pkg"activate --temp"
pkg"add DelayDiffEq, BenchmarkTools"

using DelayDiffEq, BenchmarkTools

function computeNFkBFluxes!(nettFlux, concentration, delay, (birthday), time)
    @inbounds begin
        p = (birthday);
        hist_1_075 = delay(p, time-0.75; idxs=1);
        hist_2_075 = delay(p, time-0.75; idxs=2);
        hist_3_075 = delay(p, time-0.75; idxs=3);

        hist_2_1 = delay(p, time-1.0; idxs=2);
        hist_3_1 = delay(p, time-1.0; idxs=3);

        hist_1_15 = delay(p, time-1.5; idxs=1);
        hist_2_15 = delay(p, time-1.5; idxs=2);

        nettFlux[1] = (1/(1+1*(hist_2_075^2))) * (0.2 - 0.3)*concentration[1] - 0.5*concentration[1];
        nettFlux[2] = (1/(1+1*(hist_2_1^2))) * (1 - 0.2 + 0.3)*concentration[1] +
                (1/(1+1*(hist_1_15^2))) * (0.2 - 0.3)*concentration[2] - 1*concentration[2];
        nettFlux[3] = (1/(1+1*(hist_2_15^2))) * (1 - 0.2 + 0.3)*concentration[2] - 1*concentration[3];
        nettFlux[4] = (1/(1+1*(hist_1_075^2))) * (1 - 0.2 + 0.3)*concentration[3] - 1*concentration[4];
        nettFlux[5] = (1/(1+1*(hist_3_075^2))) * (1 - 0.2 + 0.3)*concentration[4] - 1*concentration[5];
        nettFlux[6] = (1/(1+1*(hist_3_1^2))) * (1 - 0.2 + 0.3)*concentration[5] - 1*concentration[6];
    end
end

const delay(p, t; idxs=nothing) = typeof(idxs) <: Number ? 0.0 : ones(6);
Bcell = DDEProblem(computeNFkBFluxes!, [1.0,1.0,1.0,1.0,1.0,1.0], delay, (0.0, 120.0), (0); constant_lags=[0.75, 1.0, 1.5]);

@btime solution = solve(Bcell, MethodOfSteps(Tsit5()), save_everystep=false, maxiters=1e10);

# Example 2
function computeNFkBFluxes!(nettFlux, concentration, delay, (historicFlux), time)
    @inbounds begin
        # get historic values for nuclear NFkB concentrations (for delayed transcription)
        p = (historicFlux);
        delay(historicFlux, p, time-0.75);
        hist_1_075 = historicFlux[1];
        hist_2_075 = historicFlux[2];
        hist_3_075 = historicFlux[3];

        delay(historicFlux, p, time-1.0);
        hist_2_1 = historicFlux[2];
        hist_3_1 = historicFlux[3];

        delay(historicFlux, p, time-1.5);
        hist_1_15 = historicFlux[1];
        hist_2_15 = historicFlux[2];

        nettFlux[1] = (1/(1+1*(hist_2_075^2))) * (0.2 - 0.3)*concentration[1] - 0.5*concentration[1];
        nettFlux[2] = (1/(1+1*(hist_2_1^2))) * (1 - 0.2 + 0.3)*concentration[1] +
                (1/(1+1*(hist_1_15^2))) * (0.2 - 0.3)*concentration[2] - 1*concentration[2];
        nettFlux[3] = (1/(1+1*(hist_2_15^2))) * (1 - 0.2 + 0.3)*concentration[2] - 1*concentration[3];
        nettFlux[4] = (1/(1+1*(hist_1_075^2))) * (1 - 0.2 + 0.3)*concentration[3] - 1*concentration[4];
        nettFlux[5] = (1/(1+1*(hist_3_075^2))) * (1 - 0.2 + 0.3)*concentration[4] - 1*concentration[5];
        nettFlux[6] = (1/(1+1*(hist_3_1^2))) * (1 - 0.2 + 0.3)*concentration[5] - 1*concentration[6];
    end
end
historicFlux = ones(Float64, 6);
const delay(historicFlux, p, t) = (historicFlux .= 1.0);
Bcell = DDEProblem(computeNFkBFluxes!, [1.0,1.0,1.0,1.0,1.0,1.0], delay, (0.0, 120.0), (historicFlux); constant_lags=[0.75, 1.0, 1.5]);

@btime solution = solve(Bcell, MethodOfSteps(Tsit5()), save_everystep=false, maxiters=1e10);

the number of allocations seems to have improved significantly in the current [email protected] on Julia 1.6:

  191.342 μs (1067 allocations: 139.84 KiB) # example 1
  152.096 μs (7470 allocations: 415.97 KiB) # example 2

Same Julia version, same script, but resolved on an old registry from September 2021 reproduced the problem.

  996.489 μs (76286 allocations: 1.29 MiB) # example 1
  261.950 μs (7461 allocations: 1.39 MiB) # example 2

hexaeder avatar Jun 24 '24 10:06 hexaeder

That's great to see! Yes we've been continuing to make performance improvements. We should keep this open until it gets to what we think is the minimum and then add some tests.

ChrisRackauckas avatar Jun 27 '24 01:06 ChrisRackauckas