
Differences in performance when comparing diffeqpy with scipy.solve_ivp

Open · alm818 opened this issue on Jun 21, 2021 · 2 comments

I was testing the performance of diffeqpy's Tsit5 on a Lotka-Volterra system and comparing it against SciPy's RK45. diffeqpy is consistently slower: about 1.5x slower without Numba and about 5x slower with Numba. Any reason why this could be? Could it be because the code isn't running natively in Julia?

from diffeqpy import ode
import numpy as np
import numba
from numba import njit
import timeit
from scipy.integrate import solve_ivp
# Lotka-Volterra with linear interpolation of the parameters from p0 to p1
def julia_f(x,p,t):
    p0, p1 = p
    pt = p0 + (p1 - p0)*t
    a, b, c, d = pt
    df = np.zeros(len(x))
    for i in range(len(x)):
        if i % 2 == 0:
            df[i] = a*x[i]-b*x[i]*x[i+1]
        else:
            df[i] = -c*x[i]+d*x[i-1]*x[i]
    return df

numba_julia_f = njit(julia_f)

def scipy_f(t,x,p0,p1):
    pt = p0 + (p1 - p0)*t
    a, b, c, d = pt
    df = np.zeros(len(x))
    for i in range(len(x)):
        if i % 2 == 0:
            df[i] = a*x[i]-b*x[i]*x[i+1]
        else:
            df[i] = -c*x[i]+d*x[i-1]*x[i]
    return df
numba_scipy_f = njit(scipy_f)

# Generating test case
T = 10
D = 1000 # Number of independent systems
era = 100
X = np.random.rand(T, 2*D)
P0 = np.random.rand(T, 4)
P1 = np.random.rand(T, 4)

def scipy_exec(with_numba, vectorized):
    if with_numba:
        for i in range(T):
            solve_ivp(numba_scipy_f, (0, 1), X[i], method='RK45', vectorized=vectorized, args=(P0[i], P1[i]))
    else:
        for i in range(T):
            solve_ivp(scipy_f, (0, 1), X[i], method='RK45', vectorized=vectorized, args=(P0[i], P1[i]))

def julia_exec(with_numba):
    if with_numba:
        for i in range(T):
            prob = ode.ODEProblem(numba_julia_f, X[i], (0.,1.), (P0[i], P1[i]))
            sol = ode.solve(prob, ode.Tsit5())
    else:
        for i in range(T):
            prob = ode.ODEProblem(julia_f, X[i], (0.,1.), (P0[i], P1[i]))
            sol = ode.solve(prob, ode.Tsit5())

In a Jupyter notebook:


%timeit scipy_exec(False, False)
# 315 ms ± 38.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
%timeit scipy_exec(True, False)
# 6.62 ms ± 615 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
%timeit julia_exec(False)
# 521 ms ± 38.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
%timeit julia_exec(True)
# 30.9 ms ± 2.07 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)

— alm818, Jun 21, 2021

Corrected to use the same abstol and reltol, but I still get the same performance gap. (Screenshot of the updated timings not reproduced here.)
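For reference, a minimal sketch of pinning both solvers to the same tolerances, reusing the definitions from the snippet above (the values are illustrative; SciPy takes rtol/atol while the DiffEq common interface takes reltol/abstol):

rtol, atol = 1e-3, 1e-6  # illustrative values; use whatever both solvers should share

# SciPy
solve_ivp(scipy_f, (0, 1), X[0], method='RK45', rtol=rtol, atol=atol, args=(P0[0], P1[0]))

# diffeqpy
prob = ode.ODEProblem(julia_f, X[0], (0., 1.), (P0[0], P1[0]))
sol = ode.solve(prob, ode.Tsit5(), reltol=rtol, abstol=atol)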

— alm818, Jun 21, 2021

Yes, the Python->Julia bridge seems to have some overhead that we need to work on. Until then, for full performance we'd recommend using the Julia DifferentialEquations.jl package or the diffeqr package from R (with the special JIT compilation).
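A related workaround from within Python (not suggested in the thread itself) is to define the right-hand side in Julia through juliacall, so each function evaluation stays on the Julia side instead of crossing the bridge. A rough sketch, assuming the juliacall-based diffeqpy used in the later comment; the lv! function and the 8-element concatenated parameter vector are for illustration only:

from diffeqpy import ode
from juliacall import Main as jl
import numpy as np

# Sketch: the RHS lives in Julia, so the solver never calls back into Python.
# Note Julia's 1-based indexing relative to the Python version above.
lv = jl.seval("""
function lv!(du, u, p, t)
    p0 = @view p[1:4]
    p1 = @view p[5:8]
    a = p0[1] + (p1[1] - p0[1]) * t
    b = p0[2] + (p1[2] - p0[2]) * t
    c = p0[3] + (p1[3] - p0[3]) * t
    d = p0[4] + (p1[4] - p0[4]) * t
    @inbounds for i in 1:length(u)
        if isodd(i)
            du[i] = a*u[i] - b*u[i]*u[i+1]
        else
            du[i] = -c*u[i] + d*u[i-1]*u[i]
        end
    end
    nothing
end
""")

u0 = jl.convert(jl.Array, np.random.rand(2000))
p  = jl.convert(jl.Array, np.random.rand(8))   # [p0; p1] concatenated
prob = ode.ODEProblem(lv, u0, (0.0, 1.0), p)
sol = ode.solve(prob, ode.Tsit5())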

— ChrisRackauckas, Jun 21, 2021

The new JIT form still has quite a bit of overhead, but it's about 2x faster than Numba, so I'll call this closed.

from diffeqpy import ode
from diffeqpy import de  # provides de.jit, used below
import numpy as np
import numba
from numba import njit
import timeit
from scipy.integrate import solve_ivp
from juliacall import Main as jl

# Lotka-Volterra with linear interpolation of the parameters from p0 to p1
def julia_f(df,x,p,t):
    p0 = p[0:4]
    p1 = p[4:8]
    pt = p0 + (p1 - p0)*t
    a, b, c, d = pt
    for i in range(len(x)):
        if i % 2 == 0:
            df[i] = a*x[i]-b*x[i]*x[i+1]
        else:
            df[i] = -c*x[i]+d*x[i-1]*x[i]
    return df

numba_julia_f = njit(julia_f)

def scipy_f(t,x,p0,p1):
    pt = p0 + (p1 - p0)*t
    a, b, c, d = pt
    df = np.zeros(len(x))
    for i in range(len(x)):
        if i % 2 == 0:
            df[i] = a*x[i]-b*x[i]*x[i+1]
        else:
            df[i] = -c*x[i]+d*x[i-1]*x[i]
    return df
numba_scipy_f = njit(scipy_f)

# Generating test case
T = 10
D = 1000 # Number of independent systems
era = 100
X = np.random.rand(T, 2*D)
P0 = np.random.rand(T, 4)
P1 = np.random.rand(T, 4)
p = [np.concatenate((P0[i],P1[i])) for i in range(T)]

def scipy_exec(with_numba, vectorized):
    if with_numba:
        for i in range(T):
            solve_ivp(numba_scipy_f, (0, 1), X[i], method='RK45', vectorized=vectorized, args=(P0[i], P1[i]))
    else:
        for i in range(T):
            solve_ivp(scipy_f, (0, 1), X[i], method='RK45', vectorized=vectorized, args=(P0[i], P1[i]))

# Warm-up: construct, JIT, and solve once so compilation time is excluded from the timings
prob = ode.ODEProblem(julia_f, X[0], (0.,1.), p[0])
fastprob = de.jit(prob)
sol = ode.solve(prob, ode.Tsit5())
_prob = ode.remake(fastprob, u0=jl.convert(jl.Array,X[0]), p=jl.convert(jl.Array,p[0]))
sol = ode.solve(_prob, ode.Tsit5())

def julia_exec(with_jit):
    if with_jit:
        for i in range(T):
            _prob = ode.remake(fastprob, u0=jl.convert(jl.Array,X[i]), p=jl.convert(jl.Array,p[i]))
            sol = ode.solve(_prob, ode.Tsit5())
    else:
        for i in range(T):
            _prob = ode.remake(prob, u0=X[i], p=p[i])
            sol = ode.solve(_prob, ode.Tsit5())

timeit.Timer(lambda: scipy_exec(False, False)).timeit(number=100) # 23.11515112500092
timeit.Timer(lambda: scipy_exec(True, False)).timeit(number=100) # 0.5127864579990273
timeit.Timer(lambda: julia_exec(True)).timeit(number=100) # 0.36892091699883167

— ChrisRackauckas, Oct 21, 2023