DiffEqGPU.jl icon indicating copy to clipboard operation
DiffEqGPU.jl copied to clipboard

`EnsembleGPUKernel `+ Texture Memory Support

Open agerlach opened this issue 1 year ago • 20 comments

@ChrisRackauckas @utkarsh530

Per our conversation here is an example demonstrating how texture memory interpolation could be use.

I would like to be able to leverage CUDA.jl's texture memory support for interpolation of data in the EOM and/or in a callback. A use case could be dropping a ball in a wind field with ground impact termination for a non-flat terrain. Here, one would want to interpolate the wind field as a function of state in the eom as a forcing term and an elevation map as a function of altitude.

Below is an initial prototype. This includes a CPU implementation that leverages DataInterpolations.jl to demonstrate the functionality desired using this data forecast.txt I also included an initial non-working prototype using texture memory.

No interpolation

Working model for CPU and GPU w/o interpolation

using CUDA, DiffEqGPU, OrdinaryDiffEq, Plots, Serialization, StaticArrays, Distributions, LinearAlgebra
import DataInterpolations
const DI = DataInterpolations

# Ballistic model
function ballistic(u, p, t, weather)
    CdS, mass, g = p
    vel = @view u[4:6]

    wind, ρ = get_weather(weather, u[3])

    airvelocity = vel - wind
    airspeed = norm(airvelocity)
    accel = -(ρ * CdS * airspeed) / (2 * mass) * airvelocity - mass*SVector{3}(0f0, 0f0, g)

    return SVector{6}(vel..., accel...)
end

# constant wind and airdensity.
function get_weather(weather, z)
    wind = SVector{3}(1f0, 0f0, 0f0)
    ρ = 1.27f0
    wind, ρ
end

trajectories = 100
u0 = @SVector [0.0f0, 0.0f0, 10000.0f0, 0f0, 0f0, 0f0]
tspan = (0.0f0, 50.0f0)
saveat = LinRange(tspan..., 100)
p = @SVector [25f0, 225f0, 9.807f0]
CdS_dist = Normal(0f0, 1f0)

eom_nowind = (u, p, t) -> ballistic(u,p,t,nothing)
prob = ODEProblem{false}(eom_nowind, u0, tspan, p)
sol = solve(prob, Tsit5())

prob_func = (prob, i, repeat) -> remake(prob, p = p + SVector{3}(rand(CdS_dist), 0f0, 0f0))
eprob = EnsembleProblem(prob, prob_func = prob_func, safetycopy = false)

esol = solve(eprob, Tsit5(); trajectories, saveat)
esol_gpu = solve(eprob, GPUTsit5(), EnsembleGPUKernel(); trajectories, saveat)

DataInterpolations.jl CPU Example

This demonstrates the basic capability I would like to replicate in w/ EnsembleGPUKernel using CUDA.CuTexture

# Ballistic w/ Interpolated winds via DataInterpolations.jl
data = deserialize("forecast.txt")
N = length(data.altitude)

weather_sa = map(data.altitude, data.windx, data.windy, data.density) do alt, wx, wy, ρ
    SVector{4}(alt, wx, wy, ρ)
end

interp = DI.LinearInterpolation(weather_sa,data.altitude)

function get_weather(itp::DI.LinearInterpolation, z)
    weather = itp(u[3])
    wind = SVector{3}(weather[2], weather[3], 0f0)
    ρ = weather[4]
    wind, ρ
end

eom_di = (u,p,t) -> ballistic(u,p,t,interp)
prob = ODEProblem{false}(eom_di, u0, tspan, p)
sol = solve(prob, Tsit5())

prob_func = (prob, i, repeat) -> remake(prob, p = p + SVector{3}(rand(dist), 0f0, 0f0))
eprob = EnsembleProblem(prob, prob_func = prob_func, safetycopy = false)
esol = solve(eprob, Tsit5(); trajectories, saveat)

GPU Texture Interpolation Validation

Demonstrate usage of CuTexture for interpolation. Note, here I index into the texture memory by broadcasting over a CuArray{Float32} of indices via dst_gpu .= getindex.(Ref(texture), idx_tlu)

weather = map(weather_sa) do w
    (w...,)
end

weather_TA = CuTextureArray(weather)
texture = CuTexture(weather_TA; address_mode = CUDA.ADDRESS_MODE_CLAMP, normalized_coordinates = true, interpolation = CUDA.LinearInterpolation())

## Test Texture interpolation
idx = LinRange(0f0, 1f0, 4000)
idx_gpu = CuArray(idx)
idx_tlu = (1f0-1f0/N)*idx_gpu .+ 0.5f0/N  # normalized table lookup form https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#table-lookup
dst_gpu = CuArray{NTuple{4, Float32}}(undef, size(idx))
dst_gpu .= getindex.(Ref(texture), idx_tlu)  # interpolation ℝ->ℝ⁴

begin
    p1 = scatter(data.altitude, data.altitude, ylabel = "Altitude", label = "Raw", ms = 2, marker = :x)
    plot!(idx*data.altitude[end], getindex.(dst_cpu, 1), label = "Texture",lw = 2)

    p2 = scatter(data.altitude, data.windx, ylabel = "Wind X", label = "Raw", ms = 2, marker = :x, leg = false)
    plot!(idx*data.altitude[end], getindex.(dst_cpu, 2), label = "Texture",lw = 2)

    p3 = scatter(data.altitude, data.windy, ylabel = "Wind Y", label = "Raw", ms = 2, marker = :x, leg = false)
    plot!(idx*data.altitude[end], getindex.(dst_cpu, 3), label = "Texture",lw = 2)

    p4 = scatter(data.altitude, data.density, ylabel = "Density", label = "Raw", ms = 2, marker = :x, leg = false)
    plot!(idx*data.altitude[end], getindex.(dst_cpu, 4), label = "Texture",lw = 2)

    plot(p1, p2, p3, p4, link=:x, xlabel = "Altitude")
end

EnsembleGPUKernel + CuTexture prototype

Non-working prototype. Note here the get_weather function is indexing the texture at a single index for a single trajectory which isn't supported by CUDA.jl. Although this is scalar indexing it should actually be occurring for each trajectory in the ensemble.

function get_weather(tex::CuTexture, z, zmax = data.altitude[end], N = length(data.altitude))
    idx = (1f0-1f0/N)*z/zmax + 0.5f0/N # normalized input for table lookup based on https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#table-lookup
    tex[idx]
end

eom_tex = (u,p,t) -> ballistic(u,p,t,texture)
prob = ODEProblem{false}(eom_tex, u0, tspan, p)
prob_func = (prob, i, repeat) -> remake(prob, p = p + SVector{3}(rand(dist), 0f0, 0f0))
eprob = EnsembleProblem(prob, prob_func = prob_func, safetycopy = false)
esol_gpu = solve(eprob, GPUTsit5(), EnsembleGPUKernel(); trajectories, saveat) #InvalidIR 

ContinousCallback Prototype

The above example only does interpolation in the eom. However, interpolation could also occur in evaluating a callback. e.g. something like this

terrain_texture = CuTexture(...)  # ℝ² -> ℝ, map xy position to ground elevation

condition(u,t,integrator) = u[3] - terrain_texture[@view u[1:2]]. # check height above elevation
cb = ContinuousCallback(condition, terminate!)

agerlach avatar Jan 20 '23 20:01 agerlach