Impero.jl icon indicating copy to clipboard operation
Impero.jl copied to clipboard

Pre-setting memory for tree recursion evaluations on arrays

Open sandreza opened this issue 5 years ago • 2 comments

Here is a sketch of a struct that preallocates all scratch arrays for intermediate representations of functions

import Base: +
using BenchmarkTools, Test, LinearAlgebra
struct Add{S,T,V}
    term1::S
    term2::T
    scratch::V
end
Add(a,b) = Add(a,b, nothing)
compute(x) = x
function compute(a::Add{S,T,V}) where
    {S, T, V <: Nothing} 
    return +(compute(a.term1), compute(a.term2))
end
function Add(a,b; data=nothing) 
    return Add(a, b, data)
end
function compute(a::Add{S,T,V}) where
    {S, T, V} 
    c = check(compute(a.term1), a.term1)
    d = check(compute(a.term2), a.term2)
    @inbounds for i in eachindex(a.scratch)
        a.scratch[i] = c[i] + d[i]
    end
    return nothing
end
check(a::Nothing, b) = b.scratch
check(a,b) = a

# Test
a = randn(100)
b = randn(100)
c = Add(a, b, copy(a))
@btime compute(c);
@test norm(c.scratch - (a+b))==0.0
c = Add(a,Add(a,b, copy(a)), copy(a))
d = Add(a,Add(a,b))
@btime compute(c);
@btime compute(d);
@test norm(c.scratch - (a+(a+b)))==0.0
@test norm(c.scratch - compute(d))==0.0

Perhaps we can leverage multiple dispatch to use a structure like this for computations?

sandreza avatar Oct 17 '20 18:10 sandreza

and continuing the example:

struct Field{S}
    data::S
end
function +(a::Field, b::Field)
    return Add(Field(a.data), Field(b.data), copy(b.data))
end
function +(a::Add, b::Field)
    return Add(a, b, copy(b.data))
end
function +(a::Add, b::Add)
    return Add(a, b, copy(b.scratch)) 
end
copy(::Nothing) = nothing
+(a::Field, b::Add) = +(b,a)
getindex(b::Field, i::Int) = b.data[i]
a = Field(randn(100))
b = Field(randn(100))
c = a + b
@btime compute(c)
@btime a.data+b.data;
compute(c)
@test norm(c.scratch - (a.data + b.data))==0.0
c = a + b + b + a
@btime compute(c)
@btime (a.data + b.data + b.data + a.data);
compute(c)
@test norm(c.scratch - (a.data + b.data + b.data + a.data))==0.0

sandreza avatar Oct 17 '20 19:10 sandreza

Here is an alternative that is non-memory allocating as long as the operations are within a loop

for binary_operator in binary_operators
    b_name, b_symbol = Meta.parse.(binary_operator)
    @eval inloop(a::$b_name{𝒮, 𝒯}, i) where {𝒮, 𝒯} = $b_symbol(inloop(a.term1,i), inloop(a.term2, i))
    @eval export $b_name, $b_symbol
end

inloop(a::Array, i) = a[i]

inloop(a::Wrapper, i) = inloop(a.data, i)

struct Loop{S,T} <: AbstractExpression
    data::S
    metadata::T
end

function inloop(a::Loop)
    @inbounds for i in eachindex(a.metadata)
        a.metadata[i] = inloop(a.data,i)
    end
    return nothing
end

## test
a = randn(100000)
@wrapper ia = a
b = copy(a)
expr = ia *ia + ia + ia * ia *ia
comp = a .* a .+ a .+ a .* a .* a
ell = Loop(expr,b)
inloop(ell)
norm(ell.metadata - comp)
@btime a .* a .+ a .+ a .* a .* a
@btime inloop(ell)

sandreza avatar Oct 27 '20 00:10 sandreza