Removing `@generated` functions
Right now, there is a mess of `@generated` functions in `run/fractal_flame.jl`. In principle, these can be removed by...
```julia
@kernel function naive_chaos_kernel!(points, n, H_fxs, H_kwargs,
                                     H_clrs, H_clr_kwargs,
                                     H_probs, H_fnums,
                                     layer_values, layer_reds, layer_greens,
                                     layer_blues, layer_alphas, frame, bounds,
                                     bin_widths, num_ignore, max_range)
    tid = @index(Global, Linear)

    pt = Ref(point(0.0, 0.0))
    pt[] = points[tid]
    dims = Fae.dims(pt[])

    clr = Ref(RGBA{Float32}(0, 0, 0, 0))

    seed = quick_seed(tid)
    fid = create_fid(H_probs, H_fnums, seed)

    for i = 1:n
        # quick way to tell if in range to be calculated or not
        sketchy_sum = absum(pt[])

        if sketchy_sum < max_range
            if length(H_fnums) > 1 || H_fnums[1] > 1
                seed = simple_rand(seed)
                fid = create_fid(H_probs, H_fnums, seed)
            else
                fid = UInt(1)
            end

            choices = find_random_fxs(fid, H_fnums, H_probs)

            # This is the part that currently forces @generated: `choices` is a
            # runtime tuple of Ints, so neither of these unrolls works as written.
            #ntuple(Val(choices)) do k
            for k in NTuple(Val.(choices))
                Base.@_inline_meta
                pt[] = H_fxs[k](pt[].y, pt[].x, frame; H_kwargs[k]...)
                clr[] = H_clrs[k](pt[].y, pt[].x, clr[], frame;
                                  H_clr_kwargs[k]...)
            end

            histogram_output!(layer_values, layer_reds, layer_greens,
                              layer_blues, layer_alphas, pt[], clr[],
                              bounds, dims, bin_widths, i, num_ignore)
        end
    end

    @inbounds points[tid] = pt[]
end
```
i.e., by finding some way to iterate through known values with `ntuple`, like: https://github.com/CliMA/ClimateMachine.jl/blob/2e0b6b7d97719e410d12a8596c98d5db7f891dbf/src/Numerics/DGMethods/remainder.jl#L510

I couldn't quite figure out how to go from the tuple of `Int`s to an ntuple of `Val`s.

The core issue is that each function is a unique type, so the tuple of functions is heterogeneous and cannot be stored as an `NTuple` to begin with, and LLVM cannot optimize indexing into it with a runtime `fid`.
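For reference, a minimal sketch (not Fable code, names made up) of why runtime indexing into a tuple of functions is the sticking point:

```julia
# Every function and closure in Julia has its own singleton type, so a tuple of
# functions is heterogeneous.
f1(x) = x + 1
f2(x) = 2x
fxs = (f1, f2)

typeof(f1) == typeof(f2)   # false: each function is a distinct type
typeof(fxs)                # Tuple{typeof(f1), typeof(f2)}, not an NTuple

# Indexing with a compile-time constant resolves to one concrete function, but
# indexing with a runtime fid (as the chaos game needs) does not, and Val(fid)
# with a runtime fid is itself type-unstable, so it does not help.  That is what
# forces either @generated unrolling or the @nif trick further down.
apply(fxs, fid, x) = fxs[fid](x)
```

For a tuple this small Julia can often union-split the call anyway, but the kernel above uses much longer tuples on the GPU, where falling back to dynamic dispatch is not an option.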
This discussion was introduced in #64.
Note that I tried to partially fix the lengthy compilation times by minimizing the number of functions we need to use the `@generated` macro on, but these are configured with keyword arguments, and those arguments also need a loop, so we did not save any operations.
Might be time to put some adult pants on and "just do it" by creating my own `@generated` macro for Fable: https://github.com/JuliaLang/julia/blob/d49a3c74c97c9ca9ef711d644a3f2b1c02b38d63/base/expr.jl#L1074

Here is `@generated`:
```julia
macro generated(f)
    if isa(f, Expr) && (f.head === :function || is_short_function_def(f))
        body = f.args[2]
        lno = body.args[1]
        return Expr(:escape,
                    Expr(f.head, f.args[1],
                         Expr(:block,
                              lno,
                              Expr(:if, Expr(:generated),
                                   body,
                                   Expr(:block,
                                        Expr(:meta, :generated_only),
                                        Expr(:return, nothing))))))
    else
        error("invalid syntax; @generated must be used with a function definition")
    end
end
```
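For context, a minimal (non-Fable) example of how a `@generated` function behaves: the body runs on the argument *types* and returns an `Expr`, which becomes the method body for those types.

```julia
# Minimal @generated example: unroll a tuple sum at compile time.
@generated function tuple_sum(t::Tuple)
    N = length(t.parameters)                   # t is the *type* of the tuple here
    return :(+(0, $([:(t[$i]) for i in 1:N]...)))
end

tuple_sum((1, 2.0, 3))   # 6.0, with the loop fully unrolled per tuple type
```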
I believe we can just save all the exprs from user fums and splat them all into a big function at the end. I don't know if this will actually cut compile time, but it does a few things (see the sketch after this list):
- It reduces the number of times we are metaprogramming.
- It allows us to do #74.
- It should be the same cost as going through GLSL for everything.
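A minimal sketch of the "stash the exprs, splat them into one big function" idea; `user_exprs` and `chaos_step!` are hypothetical names, not current Fable API:

```julia
# Hypothetical sketch: `user_exprs` stands in for expressions collected from
# user fums; `chaos_step!` is a made-up name.
user_exprs = [
    :(y = sin(x)),
    :(y = 2 * y + offset),
]

# Splice every stored expression into one ordinary function body.
@eval function chaos_step!(x, offset)
    y = zero(x)
    $(user_exprs...)
    return y
end

chaos_step!(0.5, 1.0)   # ≈ 1.96; one normal compile, no @generated involved
```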
So we have something like:
- `@fi x` to mark a variable as special (something like SymbolicUtils)
- `@fum fx` to create custom functions
- `@fo (fx, fx, fx)` to combine everything into a single operator
- `@fee (fo, fo, fo)` to combine everything into the final executable
This was the original vision for the code.
For example, an `Expr` can be stashed in a variable and returned from a macro later:

```julia
julia> ex = Expr(:escape, :(x = 5))
:($(Expr(:escape, :(x = 5))))

julia> macro call_ex()
           ex
       end
@call_ex (macro with 1 method)

julia> @call_ex()
5
```
Lots of notes tossed in from Slack:
- No way around `@generated` in Julia. You can do it in C because ptrs are defined in modules that all know each other.
- Try `@noinline` for `@generated` functions
- Precompile tools
- https://github.com/JuliaGPU/CUDA.jl/pull/1853
- https://github.com/JuliaGPU/CUDA.jl/issues/2450
```julia
julia> struct VTable{T}
           funcs::T
       end

julia> @generated function (VT::VTable{T})(fidx, args...) where T
           N = length(T.parameters)
           quote
               Base.Cartesian.@nif $(N+1) d->fidx==d d->return VT.funcs[d](args...) d->error("fidx oob")
           end
       end

julia> VT = VTable(((x)->x+1, (x)->x+2))
VTable{Tuple{var"#2#4", var"#3#5"}}((var"#2#4"(), var"#3#5"()))
```
```julia
julia> VT(1, 2)
3

julia> VT(2, 2)
4

julia> VT(3, 2)
ERROR: fidx oob
Stacktrace:
 [1] error(s::String)
   @ Base ./error.jl:35
 [2] macro expansion
   @ ./REPL[3]:4 [inlined]
 [3] (::VTable{Tuple{var"#2#4", var"#3#5"}})(fidx::Int64, args::Int64)
   @ Main ./REPL[3]:1
 [4] top-level scope
   @ REPL[7]:1

julia> @code_typed VT(3, 2)
CodeInfo(
1 ─ %1 = (fidx === 1)::Bool
└──      goto #3 if not %1
2 ─ %3 = Core.getfield(args, 1)::Int64
│   %4 = Base.add_int(%3, 1)::Int64
└──      return %4
3 ─ %6 = (fidx === 2)::Bool
└──      goto #5 if not %6
4 ─ %8 = Core.getfield(args, 1)::Int64
│   %9 = Base.add_int(%8, 2)::Int64
└──      return %9
5 ─      invoke Main.error("fidx oob"::String)::Union{}
└──      unreachable
) => Int64
```
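If this pans out, the same trick could replace the per-`fid` unrolling in `naive_chaos_kernel!`. A rough sketch only (the closures and names below are placeholders, and the kwargs plumbing from the real kernel is omitted):

```julia
# Rough sketch: wrap the tuples of fums in VTables (defined above) so the chaos
# loop can pick a transform by a runtime fid without @generated unrolling.
H_fxs_vt = VTable(((y, x) -> (y + 0.5, x),    # placeholder transforms,
                   (y, x) -> (y, x - 0.5)))   # not real Fable fums

fid = 2
pt_y, pt_x = 0.0, 1.0
pt_y, pt_x = H_fxs_vt(fid, pt_y, pt_x)   # dispatches to the second transform
```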