MaBLAS.jl
The analytical model for blocking
julia> using Hwloc
"""
    params(::Type{T}) where T

Return the `cache_k` blocking parameter for element type `T`, computed from the
geometry of the host's L1 data cache as reported by Hwloc.

The value is

    ncachelines_per_panelA * linesize * l1sets ÷ (micro_m * sizeof(T))

where `ncachelines_per_panelA = floor((associativity - 1) / (1 + micro_n / micro_m))`
and `l1sets = size ÷ (linesize * associativity)`.

The L1 cache is located by descending L3 → L2 → L1 with `getdatacache`. A level
that is not present in the topology is skipped rather than treated as fatal.

# Throws
- `ErrorException` if no L1 data cache can be found in the topology.
"""
function params(::Type{T}) where T
    # Micro-tile extents in elements of `T`.
    # NOTE(review): presumably the register-tile rows/columns used by
    # LoopVectorization — confirm against that package.
    micro_m = LoopVectorization.mᵣ * VectorizationBase.pick_vector_width(T)
    micro_n = LoopVectorization.nᵣ

    # Walk down the cache hierarchy. Hosts without an L3 (or L2) make
    # `getdatacache` return `nothing`; keep searching from the last node found
    # instead of failing on a field access of `nothing`.
    node = Hwloc.topology_load()
    for level in (:L3Cache, :L2Cache)
        found = getdatacache(node, level)
        found === nothing || (node = found)
    end
    l1 = getdatacache(node, :L1Cache)
    l1 === nothing && error("params: no L1 data cache found in the Hwloc topology")

    linesize = l1.attr.linesize
    reported = l1.attr.associativity
    l1associativity = if reported == 0
        8                          # assume 8 if unknown
    elseif reported == -1
        l1.attr.size ÷ linesize    # fully associative
    else
        reported
    end

    l1sets = l1.attr.size ÷ (linesize * l1associativity)
    # One way is held back; the rest are shared between the two panels in the
    # ratio micro_n : micro_m.
    ncachelines_per_panelA = floor(Int, (l1associativity - 1) / (1 + micro_n / micro_m))
    cache_k = ncachelines_per_panelA * linesize * l1sets ÷ (micro_m * sizeof(T))
    return cache_k
end
params (generic function with 1 method)
"""
    getdatacache(topology, name)

Search the descendants of `topology` depth-first, in child order, and return the
first node whose `type_` is `name` and whose `attr.type_` is `:Unified` or
`:Data`. Return `nothing` when no such node exists.

Children that have no `type_` field are skipped. `topology` itself is never
returned; only its descendants are examined.
"""
function getdatacache(topology, name)
    for child in topology.children
        isdefined(child, :type_) || continue
        if child.type_ === name && child.attr.type_ in (:Unified, :Data)
            return child
        end
        # Keep looking through later siblings when this subtree has no match,
        # e.g. when an instruction cache is listed before the data cache.
        found = getdatacache(child, name)
        found === nothing || return found
    end
    return nothing
end
getdatacache (generic function with 1 method)
julia> params(Float64)
213
julia> params(Float32)
256
Someone may find this code useful.
This is an, uhh, interesting choice
julia> params(Float32)
76
julia> params(Float64)
76