move stack to Vector
By my measurements, it's a bunch faster for this workload (this may be new as of Julia 1.11)
julia> function f3(d, n)
for i in 1:n
push!(d, i)
pop!(d)
end
end
f3 (generic function with 1 method)
julia> @benchmark f3(d, 1000) evals=1 setup = (d=Vector{Int}())
BenchmarkTools.Trial: 10000 samples with 1 evaluation per sample.
Range (min … max): 789.000 ns … 10.042 μs ┊ GC (min … max): 0.00% … 0.00%
Time (median): 797.000 ns ┊ GC (median): 0.00%
Time (mean ± σ): 823.878 ns ± 158.991 ns ┊ GC (mean ± σ): 0.00% ± 0.00%
▅█▅▁▅▄ ▂ ▁▄▃ ▂ ▁
███████▆▆▄▁▁▁▁▁▁▇██▃███▇▇▆▇█▆▆▅▄▄▄▄▄▅▄▅▄▆▇▇▆▆▇▆▆▆▅▅▄▁▄▁▃▃▄▅██ █
789 ns Histogram: log(frequency) by time 1.07 μs <
Memory estimate: 96 bytes, allocs estimate: 1.
julia> @benchmark f3(d, 1000) evals=1 setup = (d=Deque{Int}())
BenchmarkTools.Trial: 10000 samples with 1 evaluation per sample.
Range (min … max): 3.667 μs … 28.349 μs ┊ GC (min … max): 0.00% … 0.00%
Time (median): 3.803 μs ┊ GC (median): 0.00%
Time (mean ± σ): 3.930 μs ± 489.972 ns ┊ GC (mean ± σ): 0.00% ± 0.00%
▁▆█▄▇▅▃▃▂▁▁▂▅▃ ▁ ▁▁ ▁ ▂
████████████████▇▇██▇▇▆▇█▇███▇▇▆█▇▄▅▄▆▆▆▆▇▇▅▅▅▄▅▅▆▅▄▅▄▄▄▄▆▅ █
3.67 μs Histogram: log(frequency) by time 5.66 μs <
tests fail
Since Deque is implemented as an unrolling linked-list, it would be nice to also benchmark inserting all elements followed by removing them all:
function f4(d, n)
for i in 1:n
push!(d, i)
end
for i in 1:n
pop!(d)
end
end
julia> @benchmark f4(d, 1000) evals=1 setup = (d=Vector{Int}())
BenchmarkTools.Trial: 10000 samples with 1 evaluation per sample.
Range (min … max): 762.000 ns … 1.745 ms ┊ GC (min … max): 0.00% … 98.73%
Time (median): 962.000 ns ┊ GC (median): 0.00%
Time (mean ± σ): 2.022 μs ± 23.323 μs ┊ GC (mean ± σ): 37.07% ± 4.78%
▇█▇▅▅▅▅▅▅▅▄▄▃▃▃▂▂▁▁ ▁▁▂▂▁▂▁▁▁▁▁▁▁ ▂
██████████████████████▆▅▅▅▃▆▄▄▁▃▃▇███████████████▇▇▇▇▇▆▇▆▆▅▆ █
762 ns Histogram: log(frequency) by time 4.03 μs <
Memory estimate: 21.80 KiB, allocs estimate: 7.
julia> @benchmark f4(d, 1000) evals=1 setup = (d=Deque{Int}())
BenchmarkTools.Trial: 10000 samples with 1 evaluation per sample.
Range (min … max): 3.076 μs … 163.643 μs ┊ GC (min … max): 0.00% … 0.00%
Time (median): 3.677 μs ┊ GC (median): 0.00%
Time (mean ± σ): 3.668 μs ± 2.911 μs ┊ GC (mean ± σ): 0.00% ± 0.00%
▃▇▆▅▄▃▂▁▁▁▁ ▁▇█▇▇▅▄▄▂▂▁▂▁ ▁▁▁ ▃
▅▇████████████▇▆████████████████▇▆▆▆▆▅▆▅▅▅▅▄▄▅▁▃▄▄▅▁▄▇█████ █
3.08 μs Histogram: log(frequency) by time 4.92 μs <
Memory estimate: 0 bytes, allocs estimate: 0.
Looks like the Vector version is ~3x faster, but allocates more.
I wonder if we shouldn't just deprecate Stack and Queue and tell people to use Deque or Vector depending on need.
Vector isn't a great queue unfortunately (circular queues are generally better since you never have to move data around within the structure)
Vector isn't a great queue unfortunately (circular queues are generally better since you never have to move data around within the structure)
Last time i branch marked it (which was pre-1.0) it was shockingly good as a queue
My initial experiments seem to confirm. I threw together the following which should be a lot faster than Vector since it only copies data on resize, but I am not seeing that. I presumably have a bug somewhere.
mutable struct Queue{T}
mem::Memory{T}
start::Int
len::Int
function Queue{T}() where T
new{T}(Memory{T}(undef, 2), 0, 0)
end
end
pow2index(i, size) = 1 + (i & (size-1))
function Base.resize!(q::Queue{T}, newsize) where T
(;start, len, mem) = q
memlen = length(mem)
newsize = nextpow(2, newsize)
@assert newsize >= len
newmem = Memory{T}(undef, newsize)
for i in 1:len
newmem[i] = mem[pow2index(start + i - 1, memlen)]
end
q.mem = newmem
q.start = 0
q
end
function Base.push!(q::Queue, v)
(;start, len, mem) = q
memlen = length(mem)
if len == memlen
memlen *= 2
(;start, mem) = resize!(q, memlen)
end
mem[pow2index(start + len, memlen)] = v
q.len += 1
v
end
function Base.popfirst!(q::Queue)
(;start, len, mem) = q
@assert len != 0
v = mem[pow2index(start, length(mem))]
q.start += 1
q.len -= 1
v
end
function testqueue1(q, n1, n2)
for i in 1:n1
push!(q, i)
end
for _ in 1:n2
v = popfirst!(q)
push!(q, v)
end
popfirst!(q)
end