ImageMorphology.jl
ImageMorphology.jl copied to clipboard
[Feature Request] Periodic connected_components
I make a lot of use of connected_components in 2 and 3 dimensions, and for several problems it's useful to have periodic boundary conditions such that clusters can roll over, e.g the matrix on the left should return the labels on the right.
[1 0 1 0 1 [1 0 1 0 1
0 0 0 0 0 0 0 0 0 0
1 0 1 1 1 => 2 0 2 2 2
0 0 0 0 0 0 0 0 0 0
1 0 1 1 1] 1 0 1 1 1]
I already implemented this within ImageMorphology, but I'm struggling to integrate how it calls without disturbing default behaviour.
Two changes only:
function label_components!(out::AbstractArray{T}, A::AbstractArray, iter; bkg=zero(eltype(A))) where T<:Integer
axes(out) == axes(A) || throw_dmm(axes(out), axes(A))
fill!(out, zero(T))
sets = DisjointMinSets{T}()
sizehint!(sets.parents, floor(Int, sqrt(length(A))))
@inbounds for i in CartesianIndices(A)
val = A[i]
val == bkg && continue
label = typemax(T) # sentinel value
for Δi in iter
ii = CartesianIndex(mod1.(Tuple(i + Δi), size(A))) #the only change here
checkbounds(Bool, A, ii) || continue #technically I think this also makes this checkbounds redundant
if A[ii] == val
newlabel = out[ii]
newlabel == bkg && continue
label = ((label == typemax(T)) | (label == newlabel)) ? newlabel : union!(sets, label, newlabel)
end
end
if label == typemax(T)
label = push!(sets)
end
out[i] = label
end
# Now parse sets to find the labels
newlabel = minlabel(sets)
@inbounds for i in eachindex(A, out)
if A[i] != bkg
out[i] = newlabel[find_root!(sets, out[i])]
end
end
return out
end
As well, half_pattern
needs a full_pattern
equivalent, e.g:
function full_pattern(A::AbstractArray{T,N}, connectivity::AbstractArray{Bool}) where {T,N}
all(in((1,3)), size(connectivity)) || throw(ArgumentError("connectivity must have size 1 or 3 in each dimension"))
for d = 1:ndims(connectivity)
size(connectivity, d) == 1 || reverse(connectivity; dims=d) == connectivity || throw(ArgumentError("connectivity must be symmetric"))
end
center = CartesianIndex(map(axes(connectivity)) do ax
(first(ax) + last(ax)) ÷ 2
end)
offsets = CartesianIndex{N}[]
for i in CartesianIndices(connectivity)
i == center && continue # continue instead of break ensures the full list of indices is given
if connectivity[i]
push!(offsets, i - center)
end
end
# return offsets
return (offsets...,) # returning as a tuple allows specialization
end
Obviously this comes with a performance hit as we compare more indices, but it's not too serious and multiple dispatch should mean only users of the periodic feature have to suffer it.
Either way, if anybody knows how to integrate this sensibly, it'd be handy.
For ad-hoc usage, how about the "padding + f + cropping" strategy?
# or `BorderArray` as lazy container
img_p = padarray(img, Pad(:circular,1,1)) # from ImageFiltering.jl
out = label_components(img_p)
out[2:end-1, 2:end-1]
I'm not very sure if we want to expose a keyword to control the boundary condition; even if we do, we might just provide another wrapper function, e.g.,
# may not work, but you can get the idea
function with_border(f, img, boundary)
img_p = padarray(img, boundary)
out = f(img_p)
return out[axes(img)...]
end
with_border(img, Pad(:circular, 1, 1)) do img_p
f(img_p)
end
This is more composable to me.
For full_pattern
, you might want the strel(CartesianIndex, connectivity)
from #65, any feedback and suggestions are welcome.
Hmm, this won't actually produce the correct behaviour, e.g. as:
julia> A = rand(Bool, 10,10)
10×10 Matrix{Bool}:
0 1 1 1 1 1 0 0 0 1
1 1 1 0 0 0 1 0 0 0
0 1 1 1 1 1 1 1 1 0
0 0 1 0 1 0 1 0 1 1
0 0 1 0 0 0 0 1 0 1
1 0 1 0 0 0 0 0 1 0
0 0 0 0 1 1 1 1 1 1
1 0 1 0 1 0 1 0 1 0
1 0 0 0 0 1 0 1 0 1
1 0 0 1 1 1 0 1 1 1
julia> img_p = padarray(A, Pad(:circular,1,1,))
12×12 OffsetArray(::Matrix{Bool}, 0:11, 0:11) with eltype Bool with indices 0:11×0:11:
1 1 0 0 1 1 1 0 1 1 1 1
1 0 1 1 1 1 1 0 0 0 1 0
0 1 1 1 0 0 0 1 0 0 0 1
0 0 1 1 1 1 1 1 1 1 0 0
1 0 0 1 0 1 0 1 0 1 1 0
1 0 0 1 0 0 0 0 1 0 1 0
0 1 0 1 0 0 0 0 0 1 0 1
1 0 0 0 0 1 1 1 1 1 1 0
0 1 0 1 0 1 0 1 0 1 0 1
1 1 0 0 0 0 1 0 1 0 1 1
1 1 0 0 1 1 1 0 1 1 1 1
1 0 1 1 1 1 1 0 0 0 1 0
julia> label_components(img_p)
12×12 OffsetArray(::Matrix{Int64}, 0:11, 0:11) with eltype Int64 with indices 0:11×0:11:
1 1 0 0 5 5 5 0 10 10 10 10
1 0 5 5 5 5 5 0 0 0 10 0
0 5 5 5 0 0 0 5 0 0 0 13
0 0 5 5 5 5 5 5 5 5 0 0
2 0 0 5 0 5 0 5 0 5 5 0
2 0 0 5 0 0 0 0 11 0 5 0
0 6 0 5 0 0 0 0 0 9 0 14
3 0 0 0 0 9 9 9 9 9 9 0
0 4 0 8 0 9 0 9 0 9 0 12
4 4 0 0 0 0 7 0 12 0 12 12
4 4 0 0 7 7 7 0 12 12 12 12
4 0 7 7 7 7 7 0 0 0 12 0
You can see that e.g. clusters 10 and 12 should share a label.
The resultant labelled image does contain the desired information though, if you scan along the edges and find the union of sets for each edge and it's off-dimension neighbours. I previously implemented things exactly like that - fairly performant, but clusmy and awkward to deal with extra dimensions. My implementation was as below... it's not pretty.
using Images: label_components!
function ghost_copy!(s, g=zeros(Int64, size(s, 1)+2, size(s, 2)+2))
g[2:end-1, 2:end-1] .= s
g[2:end-1, 1] .= s[1:end, end]
g[2:end-1, end] .= s[1:end, 1]
g[1, 2:end-1] .= s[end, 1:end]
g[end, 2:end-1] .= s[1, 1:end]
g[1,1] = s[end, end]; g[end, end] = s[1,1]
g[end, 1] = s[1, end]; g[1, end] = s[end, 1]
return g
end
function fuse_edges!(s::Matrix{Int64}, g=zeros(Int64, size(s, 1)+2, size(s, 2)+2))
ghost_copy!(s, g)
labs = Dict(0=>BitSet([]))
l1 = size(g, 1)
l2 = size(g, 2)
for i in [-1,0,1]
validate_clusters(view(g, 2:l1-1, 2), view(g, 2+i:l1-1+i, 1), 0, labs)
validate_clusters(view(g, 2:l1-1, l2-1), view(g, 2+i:l1-1+i, l2), 0, labs)
validate_clusters(view(g, 2, 2:l2-1), view(g, 1, 2+i:l2-1+i), 0, labs)
validate_clusters(view(g, l1-1, 2:l2-1), view(g, l1, 2+i:l2-1+i), 0, labs)
end
return labs
end
@inline function validate_clusters(e, g, bk, labs)
for (a,b) in zip(e,g)
if a != bk && b != bk
if a < b
labs[a] = push!(get!(labs, a, BitSet([])), b)
else
labs[a] = push!(get!(labs, a, BitSet([])), b)
end
end
end
end
function consolidate_labels(fusions)
for (key, values) in fusions
for value in values
fusions[value] = union(fusions[value], values)
#println(union(fusions[value], values))
end
end
for (key, values) in fusions
for value in values
delete!(fusions, value)
end
fusions[key] = values
end
delete!(fusions, 0)
return fusions
end
function replace_at(A, fs)
for (key, values) in fs
for i in eachindex(A)
if A[i] ∈ values
A[i] = key
end
end
end
return A
end
function periodic_cclabel(I, s=zeros(Int64, size(I)), g=zeros(Int64, size(I, 1)+2, size(I, 2)+2))
label_components!(s, I, trues(3,3))
fusions = consolidate_labels(fuse_edges!(s, g))
replace_at(s, fusions)
return (s, g)
end
Thanks for pointing out the difference.
From a performance perspective, how about separating the loop into the boundary part and inner part so that we can skip the additional mod1
calculation in the inner part?
A ; circular= true
keyword to control the behavior on the boundary loop part sounds like a good interface to me.
EdgeIterator
in TiledIterations can be used to build loop for boundary part; and strel_size
in #65 can be used to calculate the radius of iter
.
I'm still very new to morphology so still don't know the label_components
details. It would be great if you can get a draft PR and then we can work together to improve it and get this supported.
The only issue I see with this solution is that EdgeIterator
addresses memory in linear index order. This is ideal if you only want to examine the edge, but since we want to look in multiple directions around each edge position, the resultant access pattern is very inefficient (e.g. for a 3D CCL problem we have to jump by slice for every point along the edge).
I think the best we can do is something like:
dims = size(I) # image dimensions
sdims = strel_size(strel) # dims of the strel
outer = (sdims[1]-1:end-sdims[1]+1, ...) #repeat for all dimensions
inner = (sdims[1]:end-sdims[1], ...)
edges = EdgeIterator(outer, inner)
points = [out[e] for e in edges]
for it in iter #strel cart offsets
for (point, e) in zip(points, edges)
i = e+it
b = out[i] #label for unknown edge point
#compare labels; union stuff
This results in n+1 traversals of out
for a strel
with n set values. With some maths this could be done in one traversal, but it'd need specialised code to do this, and you'd need special rules for odd strel
shapes.
This would go in between the first and second pass of the CCL algorithm (e.g. here), but should function in much the same way as the first pass does.
Do you know how to use ntuple to generate outer
as above?
that EdgeIterator addresses memory in linear index order. This is ideal if you only want to examine the edge, but since we want to look in multiple directions around each edge position, the resultant access pattern is very inefficient
For most practical usages, edges only possess less than 1% pixels, right? How inefficient could it be?
Do you know how to use ntuple to generate outer as above?
Does outer = map((n,r)->r-1:n-r+1, dims, sdims)
work?
The answer is "surprisingly efficient". For example, to access a 100x100 array at edge sites only using a EdgeIterator
takes
Q = zeros(100,100)
function foo(Q, iter, e)
for i in iter
for p in e
Q[i+p]
end
end
end
iter = [a-CartesianIndex(2,2) for a in CartesianIndices((3,3))]
3×3 Matrix{CartesianIndex{2}}:
CartesianIndex(-1, -1) CartesianIndex(-1, 0) CartesianIndex(-1, 1)
CartesianIndex(0, -1) CartesianIndex(0, 0) CartesianIndex(0, 1)
CartesianIndex(1, -1) CartesianIndex(1, 0) CartesianIndex(1, 1)
in(A) = (3:size(A,1)-2, 3:size(A,2)-2)
out(A) = (2:size(A,1)-1, 2:size(A,2)-1)
e = EdgeIterator(out(Q), in(Q))
EdgeIterator((2:99, 2:99), (3:98, 3:98))
@btime foo(Q,iter, e)
15.500 μs (0 allocations: 0 bytes)
ind = [CartesianIndex((0,0))]
@btime foo(Q,ind, e)
1.730 μs (0 allocations: 0 bytes)
Whether you consider this good or not is trickier - a linear scaling here is pretty much the worst-case scenario. When we reverse the order of the loops, we get:
function foo2(Q, iter, e)
for p in e
for i in iter
Q[i+p]
end
end
end
@btime foo2(Q,iter, e)
3.663 μs (0 allocations: 0 bytes)
@btime foo2(Q,ind, e)
1.620 μs (0 allocations: 0 bytes)
So definitely faster (by 5x). I wonder why that is - it should be much quicker the other way round, but maybe some weird optimisation is happening behind the scenes from getindex?
Does outer = map((n,r)->r-1:n-r+1, dims, sdims) work?
Yes, it looks like it works, but I think it's not known at compile time (vs ntuple?)
Any developments on this since then? It would also be useful if the component_centroids()
function received the same option to account for periodic boundaries
Yes, I wrote a version of this which works using TiledIteration.jl, as it has utilities to refer to edge regions of nD structures. I didn't really get a clean integration with ImageMorphology, but this gist does provide the functionality, and it seems to work for my use case. There's also a function which can operate on a subset of an array, label_samples
. It'd probably be better to seperate the edge and center parts of the code as that'd allow specialisation and use of half_pattern
for the center region instead of my replacement full_pattern
. It's worth noting I only built this to account for kernels which are 3 wide, but it could fairly easily be adapted to work for any width, as TiledIteration is totally capable of adapting to this behaviour.