Skip to content

Commit 89dabeb

Browse files
thchrKrastanovclaude
authored
Count number of connected components more efficiently than length(connected_components(g)) (#407)
Co-authored-by: Stefan Krastanov <github.acc@krastanov.org> Co-authored-by: Stefan Krastanov <stefan@krastanov.org> Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 64eb48b commit 89dabeb

File tree

5 files changed

+114
-16
lines changed

5 files changed

+114
-16
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77
- `maximum_clique`, `clique_number`, `maximal_independent_sets`, `maximum_independent_set`, `independence_number`
88
- `regular_tree` generator
99
- `kruskal_mst` now accepts weight vectors
10+
- `count_connected_components` for efficiently counting connected components without materializing them
11+
- `connected_components!` is now exported and accepts an optional `search_queue` argument to reduce allocations
12+
- `is_connected` optimized to avoid allocating component vectors
1013

1114
## v1.13.0 - 2025-06-05
1215
- **(breaking)** Julia v1.10 (LTS) minimum version requirement

src/Graphs.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,8 @@ export
212212

213213
# connectivity
214214
connected_components,
215+
connected_components!,
216+
count_connected_components,
215217
strongly_connected_components,
216218
strongly_connected_components_kosaraju,
217219
strongly_connected_components_tarjan,

src/connectivity.jl

Lines changed: 90 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,37 @@
11
# Parts of this code were taken / derived from Graphs.jl. See LICENSE for
22
# licensing details.
33
"""
4-
connected_components!(label, g)
4+
connected_components!(label, g, [search_queue])
55
66
Fill `label` with the `id` of the connected component in the undirected graph
77
`g` to which it belongs. Return a vector representing the component assigned
88
to each vertex. The component value is the smallest vertex ID in the component.
99
10-
### Performance
10+
## Optional arguments
11+
- `search_queue`, an empty `Vector{eltype(edgetype(g))}`, can be provided to avoid
12+
reallocating this work array repeatedly on repeated calls of `connected_components!`.
13+
If not provided, it is automatically instantiated.
14+
15+
!!! warning "Experimental"
16+
The `search_queue` argument is experimental and subject to potential change
17+
in future versions of Graphs.jl.
18+
19+
## Performance
1120
This algorithm is linear in the number of edges of the graph.
1221
"""
13-
function connected_components!(label::AbstractVector, g::AbstractGraph{T}) where {T}
22+
function connected_components!(
23+
label::AbstractVector{T}, g::AbstractGraph{T}, search_queue::Vector{T}=Vector{T}()
24+
) where {T}
25+
empty!(search_queue)
1426
for u in vertices(g)
1527
label[u] != zero(T) && continue
1628
label[u] = u
17-
Q = Vector{T}()
18-
push!(Q, u)
19-
while !isempty(Q)
20-
src = popfirst!(Q)
29+
push!(search_queue, u)
30+
while !isempty(search_queue)
31+
src = popfirst!(search_queue)
2132
for vertex in all_neighbors(g, src)
2233
if label[vertex] == zero(T)
23-
push!(Q, vertex)
34+
push!(search_queue, vertex)
2435
label[vertex] = u
2536
end
2637
end
@@ -129,9 +140,78 @@ julia> is_connected(g)
129140
true
130141
```
131142
"""
132-
function is_connected(g::AbstractGraph{T}) where {T}
    # A graph without vertices has zero connected components. The previous
    # implementation, `length(connected_components(g)) == 1`, therefore returned `false`
    # for it, whereas `allequal` on an empty label vector would return `true`. Keep the
    # earlier behaviour explicitly.
    nv(g) == 0 && return false

    # Cheap pre-check on the edge count: with too few edges the graph cannot be
    # connected, so the component search is skipped altogether. For directed graphs the
    # edge count is weighted by a factor of 2 in this bound.
    mult = is_directed(g) ? 2 : 1
    mult * ne(g) + 1 >= nv(g) || return false

    # Label every vertex with the id of its component; the graph is connected exactly
    # when all vertices carry the same label. This avoids materializing the component
    # vectors.
    label = zeros(T, nv(g))
    connected_components!(label, g)
    return allequal(label)
end
153+
154+
"""
155+
count_connected_components(g, [label, search_queue]; reset_label::Bool=false)
156+
157+
Return the number of connected components in `g`.
158+
159+
Equivalent to `length(connected_components(g))` but uses fewer allocations by not
160+
materializing the component vectors explicitly.
161+
162+
## Optional arguments
163+
Mutated work arrays, `label` and `search_queue`, can be provided to avoid allocating these
164+
arrays repeatedly on repeated calls of `count_connected_components`.
165+
For `g :: AbstractGraph{T}`, `label` must be a zero-initialized `Vector{T}` of length
166+
`nv(g)` and `search_queue` a `Vector{T}`. See also [`connected_components!`](@ref).
167+
168+
!!! warning "Experimental"
169+
The `search_queue` and `label` arguments are experimental and subject to potential
170+
change in future versions of Graphs.jl.
171+
172+
## Keyword arguments
173+
- `reset_label :: Bool` (default, `false`): if `true`, `label` is reset to a zero-vector
174+
before returning.
175+
176+
## Example
177+
```
178+
julia> using Graphs
179+
180+
julia> g = Graph(Edge.([1=>2, 2=>3, 3=>1, 4=>5, 5=>6, 6=>4, 7=>8]));
181+
182+
julia> connected_components(g)
183+
3-element Vector{Vector{Int64}}:
184+
[1, 2, 3]
185+
[4, 5, 6]
186+
[7, 8]
187+
188+
julia> count_connected_components(g)
189+
3
190+
```
191+
"""
192+
function count_connected_components(
    g::AbstractGraph{T},
    label::AbstractVector{T}=zeros(T, nv(g)),
    search_queue::Vector{T}=Vector{T}();
    reset_label::Bool=false,
) where {T}
    # Assign each vertex the id of its component; the number of distinct ids in `label`
    # is then the number of connected components.
    connected_components!(label, g, search_queue)
    ncomponents = count_unique(label)

    # On request, hand the work array back zeroed so that the caller can reuse it.
    if reset_label
        fill!(label, zero(eltype(label)))
    end
    return ncomponents
end
203+
204+
function count_unique(label::AbstractVector{T}) where {T}
    # Return the number of distinct values in `label`.
    #
    # Effectively does `length(Set(label))` but faster, since `Set(label)` sizehints
    # aggressively and assumes that most elements of `label` will be unique, which very
    # rarely will be the case for caller `count_connected_components`.
    #
    # Any `AbstractVector{T}` is accepted, matching the `label` argument type of
    # `count_connected_components` (e.g. views work, not only `Vector`).
    seen = T === Int ? BitSet() : Set{T}() # if `T=Int`, we can use faster BitSet
    for l in label
        # faster than direct `push!(seen, l)` when `label` has few unique elements relative
        # to `length(label)`
        l ∉ seen && push!(seen, l)
    end
    return length(seen)
end
136216

137217
"""

test/operators.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,7 @@
268268
for i in 3:4
269269
@testset "Tensor Product: $g" for g in testgraphs(path_graph(i))
270270
@test length(connected_components(tensor_product(g, g))) == 2
271+
@test count_connected_components(tensor_product(g, g)) == 2
271272
end
272273
end
273274

test/spanningtrees/boruvka.jl

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,18 @@
2121
g1t = GenericGraph(SimpleGraph(edges1))
2222
@test res1.weight == cost_mst
2323
# acyclic graphs have n - c edges
24-
@test nv(g1t) - length(connected_components(g1t)) == ne(g1t)
24+
@test nv(g1t) - ne(g1t) ==
25+
length(connected_components(g1t)) ==
26+
count_connected_components(g1t)
2527
@test nv(g1t) == nv(g)
2628

2729
res2 = boruvka_mst(g, distmx; minimize=false)
2830
edges2 = [Edge(src(e), dst(e)) for e in res2.mst]
2931
g2t = GenericGraph(SimpleGraph(edges2))
3032
@test res2.weight == cost_max_vec_mst
31-
@test nv(g2t) - length(connected_components(g2t)) == ne(g2t)
33+
@test nv(g2t) - ne(g2t) ==
34+
length(connected_components(g2t)) ==
35+
count_connected_components(g2t)
3236
@test nv(g2t) == nv(g)
3337
end
3438
# second test
@@ -60,14 +64,18 @@
6064
edges3 = [Edge(src(e), dst(e)) for e in res3.mst]
6165
g3t = GenericGraph(SimpleGraph(edges3))
6266
@test res3.weight == weight_vec2
63-
@test nv(g3t) - length(connected_components(g3t)) == ne(g3t)
67+
@test nv(g3t) - ne(g3t) ==
68+
length(connected_components(g3t)) ==
69+
count_connected_components(g3t)
6470
@test nv(g3t) == nv(gx)
6571

6672
res4 = boruvka_mst(g, distmx_sec; minimize=false)
6773
edges4 = [Edge(src(e), dst(e)) for e in res4.mst]
6874
g4t = GenericGraph(SimpleGraph(edges4))
6975
@test res4.weight == weight_max_vec2
70-
@test nv(g4t) - length(connected_components(g4t)) == ne(g4t)
76+
@test nv(g4t) - ne(g4t) ==
77+
length(connected_components(g4t)) ==
78+
count_connected_components(g4t)
7179
@test nv(g4t) == nv(gx)
7280
end
7381

@@ -123,14 +131,18 @@
123131
edges5 = [Edge(src(e), dst(e)) for e in res5.mst]
124132
g5t = GenericGraph(SimpleGraph(edges5))
125133
@test res5.weight == weight_vec3
126-
@test nv(g5t) - length(connected_components(g5t)) == ne(g5t)
134+
@test nv(g5t) - ne(g5t) ==
135+
length(connected_components(g5t)) ==
136+
count_connected_components(g5t)
127137
@test nv(g5t) == nv(gd)
128138

129139
res6 = boruvka_mst(g, distmx_third; minimize=false)
130140
edges6 = [Edge(src(e), dst(e)) for e in res6.mst]
131141
g6t = GenericGraph(SimpleGraph(edges6))
132142
@test res6.weight == weight_max_vec3
133-
@test nv(g6t) - length(connected_components(g6t)) == ne(g6t)
143+
@test nv(g6t) - ne(g6t) ==
144+
length(connected_components(g6t)) ==
145+
count_connected_components(g6t)
134146
@test nv(g6t) == nv(gd)
135147
end
136148
end

0 commit comments

Comments
 (0)