|
| 1 | +""" |
| 2 | + louvain(g, distmx=weights(g), γ=1; max_moves::Integer=1000, max_merges::Integer=1000, move_tol::Real=10e-10, merge_tol::Real=10e-10, rng=nothing, seed=nothing) |
| 3 | +
|
| 4 | +Community detection using the louvain algorithm. Finds a partition of the vertices that |
| 5 | +attempts to maximize the modularity. Returns a vector of community ids. |
| 6 | +
|
| 7 | +### Optional Arguments |
| 8 | +- `distmx=weights(g)`: distance matrix for weighted graphs |
| 9 | +- `γ=1.0`: where `γ > 0` is a resolution parameter. Higher resolutions lead to more |
| 10 | + communities, while lower resolutions lead to fewer communities. Where `γ=1.0` it |
| 11 | + leads to the traditional definition of the modularity. |
| 12 | +- `max_moves=1000`: maximum number of rounds moving vertices before merging. |
| 13 | +- `max_merges=1000`: maximum number of merges. |
| 14 | +- `move_tol=10e-10`: necessary increase of modularity to move a vertex. |
| 15 | +- `merge_tol=10e-10`: necessary increase of modularity in the move stage to merge. |
| 16 | +- `rng=nothing`: rng to use for reproducibility. May only pass one of rng or seed. |
| 17 | +- `seed=nothing`: seed to use for reproducibility. May only pass one of rng or seed. |
| 18 | +
|
| 19 | +### References |
| 20 | +- [Vincent D Blondel et al J. Stat. Mech. (2008) P10008][https://doi.org/10.1088/1742-5468/2008/10/P10008] |
| 21 | +- [Nicolas Dugué, Anthony Perez. Directed Louvain : maximizing modularity in directed networks.][https://hal.science/hal-01231784/document] |
| 22 | +
|
| 23 | +# Examples |
| 24 | +```jldoctest |
| 25 | +julia> using Graphs |
| 26 | +
|
| 27 | +julia> barbell = blockdiag(complete_graph(3), complete_graph(3)); |
| 28 | +
|
| 29 | +julia> add_edge!(barbell, 1, 4); |
| 30 | +
|
| 31 | +julia> louvain(barbell) |
| 32 | +6-element Vector{Int64}: |
| 33 | + 1 |
| 34 | + 1 |
| 35 | + 1 |
| 36 | + 2 |
| 37 | + 2 |
| 38 | + 2 |
| 39 | +
|
| 40 | +julia> louvain(barbell, γ=0.01) |
| 41 | +6-element Vector{Int64}: |
| 42 | + 1 |
| 43 | + 1 |
| 44 | + 1 |
| 45 | + 1 |
| 46 | + 1 |
| 47 | + 1 |
| 48 | +``` |
| 49 | +""" |
| 50 | +function louvain( |
| 51 | + g::AbstractGraph{T}; |
| 52 | + γ=1.0, |
| 53 | + distmx::AbstractArray{<:Number}=weights(g), |
| 54 | + max_moves::Integer=1000, |
| 55 | + max_merges::Integer=1000, |
| 56 | + move_tol::Real=10e-10, |
| 57 | + merge_tol::Real=10e-10, |
| 58 | + rng::Union{Nothing,AbstractRNG}=nothing, |
| 59 | + seed::Union{Nothing,Integer}=nothing, |
| 60 | +) where {T} |
| 61 | + rng = rng_from_rng_or_seed(rng, seed) |
| 62 | + n = nv(g) |
| 63 | + if n == 0 |
| 64 | + return T[] |
| 65 | + end |
| 66 | + |
| 67 | + @debug "Running louvain with parameters γ=$(γ), max_moves=$(max_moves), " * |
| 68 | + "max_merges=$(max_merges), move_tol=$(move_tol), merge_tol=$(merge_tol)" |
| 69 | + |
| 70 | + actual_coms = collect(one(T):nv(g)) |
| 71 | + current_coms = copy(actual_coms) |
| 72 | + # actual_coms is always of length nv(g) and holds the current com for each v in g |
| 73 | + # current_coms is for the current graph; after merges it will be smaller than nv(g) |
| 74 | + |
| 75 | + for iter in 0:max_merges |
| 76 | + current_modularity = modularity(g, current_coms; distmx=distmx, γ=γ) |
| 77 | + @debug "Merge iteration $(iter). Current modularity is $(current_modularity)" |
| 78 | + louvain_move!(g, γ, current_coms, rng, distmx, max_moves, move_tol) |
| 79 | + # remap communities to 1-nc |
| 80 | + com_map = Dict(old => new for (new, old) in enumerate(unique(current_coms))) |
| 81 | + for i in eachindex(actual_coms) |
| 82 | + actual_coms[i] = com_map[current_coms[actual_coms[i]]] |
| 83 | + end |
| 84 | + @debug "Communities after moving in iteration $(iter): $(actual_coms)" |
| 85 | + for i in eachindex(current_coms) |
| 86 | + current_coms[i] = com_map[current_coms[i]] |
| 87 | + end |
| 88 | + |
| 89 | + # Stop if modularity gain is too small |
| 90 | + new_modularity = modularity(g, current_coms; distmx=distmx, γ=γ) |
| 91 | + @debug "New modularity is $(new_modularity) for a gain of $(new_modularity - |
| 92 | + current_modularity)" |
| 93 | + if new_modularity - current_modularity < merge_tol |
| 94 | + break |
| 95 | + end |
| 96 | + g, distmx = louvain_merge(g, current_coms, distmx) |
| 97 | + if nv(g) == 1 # nothing left to merge |
| 98 | + break |
| 99 | + end |
| 100 | + current_coms = collect(one(T):nv(g)) |
| 101 | + end |
| 102 | + return actual_coms |
| 103 | +end |
| 104 | + |
| 105 | +""" |
| 106 | + louvain_move!(g, γ, c, rng, distmx=weights(g), max_moves=1000, move_tol=10e-10) |
| 107 | +
|
| 108 | +The move stage of the louvain algorithm. |
| 109 | +""" |
| 110 | +function louvain_move!( |
| 111 | + g, γ, c, rng, distmx=weights(g), max_moves::Integer=1000, move_tol::Real=10e-10 |
| 112 | +) |
| 113 | + vertex_order = shuffle!(rng, collect(vertices(g))) |
| 114 | + nc = maximum(c) |
| 115 | + |
| 116 | + # Compute graph and community volumes |
| 117 | + m = 0 |
| 118 | + c_vols = zeros(eltype(distmx), ((is_directed(g) ? 2 : 1), nc)) |
| 119 | + # if is_directed use row 1 for in and 2 for out |
| 120 | + for e in edges(g) |
| 121 | + m += distmx[src(e), dst(e)] |
| 122 | + c_vols[1, c[src(e)]] += distmx[src(e), dst(e)] |
| 123 | + if is_directed(g) |
| 124 | + c_vols[2, c[dst(e)]] += distmx[src(e), dst(e)] |
| 125 | + else |
| 126 | + c_vols[1, c[dst(e)]] += distmx[src(e), dst(e)] |
| 127 | + end |
| 128 | + end |
| 129 | + |
| 130 | + for _ in 1:max_moves |
| 131 | + last_change = nothing |
| 132 | + for v in vertex_order |
| 133 | + if v == last_change # stop if we see each vertex and no movement |
| 134 | + return nothing |
| 135 | + end |
| 136 | + potential_coms = unique(c[u] for u in all_neighbors(g, v)) |
| 137 | + filter!(!=(c[v]), potential_coms) |
| 138 | + @debug "Moving vertex $(v) from com $(c[v]) to potential_coms $(potential_coms)" |
| 139 | + if isempty(potential_coms) # Continue if there are no other neighboring coms |
| 140 | + continue |
| 141 | + end |
| 142 | + shuffle!(rng, potential_coms) # Break ties randomly by first com |
| 143 | + |
| 144 | + #Remove vertex degrees from current community |
| 145 | + out_degree = sum( |
| 146 | + u == v ? 2distmx[v, u] : distmx[v, u] for u in outneighbors(g, v) |
| 147 | + ) |
| 148 | + c_vols[1, c[v]] -= out_degree |
| 149 | + |
| 150 | + in_degree = 0.0 # defined outside to keep JET.jl happy |
| 151 | + if is_directed(g) |
| 152 | + in_degree = sum( |
| 153 | + u == v ? 2distmx[v, u] : distmx[v, u] for u in inneighbors(g, v) |
| 154 | + ) |
| 155 | + c_vols[2, c[v]] -= in_degree |
| 156 | + end |
| 157 | + |
| 158 | + # Compute loss in modularity by removing vertex |
| 159 | + loss = ΔQ(g, γ, distmx, c, v, m, c[v], c_vols) |
| 160 | + @debug "Q loss of removing vertex $(v) from its community: $(loss)" |
| 161 | + # Compute gain by moving to alternate neighboring community |
| 162 | + this_ΔQ = c_potential -> ΔQ(g, γ, distmx, c, v, m, c_potential, c_vols) |
| 163 | + best_ΔQ, best_com_id = findmax(this_ΔQ, potential_coms) |
| 164 | + best_com = potential_coms[best_com_id] |
| 165 | + @debug "Best move is to $(best_com) with Q gain of $(best_ΔQ)" |
| 166 | + if best_ΔQ - loss > move_tol |
| 167 | + c[v] = best_com |
| 168 | + c_vols[1, best_com] += out_degree |
| 169 | + if is_directed(g) |
| 170 | + c_vols[2, best_com] += in_degree |
| 171 | + end |
| 172 | + last_change = v |
| 173 | + @debug "Moved vertex $(v) to community $(best_com)" |
| 174 | + else |
| 175 | + c_vols[1, c[v]] += out_degree |
| 176 | + if is_directed(g) |
| 177 | + c_vols[2, c[v]] += in_degree |
| 178 | + end |
| 179 | + @debug "Insufficient Q gain, vertex $(v) stays in community $(c[v])" |
| 180 | + end |
| 181 | + end |
| 182 | + if isnothing(last_change) # No movement |
| 183 | + return nothing |
| 184 | + end |
| 185 | + end |
| 186 | +end |
| 187 | + |
| 188 | +""" |
| 189 | + ΔQ(g, γ, distmx, c, v, m, c_potential, c_vols) |
| 190 | +
|
| 191 | +Compute the change in modularity when adding vertex v a potential community. |
| 192 | +""" |
| 193 | +function ΔQ(g, γ, distmx, c, v, m, c_potential, c_vols) |
| 194 | + if is_directed(g) |
| 195 | + out_degree = 0 |
| 196 | + com_out_degree = 0 |
| 197 | + for u in outneighbors(g, v) |
| 198 | + out_degree += distmx[v, u] |
| 199 | + if c[u] == c_potential || u == v |
| 200 | + com_out_degree += distmx[v, u] |
| 201 | + end |
| 202 | + end |
| 203 | + |
| 204 | + in_degree = 0 |
| 205 | + com_in_degree = 0 |
| 206 | + for u in inneighbors(g, v) |
| 207 | + in_degree += distmx[u, v] |
| 208 | + if c[u] == c_potential || u == v |
| 209 | + com_in_degree += distmx[u, v] |
| 210 | + end |
| 211 | + end |
| 212 | + |
| 213 | + # Singleton special case |
| 214 | + if c_vols[1, c_potential] == 0 && c_vols[2, c_potential] == 0 |
| 215 | + return (com_in_degree+com_out_degree)/m - γ*2(in_degree + out_degree)/m^2 |
| 216 | + end |
| 217 | + return (com_in_degree+com_out_degree)/m - |
| 218 | + γ*(in_degree*c_vols[1, c_potential]+out_degree*c_vols[2, c_potential])/m^2 |
| 219 | + else |
| 220 | + degree = 0 |
| 221 | + com_degree = 0 |
| 222 | + for u in neighbors(g, v) |
| 223 | + degree += u == v ? 2distmx[u, v] : distmx[u, v] |
| 224 | + if u == v |
| 225 | + com_degree += 2distmx[u, v] |
| 226 | + elseif c[u] == c_potential |
| 227 | + com_degree += distmx[u, v] |
| 228 | + end |
| 229 | + end |
| 230 | + # Singleton special case |
| 231 | + if c_vols[1, c_potential] == 0 |
| 232 | + return com_degree/2m - γ*(degree/2m)^2 |
| 233 | + end |
| 234 | + return com_degree/2m - γ*degree*c_vols[1, c_potential]/2m^2 |
| 235 | + end |
| 236 | +end |
| 237 | + |
| 238 | +""" |
| 239 | + louvain_merge(g, c, distmx) |
| 240 | +
|
| 241 | +Merge stage of the louvain algorithm. |
| 242 | +""" |
| 243 | +function louvain_merge(g::AbstractGraph{T}, c, distmx) where {T} |
| 244 | + # c is assumed to be 1:nc |
| 245 | + nc = maximum(c) |
| 246 | + new_distmx = Dict{Tuple{T,T},eltype(distmx)}() |
| 247 | + new_graph = is_directed(g) ? SimpleDiGraph{T}(nc) : SimpleGraph{T}(nc) |
| 248 | + for e in edges(g) |
| 249 | + new_src = c[src(e)] |
| 250 | + new_dst = c[dst(e)] |
| 251 | + if haskey(new_distmx, (new_src, new_dst)) |
| 252 | + new_distmx[(new_src, new_dst)] += distmx[src(e), dst(e)] |
| 253 | + else |
| 254 | + new_distmx[(new_src, new_dst)] = distmx[src(e), dst(e)] |
| 255 | + end |
| 256 | + add_edge!(new_graph, new_src, new_dst) |
| 257 | + end |
| 258 | + |
| 259 | + # Convert new_distmx Dict to SparseArray |
| 260 | + r = [k[1] for k in keys(new_distmx)] |
| 261 | + c = [k[2] for k in keys(new_distmx)] |
| 262 | + v = [v for v in values(new_distmx)] |
| 263 | + new_distmx = sparse(r, c, v, nc, nc) |
| 264 | + |
| 265 | + if !is_directed(new_graph) |
| 266 | + new_distmx = new_distmx + transpose(new_distmx) |
| 267 | + new_distmx[diagind(new_distmx)] ./= 2 |
| 268 | + end |
| 269 | + |
| 270 | + return new_graph, new_distmx |
| 271 | +end |
0 commit comments