Skip to content

Commit 2ed7f6b

Browse files
authored
Merge pull request #840 from igraph/docs/stochasctic-community-detection
2 parents a695f09 + 132c0c7 commit 2ed7f6b

File tree

1 file changed

+149
-0
lines changed

1 file changed

+149
-0
lines changed
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
"""
2+
.. _tutorials-stochastic-variability:
3+
4+
=========================================================
5+
Stochastic Variability in Community Detection Algorithms
6+
=========================================================
7+
8+
This example demonstrates the use of stochastic community detection methods to check whether a network possesses a strong community structure, and whether the partitionings we obtain are meaningful. Many community detection algorithms are randomized, and return somewhat different results after each run, depending on the random seed that was set. When there is a robust community structure, we expect these results to be similar to each other. When the community structure is weak or non-existent, the results may be noisy and highly variable. We will employ several partition similarity measures to analyse the consistency of the results, including the normalized mutual information (NMI), the variation of information (VI), and the Rand index (RI).
9+
10+
"""
11+
# %%
12+
import igraph as ig
13+
import matplotlib.pyplot as plt
14+
import itertools
15+
import random
16+
17+
# %%
18+
# .. note::
19+
# We set a random seed to ensure that the results look exactly the same in
20+
# the gallery. You don't need to do this when exploring randomness.
21+
random.seed(42)
22+
23+
# %%
24+
# We will use Zachary's karate club dataset [1]_, a classic example of a network
25+
# with a strong community structure:
26+
karate = ig.Graph.Famous("Zachary")
27+
28+
# %%
29+
# We will compare it to an Erdős-Rényi :math:`G(n, m)` random network having
30+
# the same number of vertices and edges. The parameters 'n' and 'm' refer to the
31+
# vertex and edge count, respectively. Since this is a random network, it should
32+
# have no community structure.
33+
random_graph = ig.Graph.Erdos_Renyi(n=karate.vcount(), m=karate.ecount())
34+
35+
# %%
36+
# First, let us plot the two networks for a visual comparison:
37+
38+
# Create subplots
39+
# Two side-by-side panels with equal aspect ratio so that neither layout is
# stretched.
fig, axes = plt.subplots(1, 2, figsize=(12, 6), subplot_kw={'aspect': 'equal'})

# Karate club network
ig.plot(
    karate, target=axes[0],
    vertex_color="lightblue", vertex_size=30,
    vertex_label=range(karate.vcount()), vertex_label_size=10,
    edge_width=1
)
axes[0].set_title("Karate club network")

# Random network
ig.plot(
    random_graph, target=axes[1],
    vertex_color="lightcoral", vertex_size=30,
    vertex_label=range(random_graph.vcount()), vertex_label_size=10,
    edge_width=1
)
axes[1].set_title("Erdős-Rényi random network")

plt.show()
60+
61+
# %%
62+
# Function to compute similarity between partitions using various methods:
63+
def compute_pairwise_similarity(partitions, method):
    """Compare every unordered pair of partitions with the given measure.

    Parameters
    ----------
    partitions : sequence
        The partitions to compare. Each element is handed to
        :func:`igraph.compare_communities` as-is (in this example they are
        membership lists).
    method : str
        Name of the comparison measure, forwarded unchanged as the ``method``
        argument of :func:`igraph.compare_communities`.

    Returns
    -------
    list
        One score per pair, in the order produced by
        ``itertools.combinations(partitions, 2)``. The list is empty when
        fewer than two partitions are given.
    """
    return [
        ig.compare_communities(p1, p2, method=method)
        for p1, p2 in itertools.combinations(partitions, 2)
    ]
71+
72+
# %%
73+
# The Leiden method, accessible through :meth:`igraph.Graph.community_leiden()`,
74+
# is a modularity maximization approach for community detection. Since exact
75+
# modularity maximization is NP-hard, the algorithm employs a greedy heuristic
76+
# that processes vertices in a random order. This randomness leads to
77+
# variation in the detected communities across different runs, which is why
78+
# results may differ each time the method is applied. The following function
79+
# runs the Leiden algorithm multiple times:
80+
def run_experiment(graph, iterations=100):
    """Run Leiden repeatedly and score how well the resulting partitions agree.

    Parameters
    ----------
    graph
        Object providing ``community_leiden(objective_function=...)`` whose
        result has a ``membership`` attribute.
    iterations : int, optional
        Number of independent Leiden runs to perform (default 100).

    Returns
    -------
    tuple of (list, list, list)
        Pairwise ``"nmi"``, ``"vi"`` and ``"rand"`` scores, in that order,
        each computed over all unordered pairs of the collected partitions.
    """
    # Collect one membership vector per run; the runs differ only through the
    # algorithm's own randomness.
    partitions = [
        graph.community_leiden(objective_function='modularity').membership
        for _ in range(iterations)
    ]
    nmi_scores = compute_pairwise_similarity(partitions, method="nmi")
    vi_scores = compute_pairwise_similarity(partitions, method="vi")
    ri_scores = compute_pairwise_similarity(partitions, method="rand")
    return nmi_scores, vi_scores, ri_scores
86+
87+
# %%
88+
# Run the experiment on both networks:
89+
nmi_karate, vi_karate, ri_karate = run_experiment(karate)
90+
nmi_random, vi_random, ri_random = run_experiment(random_graph)
91+
92+
# %%
93+
# Finally, let us plot histograms of the pairwise similarities of the obtained
94+
# partitionings to understand the result:
95+
# One column per similarity measure; top row = karate club, bottom row =
# random network.
fig, axes = plt.subplots(2, 3, figsize=(12, 6))
measures = [
    # Normalized Mutual Information (0-1, higher = more similar)
    (nmi_karate, nmi_random, "NMI", 0, 1),
    # Variation of Information (0+, lower = more similar)
    # The upper limit is taken over both networks so that the two VI
    # histograms share the same x-range.
    (vi_karate, vi_random, "VI", 0, max(vi_karate + vi_random)),
    # Rand Index (0-1, higher = more similar)
    (ri_karate, ri_random, "RI", 0, 1),
]
colors = ["red", "blue", "green"]

for i, (karate_scores, random_scores, measure, lower, upper) in enumerate(measures):
    # Both rows go through the same code path so that their formatting
    # (including the y-axis label) cannot diverge.
    rows = [
        (axes[0][i], karate_scores, "Karate club network"),
        (axes[1][i], random_scores, "Random network"),
    ]
    for ax, scores, network_name in rows:
        ax.hist(
            scores, bins=20, range=(lower, upper),
            density=True,  # Probability density
            alpha=0.7, color=colors[i], edgecolor="black"
        )
        ax.set_title(f"{measure} - {network_name}")
        ax.set_xlabel(f"{measure} score")
        ax.set_ylabel("PDF")

plt.tight_layout()
plt.show()
128+
129+
# %%
130+
# We have compared the pairwise similarities using the NMI, VI, and RI measures
131+
# between partitionings obtained for the karate club network (strong community
132+
# structure) and a comparable random graph (which lacks communities).
133+
#
134+
# The Normalized Mutual Information (NMI) and Rand Index (RI) both quantify
135+
# similarity, and take values from :math:`[0,1]`. Higher values indicate more
136+
# similar partitionings, with a value of 1 attained when the partitionings are
137+
# identical.
138+
#
139+
# The Variation of Information (VI) is a distance measure. It takes values from
140+
# :math:`[0,\infty)`, with lower values indicating higher similarity. Identical
141+
# partitionings have a distance of zero.
142+
#
143+
# For the karate club network, NMI and RI values are concentrated near 1, while
144+
# VI is concentrated near 0, suggesting a robust community structure. In contrast,
145+
# the values obtained for the random network are much more spread out, showing
146+
# inconsistent partitionings due to the lack of a clear community structure.
147+
148+
# %%
149+
# .. [1] W. Zachary: "An Information Flow Model for Conflict and Fission in Small Groups". Journal of Anthropological Research 33, no. 4 (1977): 452–73. https://www.jstor.org/stable/3629752

0 commit comments

Comments
 (0)