pip install cdlib


pip install networkx


from cdlib import algorithms
import networkx as nx
import cdlib


# Load the karate club graph shipped with networkx and cluster it with two
# different algorithms so that the resulting clusterings can be compared below.
g = nx.karate_club_graph()
lp_coms = algorithms.label_propagation(g)
leiden_coms = algorithms.leiden(g)


# Name of the clustering algorithm that produced this result
leiden_coms.method_name

'Leiden'


# Parameters the clustering algorithm was run with
leiden_coms.method_parameters

{'initial_membership': None, 'weights': None}


# The clustering itself: one list of node ids per community
leiden_coms.communities

[[8, 9, 14, 15, 18, 20, 22, 26, 29, 30, 32, 33],
 [0, 1, 2, 3, 7, 11, 12, 13, 17, 19, 21],
 [23, 24, 25, 27, 28, 31],
 [4, 5, 6, 10, 16]]


# Overlap flag of the clustering (presumably True when a node may belong to
# several communities -- confirm against the CDlib documentation)
leiden_coms.overlap

False


# Fraction of the graph's nodes covered by the clustering
leiden_coms.node_coverage

1.0


# Serialize the clustering (communities, algorithm name, parameters, overlap
# flag and coverage) to a JSON string
leiden_coms.to_json()

'{"communities": [[8, 9, 14, 15, 18, 20, 22, 26, 29, 30, 32, 33], [0, 1, 2, 3, 7, 11, 12, 13, 17, 19, 21], [23, 24, 25, 27, 28, 31], [4, 5, 6, 10, 16]], "algorithm": "Leiden", "params": {"initial_membership": null, "weights": null}, "overlap": false, "coverage": 1.0}'


# Fitness score of the clustering, summarized over all communities as a
# FitnessResult (min, max, score, std)
leiden_coms.average_internal_degree()

FitnessResult(min=2.3333333333333335, max=4.181818181818182, score=3.103787878787879, std=0.7758948002447444)


# Same fitness function, but with summary=False it returns one value per
# community instead of the aggregated FitnessResult
leiden_coms.average_internal_degree(summary=False)

[3.5, 4.181818181818182, 2.3333333333333335, 2.4]


from cdlib import evaluation

# Equivalent call through the evaluation module: the graph and the clustering
# are passed explicitly
evaluation.average_internal_degree(g, leiden_coms)

FitnessResult(min=2.3333333333333335, max=4.181818181818182, score=3.103787878787879, std=0.7758948002447444)


# Compare the Leiden clustering with the label propagation clustering using
# normalized mutual information
leiden_coms.normalized_mutual_information(lp_coms)

MatchingResult(score=0.5421597004371672, std=None)


# Equivalent comparison through the evaluation module
evaluation.normalized_mutual_information(leiden_coms, lp_coms)

MatchingResult(score=0.5421597004371672, std=None)


from cdlib import viz

# Compute the node layout once; the same `pos` is reused by the next plot so
# that both clusterings are drawn with identical node positions.
pos = nx.spring_layout(g)
viz.plot_network_clusters(g, leiden_coms, pos, figsize=(5, 5))

<matplotlib.collections.PathCollection at 0x22a930e4f40>


# Same plot for the label propagation clustering, reusing the layout `pos`
viz.plot_network_clusters(g, lp_coms, pos, figsize=(5, 5))

<matplotlib.collections.PathCollection at 0x22a9315a1c0>


# Plot the community graph of the Leiden clustering
# (presumably one node per community -- confirm against the CDlib viz docs)
viz.plot_community_graph(g, leiden_coms, figsize=(3, 3))

<matplotlib.collections.PathCollection at 0x22a931b3a30>


# Plot the community graph of the label propagation clustering
# (presumably one node per community -- confirm against the CDlib viz docs)
viz.plot_community_graph(g, lp_coms, figsize=(3, 3))

<matplotlib.collections.PathCollection at 0x22a9322cca0>


# Compare the two clusterings on a single fitness function (internal edge density)
viz.plot_com_stat([leiden_coms, lp_coms], evaluation.internal_edge_density)

<AxesSubplot:xlabel='Algorithm', ylabel='internal_edge_density'>


# Plot the relation between two community properties (size and internal edge
# density) for both clusterings
viz.plot_com_properties_relation([leiden_coms, lp_coms], evaluation.size, evaluation.internal_edge_density)

<seaborn.axisgrid.FacetGrid at 0x22a932a8640>


from cdlib import NodeClustering

# Three LFR benchmark graphs with 1000 nodes each; only the fourth positional
# argument (the mixing parameter mu in networkx's signature) changes: 0.5, 0.6, 0.7.
g1 = nx.generators.community.LFR_benchmark_graph(1000, 3, 1.5, 0.5, min_community=20, average_degree=5)
g2 = nx.generators.community.LFR_benchmark_graph(1000, 3, 1.5, 0.6, min_community=20, average_degree=5)
g3 = nx.generators.community.LFR_benchmark_graph(1000, 3, 1.5, 0.7, min_community=20, average_degree=5)

names = ["g1", "g2", "g3"]
graphs = [g1, g2, g3]
references = []

# Build the NodeClustering ground truth for each graph from the 'community'
# attribute that the LFR generator stores on every node.
# The loop variable is deliberately NOT named `g`: that would rebind the
# karate club graph `g` to the last LFR graph, and the cells further down
# (ensemble.pool, grid_search, ...) would then silently run on the wrong graph.
for bench_graph in graphs:
    ground_truth = NodeClustering(
        communities={frozenset(bench_graph.nodes[v]['community']) for v in bench_graph},
        graph=bench_graph,
        method_name="reference",
    )
    references.append(ground_truth)

algos = [algorithms.leiden, algorithms.label_propagation]

# Compute the visualization: 2 executions per method, adjusted mutual
# information (AMI) as the score of resemblance to the ground truth.
viz.plot_scoring(graphs, references, names, algos, scoring=evaluation.adjusted_mutual_information, nbRuns=2)

<AxesSubplot:xlabel='graph', ylabel='score'>


from cdlib import ensemble

# Pooling: run several algorithms on the same graph in one call.
# Both configurations are empty lists here, so no parameter is overridden
# for either Louvain or Leiden.
methods = [algorithms.louvain, algorithms.leiden]

for coms in ensemble.pool(g, methods, configurations=[[], []]):
    print(coms.method_name, "\n", coms.communities)

Louvain 
 [[8, 14, 15, 18, 20, 22, 23, 26, 27, 29, 30, 32, 33], [0, 1, 2, 3, 7, 9, 11, 12, 13, 17, 19, 21], [4, 5, 6, 10, 16], [24, 25, 28, 31]]
Leiden 
 [[8, 9, 14, 15, 18, 20, 22, 26, 29, 30, 32, 33], [0, 1, 2, 3, 7, 11, 12, 13, 17, 19, 21], [23, 24, 25, 27, 28, 31], [4, 5, 6, 10, 16]]


# Example Louvain configuration: a resolution given only by its start value
# (no range) and the randomize flag set to True.
resolution = ensemble.Parameter(name="resolution", start=0.1)
randomize = ensemble.BoolParameter(name="randomize", value=True)
louvain_conf = [resolution, randomize]

methods = [algorithms.louvain, algorithms.leiden]

# One configuration per method, in the same order as `methods`; the empty list
# leaves Leiden without overrides.
# The loop variable must be the name used in the body: the previous version
# iterated as `communities` but printed `coms`, i.e. a stale result left over
# from an earlier cell, once per iteration.
for coms in ensemble.pool(g, methods, [louvain_conf, []]):
    print(coms.method_name, "\n", coms.communities)

Leiden 
 [[8, 9, 14, 15, 18, 20, 22, 26, 29, 30, 32, 33], [0, 1, 2, 3, 7, 11, 12, 13, 17, 19, 21], [23, 24, 25, 27, 28, 31], [4, 5, 6, 10, 16]]
Leiden 
 [[8, 9, 14, 15, 18, 20, 22, 26, 29, 30, 32, 33], [0, 1, 2, 3, 7, 11, 12, 13, 17, 19, 21], [23, 24, 25, 27, 28, 31], [4, 5, 6, 10, 16]]


# Parameters can also describe a whole range of values to explore
resolution = ensemble.Parameter(name="resolution", start=0.1, end=1, step=0.1) # numeric range
randomize = ensemble.BoolParameter(name="randomize") # boolean range [True, False]


# Grid execution: run Louvain once per value of the resolution range and
# print every resulting clustering together with the parameters used.
# The reported resolution values can carry floating-point accumulation error
# (e.g. 0.7999999999999999 rather than 0.8).
resolution = ensemble.Parameter(name="resolution", start=0.7, end=1, step=0.1)

for coms in ensemble.grid_execution(graph=g, method=algorithms.louvain, parameters=[resolution]):
    print(coms.method_name, coms.method_parameters, "\n", coms.communities, "\n")

Louvain {'weight': 'weight', 'resolution': 0.7, 'randomize': None} 
 [[8, 14, 15, 18, 20, 22, 26, 29, 30, 32, 33], [0, 1, 3, 7, 11, 12, 13, 17, 19, 21], [23, 24, 25, 27, 28, 31], [4, 5, 6, 10, 16], [2, 9]] 

Louvain {'weight': 'weight', 'resolution': 0.7999999999999999, 'randomize': None} 
 [[8, 9, 14, 15, 18, 20, 22, 26, 29, 30, 32, 33], [0, 1, 2, 3, 7, 11, 12, 13, 17, 19, 21], [23, 24, 25, 27, 28, 31], [4, 5, 6, 10, 16]] 

Louvain {'weight': 'weight', 'resolution': 0.8999999999999999, 'randomize': None} 
 [[8, 9, 14, 15, 18, 20, 22, 26, 29, 30, 32, 33], [0, 1, 2, 3, 7, 11, 12, 13, 17, 19, 21], [23, 24, 25, 27, 28, 31], [4, 5, 6, 10, 16]] 

Louvain {'weight': 'weight', 'resolution': 0.9999999999999999, 'randomize': None} 
 [[8, 9, 14, 15, 18, 20, 22, 26, 29, 30, 32, 33], [0, 1, 2, 3, 7, 11, 12, 13, 17, 19, 21], [23, 24, 25, 27, 28, 31], [4, 5, 6, 10, 16]]


# Grid search: explore the resolution/randomize parameter grid for Louvain and
# keep the clustering selected by `aggregate` (here max) on the quality score
# (Erdos-Renyi modularity). Returns the selected clustering and its score.
resolution = ensemble.Parameter(name="resolution", start=0.1, end=1, step=0.1)
randomize = ensemble.BoolParameter(name="randomize")

coms, scoring = ensemble.grid_search(graph=g, method=algorithms.louvain,
                                                     parameters=[resolution, randomize],
                                                     quality_score=evaluation.erdos_renyi_modularity,
                                                     aggregate=max)

print("Communities:\n %s \nConfiguration: %s \nScoring: %s" %(coms.communities, coms.method_parameters, scoring))

Communities:
 [[8, 14, 15, 18, 20, 22, 23, 26, 27, 29, 30, 32, 33], [0, 1, 2, 3, 7, 9, 11, 12, 13, 17, 19, 21], [4, 5, 6, 10, 16], [24, 25, 28, 31]] 
Configuration: {'weight': 'weight', 'resolution': 0.8, 'randomize': False} 
Scoring: FitnessResult(min=None, max=None, score=0.4712052653229123, std=None)


# Random search: evaluate `instances` parameter combinations for Louvain
# (presumably sampled at random from the given ranges -- see the CDlib
# ensemble docs) and keep the one selected by `aggregate` (here max) on the
# quality score.
# The result is bound to `coms` because the print below reads `coms`; binding
# it to a different name made the print silently report the earlier
# grid_search clustering next to the random_search score.
coms, scoring = ensemble.random_search(graph=g, method=algorithms.louvain,
                                       parameters=[resolution, randomize],
                                       quality_score=evaluation.erdos_renyi_modularity,
                                       instances=5, aggregate=max)

print("Communities:\n %s \nConfiguration: %s \nScoring: %s" %(coms.communities, coms.method_parameters, scoring))

Communities:
 [[8, 14, 15, 18, 20, 22, 23, 26, 27, 29, 30, 32, 33], [0, 1, 2, 3, 7, 9, 11, 12, 13, 17, 19, 21], [4, 5, 6, 10, 16], [24, 25, 28, 31]] 
Configuration: {'weight': 'weight', 'resolution': 0.8, 'randomize': False} 
Scoring: FitnessResult(min=None, max=None, score=0.4584533113944878, std=None)


# Pooling combined with optimization: each method gets its own parameter
# ranges, and for each method one clustering is yielded together with its
# quality score, selected by `aggregate` (here max).
louvain_conf = [resolution, randomize]

epsilon = ensemble.Parameter(name="epsilon", start=0.2, end=0.7, step=0.1)
demon_conf = [epsilon]

methods = [algorithms.louvain, algorithms.demon]

# Configurations are given in the same order as `methods`
for coms, scoring in ensemble.pool_grid_filter(g, methods, [louvain_conf, demon_conf], quality_score=evaluation.erdos_renyi_modularity, aggregate=max):
    print("%s\nCommunities:\n %s \nConfiguration: %s \nScoring: %s\n" %(coms.method_name, coms.communities, coms.method_parameters, scoring))

Louvain
Communities:
 [[8, 14, 15, 18, 20, 22, 23, 26, 27, 29, 30, 32, 33], [0, 1, 2, 3, 7, 9, 11, 12, 13, 17, 19, 21], [4, 5, 6, 10, 16], [24, 25, 28, 31]] 
Configuration: {'weight': 'weight', 'resolution': 0.8, 'randomize': False} 
Scoring: FitnessResult(min=None, max=None, score=0.4712052653229123, std=None)

DEMON
Communities:
 [[2, 8, 14, 15, 18, 20, 22, 23, 26, 27, 29, 30, 31, 32, 33], [8, 14, 15, 18, 20, 22, 23, 26, 27, 28, 29, 30, 31, 32, 33], [0, 1, 2, 3, 7, 8, 12, 13, 17, 19, 21, 30, 32, 33], [0, 4, 5, 6, 10, 16]] 
Configuration: {'epsilon': 0.5000000000000001, 'min_com_size': 3} 
Scoring: FitnessResult(min=None, max=None, score=0.757232963115316, std=None)

Community Detection: CDlib

Sommaire¶

1. Installation de CDlib 🔼¶

2. Community Discovery Workflow 🔼¶

2.A Création des graphes 🔼¶

2.B Algorithme(s) du Community Discovery: choix et configuration 🔼¶

2.C Évaluation du Clustering (Fitness functions) (to top)¶

2.D Évaluation du Clustering (Comparaison) (to top)¶

2.E Visualisation des communautés/statistiques (to top)¶

2.E.1 Visualisation des graphes¶

2.E.2 Visualisations de Fitness/comparaison des communautés¶

3. Fonctionnalités avancées : Pooling et Optimization (to top)¶

3.A Pooling¶

3.B Optimization¶

4. Conclusions (to top)¶