-
Notifications
You must be signed in to change notification settings - Fork 1
/
louvain-split.py
145 lines (118 loc) · 4.19 KB
/
louvain-split.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# Louvain method for splitting network into communities.
import collections
import copy
import random
def calc_Q(graph, community_of):
k = dict()
for node in graph:
k[node] = sum(graph[node].values())
m = sum(k.values())
Q = 0.
for i in graph:
for j, A_ij in graph[i].items():
if (community_of[i] == community_of[j]):
Q += (A_ij - k[i] * k[j] / (2. * m))
Q /= (2. * m)
return Q
def calc_dQ(graph, community_of, node, new_comm):
Q0 = calc_Q(graph, community_of)
new_comm_of = copy.copy(community_of)
new_comm_of[node] = new_comm
Q1 = calc_Q(graph, new_comm_of)
return Q1 - Q0
def louvain_split(graph, level=0):
print("Level", level, len(graph))
for node in graph:
for neighbor, weight in graph[node].items():
assert neighbor in graph, (node, neighbor, graph[node], graph)
# Initialize everyone to their own community of 1.
# Maps node -> community
community_of = {node : node for node in graph}
# Greedily try to consolidate communities.
# Repeatedly iterate over all nodes until no progress is made.
progress = True
while progress:
progress = False
nodes = graph.keys()
random.shuffle(nodes)
for node in nodes:
# For each node, see which comminity gives it the max modularity.
best_dQ = 0
best_comm = community_of[node]
for neighbor in graph[node]:
#print community_of, neighbor
dQ = calc_dQ(graph, community_of, node, community_of[neighbor])
if dQ > best_dQ:
best_dQ = dQ
best_comm = community_of[neighbor]
if best_dQ > 0:
progress = True
# Move node to the best community.
community_of[node] = best_comm
print("Q =", calc_Q(graph, community_of))
communities = collections.defaultdict(list)
for node in community_of:
communities[community_of[node]].append(node)
# If there was no change in communities, then we're done.
if len(communities) == len(graph):
return graph, {node : [node] for node in graph}
# Meta step: Build a new graph where each community becomes a node.
meta_graph = dict()
for comm in communities:
meta_graph[comm] = collections.defaultdict(int)
for node in communities[comm]:
for neighbor, weight in graph[node].items():
meta_graph[comm][community_of[neighbor]] += weight
top_graph, mapping = louvain_split(meta_graph, level + 1)
return top_graph, {node : (mapping[community_of[node]] + [community_of[node]])
for node in graph}
def load_graph(filename):
graph = dict()
with file(filename, "r") as f:
for line in f:
parts = line.strip().split()
# Every edge has an initial weight of 1.
graph[parts[0]] = {neighbor : 1 for neighbor in parts[1:]}
return graph
def load_tsv(filename):
graph = collections.defaultdict(dict)
with file(filename, "r") as f:
for line in f:
if line[0] == "%":
continue
a, b = line.strip().split()
# Every edge has an initial weight of 1.
graph[a][b] = 1
graph[b][a] = 1
return graph
def main():
import pprint
import sys
graph = load_graph(sys.argv[1])
#graph = load_tsv(sys.argv[1])
top_graph, partition = louvain_split(graph)
community_of = {node: comm[0] for node, comm in partition.items()}
print("Q =", calc_Q(graph, community_of))
communities = collections.defaultdict(list)
for node in community_of:
communities[community_of[node]].append(node)
print("Num communities =", len(communities))
print("Min community size =", min(len(nodes) for nodes in communities.values()))
print("Max community size =", max(len(nodes) for nodes in communities.values()))
#pprint.pprint(top_graph)
#pprint.pprint(partition)
import data_reader
db = data_reader.Database()
comm_to_name = {comm : db.name_of(min(nodes))
for comm, nodes in communities.items()}
print("Graph")
for comm in top_graph:
neighs = top_graph[comm]
print(comm_to_name[comm], {comm_to_name[other] : neighs[other] for other in neighs})
print("Communities")
for comm, nodes in communities.items():
print("Community", comm_to_name[comm])
for node in nodes:
print("", db.num2id(node), db.name_of(node))
if __name__ == "__main__":
main()