-
Notifications
You must be signed in to change notification settings - Fork 0
/
communities.py
98 lines (81 loc) · 2.95 KB
/
communities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import numpy as np
import networkx as nx
from sklearn.cluster import SpectralClustering
from sklearn import metrics
from sklearn.cluster import KMeans
import scipy
import collections
import matplotlib.pyplot as plt
def load_friendships_data():
try:
f=open("data/friendships.txt", "r")
content = f.read()
f.close()
return content
except:
return None
def parseFile(file):
userList = []
for review in file.split("\n\n"):
line = review.split("\n")
user = line[0].split("user:")[1].strip()
friends = list(((user, friend) for friend in line[1].split('\t')[1:]))
summary = line[2].split()[1]
review = line[3].split()[1]
userList.append((user, friends, summary, review))
return userList
def fillGraph(userList):
G = nx.Graph()
for (user, friends, summary, review) in userList:
G.add_node(user)
G.add_edges_from(friends)
return G
def spectral_clustering(G, num_clusters):
# Computes Laplacian matrix
L = laplacian_matrix(G)
# Computes all eigen vectors and eigen values
vals, vecs = np.linalg.eig(L)
# Sort these based on the eigenvalues
vecs = vecs[:,np.argsort(vals)]
vals = vals[np.argsort(vals)]
# Remove the first eigenvector. We need to transpose it to use it in k-means
relevant_vectors = vecs[:, 1:num_clusters].reshape(-1, num_clusters - 1)
# Compute split based on vector space
componspace = KMeans(n_clusters=num_clusters).fit_predict(relevant_vectors)
communities = compute_community_for_user(G.nodes(), componspace)
return communities
def laplacian_matrix(G):
A = nx.to_numpy_matrix(G)
# Compute sums of rows and enters them to a sums vector
sums = A.sum(axis=1)
# Compute diagonal matrix out of sums - i.e. node degrees
D = np.zeros((A.shape[0], A.shape[1]))
np.fill_diagonal(D, np.array(sums.reshape(1,-1))[0])
# Computes unnormalised graph Laplacian
L = D - A
return L
def scipy_spectral(G, num_clusters):
adj_mat = nx.to_numpy_matrix(G)
sc = SpectralClustering(num_clusters, affinity='precomputed',n_init=100).fit_predict(adj_mat)
communities = compute_community_for_user(G.nodes(), sc)
return communities
def compute_community_for_user(users, communities):
predictions = list(zip(users, communities))
communities = collections.defaultdict(list)
for p in predictions:
key, value = p
if key not in communities[value]:
communities[value].append(key)
return communities
def create_communities(number_of_communities, type='spectral'):
file = load_friendships_data()
userList = parseFile(file)
G = fillGraph(userList)
communities = {}
if type is 'spectral':
communities = spectral_clustering(G, number_of_communities)
if type is 'scipy':
communities = scipy_spectral(G, number_of_communities)
return communities
if __name__ == "__main__":
create_communities(4)