-
Notifications
You must be signed in to change notification settings - Fork 0
/
CoreConceptsBasedClustering.py
46 lines (37 loc) · 1.52 KB
/
CoreConceptsBasedClustering.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
def main():
    """Cluster matrix terms around a fixed set of core concepts.

    Loads a numeric matrix from ``./OutputDir/window_matrix.csv`` and its row
    labels from ``./OutputDir/window_matrix_terms.txt`` (the i-th term labels
    the i-th matrix row). Rows whose term is a core concept are separated
    from all other rows; every other term is then attached to the core
    concept whose row has the highest cosine similarity with its own row.
    A term is left unassigned when none of its similarities is strictly
    positive. The empty cluster mapping and the final clusters are printed
    to stdout.

    Raises:
        ValueError: if the terms file lists more terms than the matrix has
            rows, so some term would have no row to go with it.
    """
    # ndmin keeps a one-row matrix 2-D and a single-term file 1-D, so the
    # term/row pairing below also works for these degenerate inputs.
    matrix = np.loadtxt(
        "./OutputDir/window_matrix.csv", delimiter=",", ndmin=2
    )
    raw_terms = np.loadtxt(
        "./OutputDir/window_matrix_terms.txt",
        delimiter="\t",
        dtype=str,
        ndmin=1,
    )
    # Plain ``str`` keys print cleanly regardless of the NumPy version.
    terms = [str(term) for term in raw_terms]

    if len(terms) > len(matrix):
        raise ValueError(
            "window_matrix_terms.txt lists %d terms but window_matrix.csv "
            "has only %d rows" % (len(terms), len(matrix))
        )

    # A tuple (rather than a set) keeps the printed cluster order stable
    # from one run to the next.
    core_concepts = ("Musician", "Album", "Genre", "Instrument", "Performance")
    clusters = {core: [] for core in core_concepts}
    print(clusters)

    # Separate the matrix rows into core-concept rows and other-concept rows.
    # A term that occurs more than once keeps its last row.
    core_rows = {}
    other_rows = {}
    for term, row in zip(terms, matrix):
        if term in clusters:
            core_rows[term] = row
        else:
            other_rows[term] = row

    # Nothing can be attached unless both groups are non-empty.
    if core_rows and other_rows:
        core_names = list(core_rows)
        # One vectorised call instead of one call per (term, core) pair:
        # result rows follow other_rows, result columns follow core_names.
        similarities = cosine_similarity(
            np.vstack(list(other_rows.values())),
            np.vstack(list(core_rows.values())),
        )
        # argmax returns the first maximum, so ties go to the core concept
        # that appears first in the terms file.
        best_columns = similarities.argmax(axis=1)
        for term, scores, column in zip(other_rows, similarities, best_columns):
            # Only a strictly positive similarity attaches a term.
            if scores[column] > 0:
                clusters[core_names[column]].append(term)

    for core_concept, members in clusters.items():
        print("cluster: " + core_concept)
        print(members)
# Run the clustering only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()