-
Notifications
You must be signed in to change notification settings - Fork 1
/
clustering.py
73 lines (59 loc) · 2.46 KB
/
clustering.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import time
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale
def Cluster(features, sample_size):
    """Cluster feature vectors with K-Means and plot the clusters in 2-D.

    The features are projected onto their first two principal components,
    K-Means is fitted on that projection, and the resulting decision regions,
    data points and centroids are drawn with matplotlib. The elapsed time is
    printed once the plot window has been shown.

    Args:
        features: 2-D array-like of shape (n_samples, n_features) holding one
            feature vector per sample. PCA needs at least two samples and
            two features; K-Means needs at least ``N_CLUSTS`` samples.
        sample_size: Currently unused; kept so existing callers keep working.

    Returns:
        None. The function's outputs are the plot and the printed messages.
    """
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed time.
    tic = time.perf_counter()
    print('Starting clustering (using KMeans)...')

    # Number of clusters for K-Means
    N_CLUSTS = 5
    SEED = 42
    # The global NumPy seed was set by the original code; it is kept so any
    # later np.random use in the calling program behaves as before.
    np.random.seed(SEED)

    # Project to two components: the plot below is two-dimensional, and the
    # model must be fitted in the same space as the mesh it later predicts.
    reduced_data = PCA(n_components=2, random_state=SEED).fit_transform(features)

    km = KMeans(n_clusters=N_CLUSTS, n_init=10, random_state=SEED)
    km.fit(reduced_data)

    # Step size of the mesh. Decrease to increase the quality of the VQ.
    # NOTE: the mesh has (range / h)^2 points, so a very wide PCA range makes
    # this step memory-hungry.
    h = .02

    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max] x [y_min, y_max].
    x_min, x_max = reduced_data[:, 0].min() - 1, reduced_data[:, 0].max() + 1
    y_min, y_max = reduced_data[:, 1].min() - 1, reduced_data[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    # Obtain labels for each point in the mesh using the model fitted above.
    Z = km.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.figure(1)
    plt.clf()
    plt.imshow(Z, interpolation='nearest',
               extent=(xx.min(), xx.max(), yy.min(), yy.max()),
               cmap=plt.cm.Paired,
               aspect='auto', origin='lower')

    plt.plot(reduced_data[:, 0], reduced_data[:, 1], 'k.', markersize=2)

    # Plot the centroids as a white X
    centroids = km.cluster_centers_
    plt.scatter(centroids[:, 0], centroids[:, 1],
                marker='x', s=169, linewidths=3,
                color='w', zorder=10)
    plt.title('K-means clustering on the digits dataset (PCA-reduced data)\n'
              'Centroids are marked with white cross')
    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    plt.xticks(())
    plt.yticks(())
    plt.show()

    toc = time.perf_counter()
    print("Total time taken for clustering = ", toc-tic)