-
Notifications
You must be signed in to change notification settings - Fork 2
/
helper.py
74 lines (61 loc) · 1.71 KB
/
helper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
from mrjob.job import MRJob
from MRKMeans import MRKMeans
import sys,os
import os.path
import shutil
from math import sqrt
# Initial centroids file: copied to CENTROIDS_FILE at startup and parsed by
# get_first_c() as the starting point for the convergence comparison.
input_c = "cent.txt"
# Working copy of the centroids: rewritten by write_c() after every job run
# and handed to the MRKMeans job through its --c option.
CENTROIDS_FILE = "temp/cent.txt"
def get_c(job, runner):
    """Collect the output keys produced by a finished job run.

    Every line yielded by ``runner.stream_output()`` is decoded with
    ``job.parse_output_line``; only the key of each (key, value) pair is
    kept, in the order the runner streams them.
    """
    decoded_pairs = (
        job.parse_output_line(raw_line) for raw_line in runner.stream_output()
    )
    return [out_key for out_key, _out_value in decoded_pairs]
def get_first_c(fname):
    """Read centroids from a tab-separated text file.

    Each non-blank line must hold exactly two tab-separated numbers,
    ``x<TAB>y``.  Blank and whitespace-only lines are skipped.

    Args:
        fname: path of the centroids file to read.

    Returns:
        A list of ``[x, y]`` float pairs in file order.

    Raises:
        ValueError: if a non-blank line does not contain exactly two
            tab-separated fields, or a field is not a valid float.
    """
    centroids = []
    with open(fname, 'r') as f:
        for line in f:
            # Lines read from a file keep their trailing newline, so a plain
            # truthiness test never filters anything; strip before testing so
            # blank lines (e.g. a trailing empty line) are really skipped.
            if not line.strip():
                continue
            x, y = line.split('\t')
            centroids.append([float(x), float(y)])
    return centroids
def _centroid_sort_key(entry):
    """Return a sort key that orders "k,cx,cy" entries by numeric k.

    Entries whose leading field is not a number sort after the numeric ones,
    in plain text order.
    """
    label = entry.split(',', 1)[0]
    try:
        return (0, float(label), entry)
    except ValueError:
        return (1, 0.0, entry)


def write_c(centroids):
    """Write the centroids to CENTROIDS_FILE, one ``cx<TAB>cy`` line each.

    Args:
        centroids: iterable of strings of the form ``"k,cx,cy"``.  The
            caller's sequence is not modified.

    Raises:
        ValueError: if an entry does not split into exactly three
            comma-separated fields.
    """
    # Order by the numeric value of the leading field rather than by raw
    # text: a text sort puts "10,..." before "2,...", which would write the
    # centroids out of order once there are ten or more of them.
    # NOTE(review): assumes the leading field is the cluster index emitted by
    # MRKMeans -- confirm against the job's reducer output.
    ordered = sorted(centroids, key=_centroid_sort_key)
    with open(CENTROIDS_FILE, 'w') as f:
        for c in ordered:
            # The leading field is only used for ordering; it is not written.
            k, cx, cy = c.split(',')
            f.write("%s\t%s\n" % (cx, cy))
def dist_vec(v1,v2):
    """Return the Euclidean distance between two points.

    Only the first two components (index 0 and 1) of each argument are used.
    """
    delta_x = v2[0] - v1[0]
    delta_y = v2[1] - v1[1]
    return sqrt(delta_x * delta_x + delta_y * delta_y)
def diff(cs1,cs2):
    """Return the largest distance between same-index points of cs1 and cs2.

    Every point of ``cs1`` is paired with the point at the same index in
    ``cs2`` and measured with ``dist_vec``.  The result is 0.0 when ``cs1``
    is empty.
    """
    distances = [
        dist_vec(point, cs2[position]) for position, point in enumerate(cs1)
    ]
    # Seed with 0.0 so an empty input (or all-zero distances) yields 0.0.
    return max([0.0] + distances)
if __name__ == '__main__':
    # Driver: run the MRKMeans job repeatedly, feeding each run the centroids
    # written by the previous one, until no centroid moves by 0.0001 or more.

    # Everything after the script name is forwarded to the MRKMeans job.
    args = sys.argv[1:]
    print(args)

    # Start from a fresh working copy of the initial centroids.  The working
    # directory may not exist yet and there may be no previous working file
    # (e.g. on a first run), so create / remove only when needed instead of
    # failing with OSError.
    work_dir = os.path.dirname(CENTROIDS_FILE)
    if work_dir and not os.path.isdir(work_dir):
        os.makedirs(work_dir)
    if os.path.isfile(CENTROIDS_FILE):
        os.remove(CENTROIDS_FILE)
    shutil.copy(input_c, CENTROIDS_FILE)

    old_c = get_first_c(input_c)
    i = 1
    while True:
        print("Iteration #%i" % i)
        mr_job = MRKMeans(args=args + ['--c=' + CENTROIDS_FILE])
        with mr_job.make_runner() as runner:
            runner.run()
            # Read the output while the runner is still open, then persist it
            # for the next iteration.
            centroids = get_c(mr_job, runner)
            write_c(centroids)

        # Re-read the file just written so old and new centroids are compared
        # in the same parsed form.
        n_c = get_first_c(CENTROIDS_FILE)
        for c in n_c:
            print("%s %s" % (c[0], c[1]))

        max_d = diff(n_c, old_c)
        print(max_d)
        if max_d < 0.0001:
            break
        old_c = n_c
        i = i + 1