-
Notifications
You must be signed in to change notification settings - Fork 4
/
ex17-bag-of-words
43 lines (35 loc) · 1.71 KB
/
ex17-bag-of-words
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#!/usr/bin/env python
# --------------------------------------------------------------------------------------------------------- Intermediate
# Bag-of-words representation of the nursery rhyme "This Little Piggy".
# Five rows of 18 word counts each.
data = [
    [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1],
    [1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1],
    [1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1],
    [1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1],
    [1, 1, 1, 0, 1, 3, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1],
]
def distance(row1, row2):
    """Return the sum of absolute element-wise differences between two rows.

    Args:
        row1: First 1-D sequence of numbers.
        row2: Second 1-D sequence of numbers, the same length as ``row1``.

    Returns:
        The total of ``|row1[i] - row2[i]|`` over all positions, as a numpy
        scalar.
    """
    # Imported locally because the module-level numpy import only appears
    # further down the file, after this function has already been called.
    import numpy as np

    first = np.asarray(row1)
    second = np.asarray(row2)
    # Reduce inside numpy rather than looping over elements with builtin sum.
    return np.abs(first - second).sum()
def all_pairs(data):
    """Print the matrix of distances between every pair of rows in ``data``.

    Entry ``[i][j]`` of the printed matrix is ``distance(data[j], data[i])``.
    Nothing is returned.
    """
    matrix = []
    for outer in data:
        row = []
        for inner in data:
            row.append(distance(inner, outer))
        matrix.append(row)
    print(matrix)
# Run the Intermediate solution on the sample data.
all_pairs(data)
# --------------------------------------------------------------------------------------------------------- Advanced
import numpy as np
# Five rows of 18 word counts each.
data = [
    [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1],
    [1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1],
    [1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1],
    [1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1],
    [1, 1, 1, 0, 1, 3, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1],
]
def distance(row1, row2):
    """Return the sum of absolute element-wise differences between two rows.

    Both arguments are converted to numpy arrays before being subtracted.
    """
    first = np.asarray(row1)
    second = np.asarray(row2)
    gaps = abs(first - second)
    return sum(gaps)
def all_pairs(data):
    """Return the matrix of distances between every pair of rows in ``data``.

    Entry ``[i][j]`` of the result is ``distance(data[j], data[i])``.
    """
    matrix = []
    for outer in data:
        row = []
        for inner in data:
            row.append(distance(inner, outer))
        matrix.append(row)
    return matrix
def find_nearest_pair(data):
    """Print and return the indices of the two closest distinct rows.

    The pairwise distance matrix is taken from ``all_pairs(data)``. Only the
    diagonal (each row compared with itself) is excluded from the search, so
    two different rows with identical contents (distance 0) are still
    reported as the nearest pair. When several pairs tie, the first minimum
    in row-major order wins.

    Args:
        data: Sequence of at least two equal-length numeric rows.

    Returns:
        The ``(i, j)`` index tuple from ``np.unravel_index`` for the smallest
        off-diagonal entry of the distance matrix. The same tuple is printed.

    Raises:
        ValueError: If the distance matrix is not two-dimensional or has
            fewer than two rows, so no pair of distinct rows exists.
    """
    # np.array (not asarray) so the matrix is always a private float copy
    # that can safely be modified in place below.
    distances = np.array(all_pairs(data), dtype=float)
    if distances.ndim != 2 or distances.shape[0] < 2:
        raise ValueError("find_nearest_pair needs at least two rows")
    # Mask self-distances only. Masking every zero would also hide distinct
    # rows that are identical, which are exactly the nearest pair.
    np.fill_diagonal(distances, np.nan)
    nearest = np.unravel_index(np.nanargmin(distances), distances.shape)
    print(nearest)
    return nearest
# Run the Advanced solution on the sample data.
find_nearest_pair(data)