Skip to content

Commit

Permalink
Use hanging indents in EuclideanKeyedVectors.similarity_matrix
Browse files: browse the repository at this point in the history
  • Loading branch information
Witiko committed Jan 28, 2018
1 parent 621ed0d commit e1eb7cd
Showing 1 changed file with 11 additions and 9 deletions.
20 changes: 11 additions & 9 deletions gensim/models/keyedvectors.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -620,17 +620,19 @@ def similarity_matrix(self, dictionary, threshold=0.0, exponent=2.0, nonzero_lim
continue # A word from the dictionary not present in the word2vec model.
# Traverse upper triangle columns.
if matrix_order <= nonzero_limit + 1: # Traverse all columns.
columns = ((w2_index, self.similarity(w1, dictionary[w2_index]))
for w2_index in range(w1_index + 1, matrix_order)
if w1_index != w2_index and dictionary[w2_index] in self.vocab)
columns = (
(w2_index, self.similarity(w1, dictionary[w2_index]))
for w2_index in range(w1_index + 1, matrix_order)
if w1_index != w2_index and dictionary[w2_index] in self.vocab)
else: # Traverse only columns corresponding to the embeddings closest to w1.
num_nonzero = similarity_matrix[w1_index].getnnz() - 1
columns = ((dictionary.token2id[w2], similarity)
for _, (w2, similarity)
in zip(range(nonzero_limit - num_nonzero),
self.most_similar(positive=[w1],
topn=nonzero_limit - num_nonzero))
if w2 in dictionary.token2id and w1_index < dictionary.token2id[w2])
columns = (
(dictionary.token2id[w2], similarity)
for _, (w2, similarity)
in zip(
range(nonzero_limit - num_nonzero),
self.most_similar(positive=[w1], topn=nonzero_limit - num_nonzero))
if w2 in dictionary.token2id and w1_index < dictionary.token2id[w2])
columns = sorted(columns, key=lambda x: x[0])
for w2_index, similarity in columns:
assert w1_index < w2_index
Expand Down

0 comments on commit e1eb7cd

Please sign in to comment.