Skip to content

Commit

Permalink
Make LevenshteinSimilarityIndex.most_similar easier to read
Browse files (browse the repository at this point in the history)
  • Loading branch information
Witiko committed Jan 12, 2019
1 parent f3258d9 commit 0d7f611
Showing 1 changed file with 8 additions and 12 deletions.
20 changes: 8 additions & 12 deletions gensim/similarities/levenshtein.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import logging
from math import floor

from gensim.matutils import argsort
from gensim.similarities.termsim import TermSimilarityIndex

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -139,15 +140,10 @@ def __init__(self, dictionary, alpha=1.8, beta=5.0, threshold=0.0):
super(LevenshteinSimilarityIndex, self).__init__()

def most_similar(self, t1, topn=10):
    """Yield the dictionary terms most similar to `t1`, with their similarities.

    Every term in `self.dictionary` other than `t1` itself is scored with
    `levsim(t1, t2, self.alpha, self.beta, self.threshold)`. At most `topn`
    candidates are selected, and only those with a strictly positive
    similarity are yielded.

    Parameters
    ----------
    t1 : str
        The term for which the most similar terms are retrieved.
        (Type presumed from the Levenshtein context; confirm against callers.)
    topn : int, optional
        Maximum number of candidate terms to consider.

    Yields
    ------
    tuple
        `(t2, similarity)` pairs.

    Notes
    -----
    Ordering relies on `argsort(..., reverse=True)` returning the indices of
    the `topn` largest similarities in descending order -- TODO confirm
    against `gensim.matutils.argsort`.

    """
    # Candidate terms and their similarities are kept in two parallel lists
    # so that the indices returned by `argsort` address both of them.
    terms = [t2 for t2 in self.dictionary.values() if t1 != t2]
    similarities = [levsim(t1, t2, self.alpha, self.beta, self.threshold) for t2 in terms]
    for index in argsort(similarities, topn, reverse=True):
        t2 = terms[index]
        similarity = similarities[index]
        # Zero (or negative) similarity means "not similar"; never report it.
        if similarity > 0:
            yield (t2, similarity)

0 comments on commit 0d7f611

Please sign in to comment.