Skip to content

Commit

Permalink
Remove unused variables and parameters for Soft Cosine Measure
Browse files Browse the repository at this point in the history
  • Loading branch information
Witiko committed Jan 28, 2018
1 parent 50a7274 commit 08dea4e
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 9 deletions.
5 changes: 2 additions & 3 deletions docs/notebooks/soft_cosine_tutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
"outputs": [],
"source": [
"from time import time\n",
"start_nb = time()\n",
"\n",
"# Initialize logging.\n",
"import logging\n",
Expand Down Expand Up @@ -149,7 +148,7 @@
" raise ValueError(\"SKIP: You need to download the google news model\")\n",
" \n",
"model = KeyedVectors.load_word2vec_format('/data/GoogleNews-vectors-negative300.bin.gz', binary=True)\n",
"similarity_matrix = model.similarity_matrix(corpus, dictionary)\n",
"similarity_matrix = model.similarity_matrix(dictionary)\n",
"del model\n",
"\n",
"print('Cell took %.2f seconds to run.' % (time() - start))"
Expand Down Expand Up @@ -401,7 +400,7 @@
"num_best = 10\n",
"dictionary = corpora.Dictionary(scm_corpus)\n",
"scm_corpus = [dictionary.doc2bow(document) for document in scm_corpus]\n",
"similarity_matrix = model.wv.similarity_matrix(scm_corpus, dictionary)\n",
"similarity_matrix = model.wv.similarity_matrix(dictionary)\n",
"instance = SoftCosineSimilarity(scm_corpus, similarity_matrix, num_best=num_best)\n",
"\n",
"print('Cell took %.2f seconds to run.' %(time() - start))"
Expand Down
9 changes: 4 additions & 5 deletions gensim/models/keyedvectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,19 +560,18 @@ def similar_by_vector(self, vector, topn=10, restrict_vocab=None):
"""
return self.most_similar(positive=[vector], topn=topn, restrict_vocab=restrict_vocab)

def similarity_matrix(self, corpus, dictionary, threshold=0.0, exponent=2.0,
nonzero_limit=100, dtype=REAL):
def similarity_matrix(self, dictionary, threshold=0.0, exponent=2.0, nonzero_limit=100,
dtype=REAL):
"""Constructs a term similarity matrix for computing Soft Cosine Measure.
Constructs a sparse term similarity matrix in the `scipy.sparse.csc_matrix` format for
computing Soft Cosine Measure between documents.
Parameters
----------
corpus : list of lists of (int, float) two-tuples
A list of documents in the gensim document format.
dictionary : gensim.corpora.Dictionary
A dictionary associated with the corpus.
A dictionary that specifies a mapping between words and the indices of rows and columns
of the resulting term similarity matrix.
threshold : float, optional
Only pairs of words whose embeddings are more similar than `threshold` are considered
when building the sparse term similarity matrix. Defaults to `0.0`.
Expand Down
2 changes: 1 addition & 1 deletion gensim/similarities/docsim.py
Original file line number Diff line number Diff line change
Expand Up @@ -595,7 +595,7 @@ class SoftCosineSimilarity(interfaces.SimilarityABC):
>>> # Construct a bag-of-words corpus, a dictionary, and a term similarity matrix.
>>> dictionary = Dictionary(corpus)
>>> corpus = [dictionary.doc2bow(document) for document in corpus]
>>> similarity_matrix = model.wv.similarity_matrix(corpus, dictionary)
>>> similarity_matrix = model.wv.similarity_matrix(dictionary)
>>> index = SoftCosineSimilarity(corpus, similarity_matrix, num_best=10)
>>> # Make a query.
>>> query = 'Yummy! Great view of the Bellagio Fountain show.'
Expand Down

0 comments on commit 08dea4e

Please sign in to comment.