From 08dea4ebf550153b71d581fa29831674b80a9669 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=ADt=20Novotn=C3=BD?= Date: Sun, 28 Jan 2018 17:23:45 +0100 Subject: [PATCH] Remove unused variables and parameters for Soft Cosine Measure --- docs/notebooks/soft_cosine_tutorial.ipynb | 5 ++--- gensim/models/keyedvectors.py | 9 ++++----- gensim/similarities/docsim.py | 2 +- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/docs/notebooks/soft_cosine_tutorial.ipynb b/docs/notebooks/soft_cosine_tutorial.ipynb index 9b9d98fe37..36d68276bf 100644 --- a/docs/notebooks/soft_cosine_tutorial.ipynb +++ b/docs/notebooks/soft_cosine_tutorial.ipynb @@ -41,7 +41,6 @@ "outputs": [], "source": [ "from time import time\n", - "start_nb = time()\n", "\n", "# Initialize logging.\n", "import logging\n", @@ -149,7 +148,7 @@ " raise ValueError(\"SKIP: You need to download the google news model\")\n", " \n", "model = KeyedVectors.load_word2vec_format('/data/GoogleNews-vectors-negative300.bin.gz', binary=True)\n", - "similarity_matrix = model.similarity_matrix(corpus, dictionary)\n", + "similarity_matrix = model.similarity_matrix(dictionary)\n", "del model\n", "\n", "print('Cell took %.2f seconds to run.' % (time() - start))" @@ -401,7 +400,7 @@ "num_best = 10\n", "dictionary = corpora.Dictionary(scm_corpus)\n", "scm_corpus = [dictionary.doc2bow(document) for document in scm_corpus]\n", - "similarity_matrix = model.wv.similarity_matrix(scm_corpus, dictionary)\n", + "similarity_matrix = model.wv.similarity_matrix(dictionary)\n", "instance = SoftCosineSimilarity(scm_corpus, similarity_matrix, num_best=num_best)\n", "\n", "print('Cell took %.2f seconds to run.' %(time() - start))" diff --git a/gensim/models/keyedvectors.py b/gensim/models/keyedvectors.py index d69c25775e..5a399cf78d 100644 --- a/gensim/models/keyedvectors.py +++ b/gensim/models/keyedvectors.py @@ -560,8 +560,8 @@ def similar_by_vector(self, vector, topn=10, restrict_vocab=None): """ return self.most_similar(positive=[vector], topn=topn, restrict_vocab=restrict_vocab) - def similarity_matrix(self, corpus, dictionary, threshold=0.0, exponent=2.0, - nonzero_limit=100, dtype=REAL): + def similarity_matrix(self, dictionary, threshold=0.0, exponent=2.0, nonzero_limit=100, + dtype=REAL): """Constructs a term similarity matrix for computing Soft Cosine Measure. Constructs a a sparse term similarity matrix in the `scipy.sparse.csc_matrix` format for @@ -569,10 +569,9 @@ def similarity_matrix(self, corpus, dictionary, threshold=0.0, exponent=2.0, Parameters ---------- - corpus : list of lists of (int, float) two-tuples - A list of documents in the gensim document format. dictionary : gensim.corpora.Dictionary - A dictionary associated with the corpus. + A dictionary that specifies a mapping between words and the indices of rows and columns + of the resulting term similarity matrix. threshold : float, optional Only pairs of words whose embeddings are more similar than `threshold` are considered when building the sparse term similarity matrix. Defaults to `0.0`. diff --git a/gensim/similarities/docsim.py b/gensim/similarities/docsim.py index fffda273cc..61d5b26a7d 100755 --- a/gensim/similarities/docsim.py +++ b/gensim/similarities/docsim.py @@ -595,7 +595,7 @@ class SoftCosineSimilarity(interfaces.SimilarityABC): >>> # Construct a bag-of-words corpus, a dictionary, and a term similarity matrix. >>> dictionary = Dictionary(corpus) >>> corpus = [dictionary.doc2bow(document) for document in corpus] - >>> similarity_matrix = model.wv.similarity_matrix(corpus, dictionary) + >>> similarity_matrix = model.wv.similarity_matrix(dictionary) >>> index = SoftCosineSimilarity(corpus, similarity_matrix, num_best=10) >>> # Make a query. >>> query = 'Yummy! Great view of the Bellagio Fountain show.'