From e41c435ba1011469577f3064734aa4933d456183 Mon Sep 17 00:00:00 2001 From: David C Hall Date: Wed, 20 Jun 2018 16:07:39 -0700 Subject: [PATCH 1/2] Fix citation --- gensim/models/phrases.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gensim/models/phrases.py b/gensim/models/phrases.py index 5ff4e99051..40cc28e271 100644 --- a/gensim/models/phrases.py +++ b/gensim/models/phrases.py @@ -621,8 +621,8 @@ def __getitem__(self, sentence): def original_scorer(worda_count, wordb_count, bigram_count, len_vocab, min_count, corpus_word_count): - """Calculation score, based on original `"Efficient Estimaton of Word Representations in Vector Space" by - Mikolov <https://arxiv.org/pdf/1301.3781.pdf>`_. + """Calculation score, based on original `"Distributed Representations of Words and Phrases + and their Compositionality" by Mikolov <https://arxiv.org/abs/1310.4546>`_. Parameters ---------- @@ -641,7 +641,7 @@ def original_scorer(worda_count, wordb_count, bigram_count, len_vocab, min_count Notes ----- - Formula: :math:`\\frac{(worda\_count - min\_count) * len\_vocab }{ (worda\_count * wordb\_count)}`. + Formula: :math:`\\frac{(bigram\_count - min\_count) * len\_vocab }{ (worda\_count * wordb\_count)}`. """ return (bigram_count - min_count) / worda_count / wordb_count * len_vocab From 9fc1ed666794f12f512ee1af97d90824c0cb63d7 Mon Sep 17 00:00:00 2001 From: David C Hall Date: Wed, 20 Jun 2018 16:11:56 -0700 Subject: [PATCH 2/2] Fix citation --- gensim/sklearn_api/phrases.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gensim/sklearn_api/phrases.py b/gensim/sklearn_api/phrases.py index 7579a09cc9..22bddb2c92 100644 --- a/gensim/sklearn_api/phrases.py +++ b/gensim/sklearn_api/phrases.py @@ -35,9 +35,9 @@ class PhrasesTransformer(TransformerMixin, BaseEstimator): """Base Phrases module, wraps :class:`~gensim.models.phrases.Phrases`. - For more information, please have a look to `Mikolov, et. 
al: "Efficient Estimation of Word Representations in - Vector Space" <https://arxiv.org/pdf/1301.3781.pdf>`_ and `Gerlof Bouma: "Normalized (Pointwise) Mutual Information - in Collocation Extraction" <https://svn.spraakdata.gu.se/repos/gerlof/pub/www/Docs/npmi-pmi.pdf>`_. + For more information, please have a look to `Mikolov, et. al: "Distributed Representations of Words and Phrases and + their Compositionality" <https://arxiv.org/abs/1310.4546>`_ and `Gerlof Bouma: "Normalized (Pointwise) Mutual + Information in Collocation Extraction" <https://svn.spraakdata.gu.se/repos/gerlof/pub/www/Docs/npmi-pmi.pdf>`_. """ def __init__(self, min_count=5, threshold=10.0, max_vocab_size=40000000, @@ -63,8 +63,8 @@ def __init__(self, min_count=5, threshold=10.0, max_vocab_size=40000000, or with a function with the expected parameter names. Two built-in scoring functions are available by setting `scoring` to a string: - * 'default': Explained in `Mikolov, et. al: "Efficient Estimation of Word Representations - in Vector Space" <https://arxiv.org/pdf/1301.3781.pdf>`_. + * 'default': Explained in `Mikolov, et. al: "Distributed Representations of Words and Phrases + and their Compositionality" <https://arxiv.org/abs/1310.4546>`_. * 'npmi': Explained in `Gerlof Bouma: "Normalized (Pointwise) Mutual Information in Collocation Extraction" <https://svn.spraakdata.gu.se/repos/gerlof/pub/www/Docs/npmi-pmi.pdf>`_.