From c83394ce60fbddfc553df07961662b4f02587327 Mon Sep 17 00:00:00 2001 From: Kieran BW <41634689+FredHappyface@users.noreply.github.com> Date: Tue, 2 Mar 2021 01:33:16 +0000 Subject: [PATCH 1/9] Add py39 wheels to travis/azure --- .travis.yml | 5 ++++- azure-pipelines.yml | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 120597a745..b887cac1b6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -29,6 +29,9 @@ matrix: - os: linux env: - MB_PYTHON_VERSION=3.8 + - os: linux + env: + - MB_PYTHON_VERSION=3.9 before_install: - source multibuild/common_utils.sh - source multibuild/travis_steps.sh @@ -46,4 +49,4 @@ notifications: email: - penkov+gensimwheels@pm.me on_success: always - on_failure: always \ No newline at end of file + on_failure: always diff --git a/azure-pipelines.yml b/azure-pipelines.yml index dfe8f22fb4..8e8102fa12 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -12,6 +12,9 @@ strategy: py38: python.version: '3.8' TOXENV: "py38-win" + py39: + python.version: '3.9' + TOXENV: "py39-win" steps: - task: UsePythonVersion@0 From 8a5a10326e0167d40b141fcd7e53c6c73240cc53 Mon Sep 17 00:00:00 2001 From: Kieran BW <41634689+FredHappyface@users.noreply.github.com> Date: Sat, 6 Mar 2021 13:24:56 +0000 Subject: [PATCH 2/9] fix nms, pyemd, Levenshtein for py39win --- setup.py | 15 +++++++-------- tox.ini | 2 +- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/setup.py b/setup.py index 97e8a5523b..c5c2309a53 100644 --- a/setup.py +++ b/setup.py @@ -17,9 +17,9 @@ import platform import shutil import sys -from setuptools import setup, find_packages, Extension -from setuptools.command.build_ext import build_ext +from setuptools import Extension, find_packages, setup +from setuptools.command.build_ext import build_ext c_extensions = { 'gensim.models.word2vec_inner': 'gensim/models/word2vec_inner.c', @@ -270,20 +270,19 @@ def run(self): # 'pytest-rerunfailures', # disabled 2020-08-28 for 'mock', 
'cython', - 'nmslib', - 'pyemd', 'testfixtures', 'Morfessor==2.0.2a4', - 'python-Levenshtein >= 0.10.2', ] # Add additional requirements for testing on Linux that are skipped on Windows. -linux_testenv = core_testenv[:] + visdom_req + ['pyemd', ] +linux_testenv = core_testenv[:] + visdom_req + ['pyemd', 'nmslib', 'python-Levenshtein >= 0.10.2',] # Skip problematic/uninstallable packages (& thus related conditional tests) in Windows builds. # We still test them in Linux via Travis, see linux_testenv above. # See https://github.com/RaRe-Technologies/gensim/pull/2814 -win_testenv = core_testenv[:] +win_testenv = core_testenv[:] + ['pyemd', 'nmslib', 'python-Levenshtein >= 0.10.2',] +if sys.version_info > (3,8): + win_testenv = core_testenv[:] # # This list partially duplicates requirements_docs.txt. @@ -308,7 +307,7 @@ def run(self): 'nltk', 'testfixtures', 'statsmodels', - 'pyemd', + #'pyemd', 'pandas', ] diff --git a/tox.ini b/tox.ini index f73dedf3d7..12811b8ba5 100644 --- a/tox.ini +++ b/tox.ini @@ -1,6 +1,6 @@ [tox] minversion = 2.0 -envlist = {py36,py37,py38}-{win,linux}, flake8, docs, docs-upload, download-wheels, upload-wheels, test-pypi +envlist = {py36,py37,py38, py39}-{win,linux}, flake8, docs, docs-upload, download-wheels, upload-wheels, test-pypi skipsdist = True platform = linux: linux win: win64 From 516ead41233c56d3241e3508f76ebb702bf17cc9 Mon Sep 17 00:00:00 2001 From: Kieran BW <41634689+FredHappyface@users.noreply.github.com> Date: Sat, 6 Mar 2021 13:41:53 +0000 Subject: [PATCH 3/9] fix docs gen --- setup.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/setup.py b/setup.py index c5c2309a53..073160c7fc 100644 --- a/setup.py +++ b/setup.py @@ -274,15 +274,21 @@ def run(self): 'Morfessor==2.0.2a4', ] +not_py39_win_testenv = [ + 'pyemd', + 'nmslib', + 'python-Levenshtein >= 0.10.2', +] + # Add additional requirements for testing on Linux that are skipped on Windows. 
-linux_testenv = core_testenv[:] + visdom_req + ['pyemd', 'nmslib', 'python-Levenshtein >= 0.10.2',] +linux_testenv = core_testenv[:] + visdom_req + not_py39_win_testenv # Skip problematic/uninstallable packages (& thus related conditional tests) in Windows builds. # We still test them in Linux via Travis, see linux_testenv above. # See https://github.com/RaRe-Technologies/gensim/pull/2814 -win_testenv = core_testenv[:] + ['pyemd', 'nmslib', 'python-Levenshtein >= 0.10.2',] +win_testenv = core_testenv[:] + not_py39_win_testenv if sys.version_info > (3,8): - win_testenv = core_testenv[:] + win_testenv = core_testenv[:] # # This list partially duplicates requirements_docs.txt. @@ -295,7 +301,7 @@ def run(self): # https://packaging.python.org/discussions/install-requires-vs-requirements/ # -docs_testenv = core_testenv + distributed_env + visdom_req + [ +docs_testenv = core_testenv + not_py39_win_testenv + distributed_env + visdom_req + [ 'sphinx <= 2.4.4', # avoid `sphinx >= 3.0` that breaks the build 'sphinx-gallery', 'sphinxcontrib.programoutput', @@ -307,7 +313,6 @@ def run(self): 'nltk', 'testfixtures', 'statsmodels', - #'pyemd', 'pandas', ] From 02bb45d92efd60e77b396b9f74b9011316be61bd Mon Sep 17 00:00:00 2001 From: Kieran BW <41634689+FredHappyface@users.noreply.github.com> Date: Sun, 7 Mar 2021 19:53:40 +0000 Subject: [PATCH 4/9] fix some tests for Levenshtein --- gensim/similarities/levenshtein.py | 6 +++++- gensim/test/test_similarities.py | 16 ++++++++++++++++ setup.py | 2 +- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/gensim/similarities/levenshtein.py b/gensim/similarities/levenshtein.py index ca39e68dd0..d08d80afa5 100644 --- a/gensim/similarities/levenshtein.py +++ b/gensim/similarities/levenshtein.py @@ -43,7 +43,11 @@ def levdist(t1, t2, max_distance=float("inf")): The Levenshtein distance between `t1` and `t2`. """ - import Levenshtein + try: + import Levenshtein + except ImportError: + raise ImportError("Levenshtein not installed. 
Please run `pip install Levenshtein`.") + distance = Levenshtein.distance(t1, t2) if distance > max_distance: diff --git a/gensim/test/test_similarities.py b/gensim/test/test_similarities.py index ee10550da3..c3201d66b5 100644 --- a/gensim/test/test_similarities.py +++ b/gensim/test/test_similarities.py @@ -1546,6 +1546,10 @@ def test_inner_product_corpus_corpus_true_true(self): class TestLevenshteinDistance(unittest.TestCase): def test_max_distance(self): + try: + import Levenshtein # noqa:F401 + except ImportError as e: + raise unittest.SkipTest("Levenshtein library is not available: %s" % e) t1 = "holiday" t2 = "day" max_distance = max(len(t1), len(t2)) @@ -1558,12 +1562,20 @@ def test_max_distance(self): class TestLevenshteinSimilarity(unittest.TestCase): def test_empty_strings(self): + try: + import Levenshtein # noqa:F401 + except ImportError as e: + raise unittest.SkipTest("Levenshtein library is not available: %s" % e) t1 = "" t2 = "" self.assertEqual(1.0, levsim(t1, t2)) def test_negative_hyperparameters(self): + try: + import Levenshtein # noqa:F401 + except ImportError as e: + raise unittest.SkipTest("Levenshtein library is not available: %s" % e) t1 = "holiday" t2 = "day" alpha = 2.0 @@ -1579,6 +1591,10 @@ def test_negative_hyperparameters(self): levsim(t1, t2, -alpha, -beta) def test_min_similarity(self): + try: + import Levenshtein # noqa:F401 + except ImportError as e: + raise unittest.SkipTest("Levenshtein library is not available: %s" % e) t1 = "holiday" t2 = "day" alpha = 2.0 diff --git a/setup.py b/setup.py index 073160c7fc..09140c0ac9 100644 --- a/setup.py +++ b/setup.py @@ -287,7 +287,7 @@ def run(self): # We still test them in Linux via Travis, see linux_testenv above. 
# See https://github.com/RaRe-Technologies/gensim/pull/2814 win_testenv = core_testenv[:] + not_py39_win_testenv -if sys.version_info > (3,8): +if sys.version_info > (3,8,999): # py 3.8.1 is greater than 3.8 so set micro to 999 win_testenv = core_testenv[:] # From da33758cbf8e98fdc3b83619f5fa5fea2e81e01e Mon Sep 17 00:00:00 2001 From: Kieran BW <41634689+FredHappyface@users.noreply.github.com> Date: Sun, 7 Mar 2021 20:32:50 +0000 Subject: [PATCH 5/9] fix tests + flake8 --- gensim/similarities/levenshtein.py | 2 -- gensim/test/test_similarities.py | 5 ++++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/gensim/similarities/levenshtein.py b/gensim/similarities/levenshtein.py index d08d80afa5..5d92cbf6e2 100644 --- a/gensim/similarities/levenshtein.py +++ b/gensim/similarities/levenshtein.py @@ -47,8 +47,6 @@ def levdist(t1, t2, max_distance=float("inf")): import Levenshtein except ImportError: raise ImportError("Levenshtein not installed. Please run `pip install Levenshtein`.") - - distance = Levenshtein.distance(t1, t2) if distance > max_distance: return max(len(t1), len(t2)) diff --git a/gensim/test/test_similarities.py b/gensim/test/test_similarities.py index c3201d66b5..9e4646ad8c 100644 --- a/gensim/test/test_similarities.py +++ b/gensim/test/test_similarities.py @@ -1625,7 +1625,10 @@ def setUp(self): def test_most_similar(self): """Test most_similar returns expected results.""" - + try: + import Levenshtein # noqa:F401 + except ImportError as e: + raise unittest.SkipTest("Levenshtein library is not available: %s" % e) index = LevenshteinSimilarityIndex(self.dictionary) results = list(index.most_similar(u"holiday", topn=1)) self.assertLess(0, len(results)) From 0b1317733571e1a6b30ec743a700d726c8d5ab31 Mon Sep 17 00:00:00 2001 From: Kieran BW <41634689+FredHappyface@users.noreply.github.com> Date: Tue, 9 Mar 2021 13:43:18 +0000 Subject: [PATCH 6/9] address comments --- gensim/similarities/__init__.py | 14 ++++++++++++-- 
gensim/test/test_similarities.py | 25 +++++-------------------- setup.py | 19 +++++++++---------- 3 files changed, 26 insertions(+), 32 deletions(-) diff --git a/gensim/similarities/__init__.py b/gensim/similarities/__init__.py index 3ab45261ad..c5218c4728 100644 --- a/gensim/similarities/__init__.py +++ b/gensim/similarities/__init__.py @@ -3,7 +3,18 @@ """ # bring classes directly into package namespace, to save some typing - +import warnings +try: + import Levenshtein +except ImportError: + warnings.warn( + "The gensim.similarities.levenshtein submodule is disabled, because the optional " + "Levenshtein package is unavailable. " + "Install Levenhstein (e.g. `pip install Levenshtein`) to suppress this warning." + ) + LevenshteinSimilarityIndex = None +else: + from .levenshtein import LevenshteinSimilarityIndex from .docsim import ( # noqa:F401 Similarity, MatrixSimilarity, @@ -15,4 +26,3 @@ UniformTermSimilarityIndex, WordEmbeddingSimilarityIndex, SparseTermSimilarityMatrix) -from .levenshtein import LevenshteinSimilarityIndex # noqa:F401 diff --git a/gensim/test/test_similarities.py b/gensim/test/test_similarities.py index 9e4646ad8c..4929082c2a 100644 --- a/gensim/test/test_similarities.py +++ b/gensim/test/test_similarities.py @@ -1545,11 +1545,8 @@ def test_inner_product_corpus_corpus_true_true(self): class TestLevenshteinDistance(unittest.TestCase): + @unittest.skipIf(LevenshteinSimilarityIndex is None, "gensim.similarities.levenshtein is disabled") def test_max_distance(self): - try: - import Levenshtein # noqa:F401 - except ImportError as e: - raise unittest.SkipTest("Levenshtein library is not available: %s" % e) t1 = "holiday" t2 = "day" max_distance = max(len(t1), len(t2)) @@ -1561,21 +1558,15 @@ def test_max_distance(self): class TestLevenshteinSimilarity(unittest.TestCase): + @unittest.skipIf(LevenshteinSimilarityIndex is None, "gensim.similarities.levenshtein is disabled") def test_empty_strings(self): - try: - import Levenshtein # noqa:F401 - except 
ImportError as e: - raise unittest.SkipTest("Levenshtein library is not available: %s" % e) t1 = "" t2 = "" self.assertEqual(1.0, levsim(t1, t2)) + @unittest.skipIf(LevenshteinSimilarityIndex is None, "gensim.similarities.levenshtein is disabled") def test_negative_hyperparameters(self): - try: - import Levenshtein # noqa:F401 - except ImportError as e: - raise unittest.SkipTest("Levenshtein library is not available: %s" % e) t1 = "holiday" t2 = "day" alpha = 2.0 @@ -1590,11 +1581,8 @@ def test_negative_hyperparameters(self): with self.assertRaises(AssertionError): levsim(t1, t2, -alpha, -beta) + @unittest.skipIf(LevenshteinSimilarityIndex is None, "gensim.similarities.levenshtein is disabled") def test_min_similarity(self): - try: - import Levenshtein # noqa:F401 - except ImportError as e: - raise unittest.SkipTest("Levenshtein library is not available: %s" % e) t1 = "holiday" t2 = "day" alpha = 2.0 @@ -1623,12 +1611,9 @@ def setUp(self): self.documents = [[u"government", u"denied", u"holiday"], [u"holiday", u"slowing", u"hollingworth"]] self.dictionary = Dictionary(self.documents) + @unittest.skipIf(LevenshteinSimilarityIndex is None, "gensim.similarities.levenshtein is disabled") def test_most_similar(self): """Test most_similar returns expected results.""" - try: - import Levenshtein # noqa:F401 - except ImportError as e: - raise unittest.SkipTest("Levenshtein library is not available: %s" % e) index = LevenshteinSimilarityIndex(self.dictionary) results = list(index.most_similar(u"holiday", topn=1)) self.assertLess(0, len(results)) diff --git a/setup.py b/setup.py index 09140c0ac9..cfe42135be 100644 --- a/setup.py +++ b/setup.py @@ -274,21 +274,20 @@ def run(self): 'Morfessor==2.0.2a4', ] -not_py39_win_testenv = [ - 'pyemd', - 'nmslib', - 'python-Levenshtein >= 0.10.2', -] +if not (sys.platform.lower().startswith("win") and sys.version_info[:2] >= (3, 9)): + core_testenv.extend([ + 'pyemd', + 'nmslib', + 'python-Levenshtein >= 0.10.2', + ]) # Add additional 
requirements for testing on Linux that are skipped on Windows. -linux_testenv = core_testenv[:] + visdom_req + not_py39_win_testenv +linux_testenv = core_testenv[:] + visdom_req # Skip problematic/uninstallable packages (& thus related conditional tests) in Windows builds. # We still test them in Linux via Travis, see linux_testenv above. # See https://github.com/RaRe-Technologies/gensim/pull/2814 -win_testenv = core_testenv[:] + not_py39_win_testenv -if sys.version_info > (3,8,999): # py 3.8.1 is greater than 3.8 so set micro to 999 - win_testenv = core_testenv[:] +win_testenv = core_testenv[:] # # This list partially duplicates requirements_docs.txt. @@ -301,7 +300,7 @@ def run(self): # https://packaging.python.org/discussions/install-requires-vs-requirements/ # -docs_testenv = core_testenv + not_py39_win_testenv + distributed_env + visdom_req + [ +docs_testenv = core_testenv + distributed_env + visdom_req + [ 'sphinx <= 2.4.4', # avoid `sphinx >= 3.0` that breaks the build 'sphinx-gallery', 'sphinxcontrib.programoutput', From c94991b98ebff592ca5f3cbd9dc19fad90f96ed2 Mon Sep 17 00:00:00 2001 From: Kieran BW <41634689+FredHappyface@users.noreply.github.com> Date: Tue, 9 Mar 2021 13:46:51 +0000 Subject: [PATCH 7/9] manually revert gensim/similarities/levenshtein.py --- gensim/similarities/levenshtein.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/gensim/similarities/levenshtein.py b/gensim/similarities/levenshtein.py index 5d92cbf6e2..ca39e68dd0 100644 --- a/gensim/similarities/levenshtein.py +++ b/gensim/similarities/levenshtein.py @@ -43,10 +43,8 @@ def levdist(t1, t2, max_distance=float("inf")): The Levenshtein distance between `t1` and `t2`. """ - try: - import Levenshtein - except ImportError: - raise ImportError("Levenshtein not installed. 
Please run `pip install Levenshtein`.") + import Levenshtein + distance = Levenshtein.distance(t1, t2) if distance > max_distance: return max(len(t1), len(t2)) From ba821d491d1dc4a6a4c2de366fc9476340c41316 Mon Sep 17 00:00:00 2001 From: Kieran BW <41634689+FredHappyface@users.noreply.github.com> Date: Tue, 9 Mar 2021 13:52:19 +0000 Subject: [PATCH 8/9] add # noqa:F401 to code in __init__ --- gensim/similarities/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gensim/similarities/__init__.py b/gensim/similarities/__init__.py index c5218c4728..d235a1f69e 100644 --- a/gensim/similarities/__init__.py +++ b/gensim/similarities/__init__.py @@ -5,7 +5,7 @@ # bring classes directly into package namespace, to save some typing import warnings try: - import Levenshtein + import Levenshtein # noqa:F401 except ImportError: warnings.warn( "The gensim.similarities.levenshtein submodule is disabled, because the optional " @@ -14,7 +14,7 @@ ) LevenshteinSimilarityIndex = None else: - from .levenshtein import LevenshteinSimilarityIndex + from .levenshtein import LevenshteinSimilarityIndex # noqa:F401 from .docsim import ( # noqa:F401 Similarity, MatrixSimilarity, From ccc35ba618929f1ea991cff84129bb2a54de66b1 Mon Sep 17 00:00:00 2001 From: Kieran W <41634689+FredHappyface@users.noreply.github.com> Date: Wed, 10 Mar 2021 13:06:19 +0000 Subject: [PATCH 9/9] Update gensim/similarities/__init__.py Co-authored-by: Michael Penkov --- gensim/similarities/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensim/similarities/__init__.py b/gensim/similarities/__init__.py index d235a1f69e..15fa6af536 100644 --- a/gensim/similarities/__init__.py +++ b/gensim/similarities/__init__.py @@ -10,7 +10,7 @@ warnings.warn( "The gensim.similarities.levenshtein submodule is disabled, because the optional " "Levenshtein package is unavailable. " - "Install Levenhstein (e.g. `pip install Levenshtein`) to suppress this warning." 
+ "Install Levenshtein (e.g. `pip install python-Levenshtein`) to suppress this warning." ) LevenshteinSimilarityIndex = None else: