From 424755dc14d281fb62df7afb7274118fa31c9e6c Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Sun, 22 Mar 2020 12:54:09 +0900 Subject: [PATCH 1/8] flake8 fixes --- smart_open/tests/test_smart_open.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/smart_open/tests/test_smart_open.py b/smart_open/tests/test_smart_open.py index 927a18c9..f590f705 100644 --- a/smart_open/tests/test_smart_open.py +++ b/smart_open/tests/test_smart_open.py @@ -308,7 +308,7 @@ def test_pathlib_monkeypath_read_gz(self): # Check that standart implementation can't work with gzip with path.open("r") as infile: - with self.assertRaises(Exception) as context: + with self.assertRaises(Exception): lines = infile.readlines() # Check that out implementation works with gzip @@ -316,6 +316,7 @@ def test_pathlib_monkeypath_read_gz(self): with path.open("r") as infile: lines = infile.readlines() + self.assertEqual(len(lines), 3) _patch_pathlib(obj.old_impl) From c642fd07c118c17c72e975b96bc4ab116347e484 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Sun, 22 Mar 2020 13:17:20 +0900 Subject: [PATCH 2/8] fix wording in README.rst --- README.rst | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/README.rst b/README.rst index 51a95e03..9c46c0dc 100644 --- a/README.rst +++ b/README.rst @@ -385,21 +385,22 @@ Otherwise, the transparent decompression will not occur. Drop-in replacement of ``pathlib.Path.open`` -------------------------------------------- -Now you can natively use ``smart_open.open`` with your ``Path`` objects. -You can't transparently read text from compressed file with original ``Path.open``, but can after ``patch_pathlib``. +``smart_open.open`` can also be used with ``Path`` objects. +The built-in `Path.open()` is not able to read text from compressed files, so use ``patch_pathlib`` to replace it with `smart_open.open()` instead. +This can be helpful when e.g. working with compressed files. .. code-block:: python - >>> from pathlib import Path - >>> from smart_open.smart_open_lib import patch_pathlib - >>> - >>> _ = patch_pathlib() # replace `Path.open` with `smart_open.open` - >>> - >>> path = Path("smart_open/tests/test_data/crime-and-punishment.txt.gz") - >>> - >>> with path.open("r") as infile: - ... print(infile.readline()[:41]) - В начале июля, в чрезвычайно жаркое время + >>> from pathlib import Path + >>> from smart_open.smart_open_lib import patch_pathlib + >>> + >>> _ = patch_pathlib() # replace `Path.open` with `smart_open.open` + >>> + >>> path = Path("smart_open/tests/test_data/crime-and-punishment.txt.gz") + >>> + >>> with path.open("r") as infile: + ... print(infile.readline()[:41]) + В начале июля, в чрезвычайно жаркое время Comments, bug reports ===================== From bf6b3dbc2ae93cdb44ed8782fb5473cf293fd786 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Sun, 22 Mar 2020 13:17:34 +0900 Subject: [PATCH 3/8] simplify monkey patch --- smart_open/smart_open_lib.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/smart_open/smart_open_lib.py b/smart_open/smart_open_lib.py index 11c335b0..bb1fef13 100644 --- a/smart_open/smart_open_lib.py +++ b/smart_open/smart_open_lib.py @@ -945,11 +945,6 @@ def __exit__(self, exc_type, exc_val, exc_tb): def _patch_pathlib(func): """Replace `Path.open` with `func`""" - pathlib = sys.modules.get("pathlib", None) - - if not pathlib: - raise RuntimeError("Can't patch 'pathlib.Path.open', you should import 'pathlib' first") - old_impl = pathlib.Path.open pathlib.Path.open = func return old_impl From dfcaf2e7134a220954ab69e3e064d297f47cd76d Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Sun, 22 Mar 2020 13:17:42 +0900 Subject: [PATCH 4/8] fix typos in comments --- smart_open/tests/test_smart_open.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/smart_open/tests/test_smart_open.py b/smart_open/tests/test_smart_open.py index f590f705..1c4e8135 100644 --- a/smart_open/tests/test_smart_open.py +++ b/smart_open/tests/test_smart_open.py @@ -306,12 +306,12 @@ def test_pathlib_monkeypath(self): def test_pathlib_monkeypath_read_gz(self): path = pathlib.Path(CURR_DIR) / 'test_data' / 'crime-and-punishment.txt.gz' - # Check that standart implementation can't work with gzip + # Check that standard implementation can't work with gzip with path.open("r") as infile: with self.assertRaises(Exception): lines = infile.readlines() - # Check that out implementation works with gzip + # Check that our implementation works with gzip obj = patch_pathlib() with path.open("r") as infile: lines = infile.readlines() From 9c597eae9a22c19c096c416c81d3496198ab6cd4 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Sun, 22 Mar 2020 14:51:37 +0900 Subject: [PATCH 5/8] check PATHLIB_SUPPORT before monkey patch --- smart_open/smart_open_lib.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/smart_open/smart_open_lib.py b/smart_open/smart_open_lib.py index bb1fef13..e66f6525 100644 --- a/smart_open/smart_open_lib.py +++ b/smart_open/smart_open_lib.py @@ -945,6 +945,8 @@ def __exit__(self, exc_type, exc_val, exc_tb): def _patch_pathlib(func): """Replace `Path.open` with `func`""" + if not PATHLIB_SUPPORT: + raise RuntimeError('install pathlib (or pathlib2) before using this function') old_impl = pathlib.Path.open pathlib.Path.open = func return old_impl From 4eb8ac2685297bb2c813468e418e03ad728e2b8c Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Sun, 22 Mar 2020 15:32:51 +0900 Subject: [PATCH 6/8] conditionally disable pathlib unit tests --- smart_open/tests/test_smart_open.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/smart_open/tests/test_smart_open.py b/smart_open/tests/test_smart_open.py index 1c4e8135..da99fdec 100644 --- a/smart_open/tests/test_smart_open.py +++ b/smart_open/tests/test_smart_open.py @@ -13,7 +13,6 @@ import tempfile import os import hashlib -import pathlib import boto3 import mock @@ -289,7 +288,10 @@ def test_gs_uri_contains_slash(self): self.assertEqual(parsed_uri.bucket_id, "mybucket") self.assertEqual(parsed_uri.blob_id, "mydir/myblob") + @unittest.skipUnless(smart_open_lib.PATHLIB_SUPPORT, "this test requires pathlib") def test_pathlib_monkeypath(self): + from smart_open.smart_open_lib import pathlib + assert pathlib.Path.open != smart_open.open with patch_pathlib(): @@ -303,7 +305,10 @@ def test_pathlib_monkeypath(self): _patch_pathlib(obj.old_impl) assert pathlib.Path.open != smart_open.open + @unittest.skipUnless(smart_open_lib.PATHLIB_SUPPORT, "this test requires pathlib") def test_pathlib_monkeypath_read_gz(self): + from smart_open.smart_open_lib import pathlib + path = pathlib.Path(CURR_DIR) / 'test_data' / 'crime-and-punishment.txt.gz' # Check that standard implementation can't work with gzip @@ -638,9 +643,7 @@ def test_open_with_keywords_explicit_r(self): actual = fin.read() self.assertEqual(expected, actual) - @unittest.skipUnless( - smart_open_lib.PATHLIB_SUPPORT, - "do not test pathlib support if pathlib or backport are not available") + @unittest.skipUnless(smart_open_lib.PATHLIB_SUPPORT, "this test requires pathlib") def test_open_and_read_pathlib_path(self): """If ``pathlib.Path`` is available we should be able to open and read.""" from smart_open.smart_open_lib import pathlib From 4b2a9045d14685a8ff358fab28dbb74d00cc4ce0 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Sun, 22 Mar 2020 19:40:24 +0900 Subject: [PATCH 7/8] misc fixes --- smart_open/tests/test_smart_open.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/smart_open/tests/test_smart_open.py b/smart_open/tests/test_smart_open.py index da99fdec..3ce3625d 100644 --- a/smart_open/tests/test_smart_open.py +++ b/smart_open/tests/test_smart_open.py @@ -289,7 +289,7 @@ def test_gs_uri_contains_slash(self): self.assertEqual(parsed_uri.blob_id, "mydir/myblob") @unittest.skipUnless(smart_open_lib.PATHLIB_SUPPORT, "this test requires pathlib") - def test_pathlib_monkeypath(self): + def test_pathlib_monkeypatch(self): from smart_open.smart_open_lib import pathlib assert pathlib.Path.open != smart_open.open @@ -318,11 +318,12 @@ def test_pathlib_monkeypath_read_gz(self): # Check that our implementation works with gzip obj = patch_pathlib() - with path.open("r") as infile: - lines = infile.readlines() - - self.assertEqual(len(lines), 3) - _patch_pathlib(obj.old_impl) + try: + with path.open("r") as infile: + lines = infile.readlines() + self.assertEqual(len(lines), 3) + finally: + _patch_pathlib(obj.old_impl) class SmartOpenHttpTest(unittest.TestCase): From 7b2ec255bcaa8691764a611f5cee1f5b547a0ec0 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Sun, 22 Mar 2020 20:08:35 +0900 Subject: [PATCH 8/8] disable monkey patch on Py2 --- smart_open/smart_open_lib.py | 2 ++ smart_open/tests/test_smart_open.py | 9 +++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/smart_open/smart_open_lib.py b/smart_open/smart_open_lib.py index e66f6525..741cf5bd 100644 --- a/smart_open/smart_open_lib.py +++ b/smart_open/smart_open_lib.py @@ -947,6 +947,8 @@ def _patch_pathlib(func): """Replace `Path.open` with `func`""" if not PATHLIB_SUPPORT: raise RuntimeError('install pathlib (or pathlib2) before using this function') + if six.PY2: + raise RuntimeError('this monkey patch does not work on Py2') old_impl = pathlib.Path.open pathlib.Path.open = func return old_impl diff --git a/smart_open/tests/test_smart_open.py b/smart_open/tests/test_smart_open.py index 3ce3625d..c55be3e1 100644 --- a/smart_open/tests/test_smart_open.py +++ b/smart_open/tests/test_smart_open.py @@ -288,7 +288,7 @@ def test_gs_uri_contains_slash(self): self.assertEqual(parsed_uri.bucket_id, "mybucket") self.assertEqual(parsed_uri.blob_id, "mydir/myblob") - @unittest.skipUnless(smart_open_lib.PATHLIB_SUPPORT, "this test requires pathlib") + @unittest.skipUnless(smart_open_lib.six.PY3, "our monkey patch only works on Py3") def test_pathlib_monkeypatch(self): from smart_open.smart_open_lib import pathlib @@ -305,7 +305,7 @@ def test_pathlib_monkeypatch(self): _patch_pathlib(obj.old_impl) assert pathlib.Path.open != smart_open.open - @unittest.skipUnless(smart_open_lib.PATHLIB_SUPPORT, "this test requires pathlib") + @unittest.skipUnless(smart_open_lib.six.PY3, "our monkey patch only works on Py3") def test_pathlib_monkeypath_read_gz(self): from smart_open.smart_open_lib import pathlib @@ -325,6 +325,11 @@ def test_pathlib_monkeypath_read_gz(self): finally: _patch_pathlib(obj.old_impl) + @unittest.skipUnless(smart_open_lib.six.PY2, 'this test is for Py2 only') + def test_monkey_patch_raises_exception_py2(self): + with self.assertRaises(RuntimeError): + patch_pathlib() + class SmartOpenHttpTest(unittest.TestCase): """