From 88530b0392dde9c6f17a782aa41bc056a1082856 Mon Sep 17 00:00:00 2001 From: alexbarros Date: Tue, 12 Mar 2024 10:00:13 -0300 Subject: [PATCH 1/8] fix: ge unit tests referring to deprecated method --- tests/unit/test_ge_integration.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/unit/test_ge_integration.py b/tests/unit/test_ge_integration.py index 773b709ba..79b9d90ea 100644 --- a/tests/unit/test_ge_integration.py +++ b/tests/unit/test_ge_integration.py @@ -42,7 +42,7 @@ def test_to_expectations_suite_context_save_and_build_data_docs(mod, context, df mod.data_context.DataContext.assert_not_called() mod.dataset.PandasDataset.assert_called_once() - context.create_expectation_suite.assert_called_once() + context.add_expectation_suite.assert_called_once() context.save_expectation_suite.assert_called_once() context.build_data_docs.assert_called_once() context.open_data_docs.assert_called_once() @@ -60,7 +60,7 @@ def test_to_expectations_suite_context_no_save_and_build_data_docs(mod, context, mod.data_context.DataContext.assert_not_called() mod.dataset.PandasDataset.assert_called_once() - context.create_expectation_suite.assert_called_once() + context.add_expectation_suite.assert_called_once() context.save_expectation_suite.assert_called_once() context.build_data_docs.assert_called_once() context.open_data_docs.assert_called_once() @@ -78,7 +78,7 @@ def test_to_expectations_suite_context_no_save_and_no_build_data_docs(mod, conte mod.data_context.DataContext.assert_not_called() mod.dataset.PandasDataset.assert_called_once() - context.create_expectation_suite.assert_called_once() + context.add_expectation_suite.assert_called_once() context.save_expectation_suite.assert_not_called() context.build_data_docs.assert_not_called() context.open_data_docs.assert_not_called() @@ -92,7 +92,7 @@ def test_to_expectations_suite_title(context, df): run_validation=False, ) - context.create_expectation_suite.assert_called_once_with( + 
context.add_expectation_suite.assert_called_once_with( "expectations-dataset", overwrite_existing=True ) From ba6e16f26f8d145f7b01c867dbd06650c9327499 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Portela=20Afonso?= Date: Tue, 12 Mar 2024 17:56:56 +0000 Subject: [PATCH 2/8] change cache_file to use requests instead of urllib --- src/ydata_profiling/utils/cache.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ydata_profiling/utils/cache.py b/src/ydata_profiling/utils/cache.py index cb1bbce9c..157515c1e 100644 --- a/src/ydata_profiling/utils/cache.py +++ b/src/ydata_profiling/utils/cache.py @@ -1,7 +1,7 @@ """Dataset cache utility functions""" import zipfile from pathlib import Path -from urllib import request +from requests import get as get_file from ydata_profiling.utils.paths import get_data_path @@ -24,8 +24,8 @@ def cache_file(file_name: str, url: str) -> Path: # If not exists, download and create file if not file_path.exists(): - response = request.urlopen(url) - file_path.write_bytes(response.read()) + response = get_file(url, allow_redirects=True) + file_path.write_bytes(response.content) return file_path From b0b89bdb0c86819cbf07906427e76c07970125e4 Mon Sep 17 00:00:00 2001 From: Azory YData Bot Date: Tue, 12 Mar 2024 18:01:09 +0000 Subject: [PATCH 3/8] fix(linting): code formatting --- src/ydata_profiling/utils/cache.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ydata_profiling/utils/cache.py b/src/ydata_profiling/utils/cache.py index 157515c1e..41d71585c 100644 --- a/src/ydata_profiling/utils/cache.py +++ b/src/ydata_profiling/utils/cache.py @@ -1,6 +1,7 @@ """Dataset cache utility functions""" import zipfile from pathlib import Path + from requests import get as get_file from ydata_profiling.utils.paths import get_data_path From 94f96f5c5215b249a19a797b338a1ca7e306d0e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Portela=20Afonso?= Date: Tue, 12 Mar 2024 18:16:34 +0000 Subject: [PATCH 4/8] requests 
response raise for status --- src/ydata_profiling/utils/cache.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/ydata_profiling/utils/cache.py b/src/ydata_profiling/utils/cache.py index 41d71585c..da8a20d01 100644 --- a/src/ydata_profiling/utils/cache.py +++ b/src/ydata_profiling/utils/cache.py @@ -26,6 +26,8 @@ def cache_file(file_name: str, url: str) -> Path: # If not exists, download and create file if not file_path.exists(): response = get_file(url, allow_redirects=True) + response.raise_for_status() + file_path.write_bytes(response.content) return file_path @@ -49,7 +51,8 @@ def cache_zipped_file(file_name: str, url: str) -> Path: # If not exists, download and create file if not file_path.exists(): - response = request.urlopen(url) + response = get_file(url, allow_redirects=True) + response.raise_for_status() tmp_path = data_path / "tmp.zip" tmp_path.write_bytes(response.read()) From c9e7d312f341f09dd57c64af499b980ea1bdbe11 Mon Sep 17 00:00:00 2001 From: Azory YData Bot Date: Tue, 12 Mar 2024 18:21:12 +0000 Subject: [PATCH 5/8] fix(linting): code formatting --- src/ydata_profiling/utils/cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ydata_profiling/utils/cache.py b/src/ydata_profiling/utils/cache.py index da8a20d01..b0cc27230 100644 --- a/src/ydata_profiling/utils/cache.py +++ b/src/ydata_profiling/utils/cache.py @@ -27,7 +27,7 @@ def cache_file(file_name: str, url: str) -> Path: if not file_path.exists(): response = get_file(url, allow_redirects=True) response.raise_for_status() - + file_path.write_bytes(response.content) return file_path From 3a7e8c169226ec4f43cbb1fefc575a0df0abe6e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Portela=20Afonso?= Date: Tue, 12 Mar 2024 18:30:41 +0000 Subject: [PATCH 6/8] change read to content --- src/ydata_profiling/utils/cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ydata_profiling/utils/cache.py 
b/src/ydata_profiling/utils/cache.py index b0cc27230..074aea023 100644 --- a/src/ydata_profiling/utils/cache.py +++ b/src/ydata_profiling/utils/cache.py @@ -55,7 +55,7 @@ def cache_zipped_file(file_name: str, url: str) -> Path: response.raise_for_status() tmp_path = data_path / "tmp.zip" - tmp_path.write_bytes(response.read()) + tmp_path.write_bytes(response.content) with zipfile.ZipFile(tmp_path, "r") as zip_file: zip_file.extract(file_path.name, data_path) From cb265ccbdd896d4716324f7065a4e6fac218b9af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Portela=20Afonso?= Date: Tue, 12 Mar 2024 18:36:35 +0000 Subject: [PATCH 7/8] use cache_file to download zipfile --- src/ydata_profiling/utils/cache.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/ydata_profiling/utils/cache.py b/src/ydata_profiling/utils/cache.py index 074aea023..10d04d76e 100644 --- a/src/ydata_profiling/utils/cache.py +++ b/src/ydata_profiling/utils/cache.py @@ -45,17 +45,11 @@ def cache_zipped_file(file_name: str, url: str) -> Path: """ data_path = get_data_path() - data_path.mkdir(exist_ok=True) - file_path = data_path / file_name # If not exists, download and create file if not file_path.exists(): - response = get_file(url, allow_redirects=True) - response.raise_for_status() - - tmp_path = data_path / "tmp.zip" - tmp_path.write_bytes(response.content) + tmp_path = cache_file('tmp.zip', url) with zipfile.ZipFile(tmp_path, "r") as zip_file: zip_file.extract(file_path.name, data_path) From e351ad1c7179e0a8fd09a82e9244a8ddfadd746f Mon Sep 17 00:00:00 2001 From: Azory YData Bot Date: Tue, 12 Mar 2024 18:40:38 +0000 Subject: [PATCH 8/8] fix(linting): code formatting --- src/ydata_profiling/utils/cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ydata_profiling/utils/cache.py b/src/ydata_profiling/utils/cache.py index 10d04d76e..8640d86a6 100644 --- a/src/ydata_profiling/utils/cache.py +++ b/src/ydata_profiling/utils/cache.py @@ -49,7 
+49,7 @@ def cache_zipped_file(file_name: str, url: str) -> Path: # If not exists, download and create file if not file_path.exists(): - tmp_path = cache_file('tmp.zip', url) + tmp_path = cache_file("tmp.zip", url) with zipfile.ZipFile(tmp_path, "r") as zip_file: zip_file.extract(file_path.name, data_path)