diff --git a/src/ydata_profiling/utils/cache.py b/src/ydata_profiling/utils/cache.py index cb1bbce9c..8640d86a6 100644 --- a/src/ydata_profiling/utils/cache.py +++ b/src/ydata_profiling/utils/cache.py @@ -1,7 +1,8 @@ """Dataset cache utility functions""" import zipfile from pathlib import Path -from urllib import request + +from requests import get as get_file from ydata_profiling.utils.paths import get_data_path @@ -24,8 +25,10 @@ def cache_file(file_name: str, url: str) -> Path: # If not exists, download and create file if not file_path.exists(): - response = request.urlopen(url) - file_path.write_bytes(response.read()) + response = get_file(url, allow_redirects=True) + response.raise_for_status() + + file_path.write_bytes(response.content) return file_path @@ -42,16 +45,11 @@ def cache_zipped_file(file_name: str, url: str) -> Path: """ data_path = get_data_path() - data_path.mkdir(exist_ok=True) - file_path = data_path / file_name # If not exists, download and create file if not file_path.exists(): - response = request.urlopen(url) - - tmp_path = data_path / "tmp.zip" - tmp_path.write_bytes(response.read()) + tmp_path = cache_file("tmp.zip", url) with zipfile.ZipFile(tmp_path, "r") as zip_file: zip_file.extract(file_path.name, data_path) diff --git a/tests/unit/test_ge_integration.py b/tests/unit/test_ge_integration.py index 773b709ba..79b9d90ea 100644 --- a/tests/unit/test_ge_integration.py +++ b/tests/unit/test_ge_integration.py @@ -42,7 +42,7 @@ def test_to_expectations_suite_context_save_and_build_data_docs(mod, context, df mod.data_context.DataContext.assert_not_called() mod.dataset.PandasDataset.assert_called_once() - context.create_expectation_suite.assert_called_once() + context.add_expectation_suite.assert_called_once() context.save_expectation_suite.assert_called_once() context.build_data_docs.assert_called_once() context.open_data_docs.assert_called_once() @@ -60,7 +60,7 @@ def test_to_expectations_suite_context_no_save_and_build_data_docs(mod, context, mod.data_context.DataContext.assert_not_called() mod.dataset.PandasDataset.assert_called_once() - context.create_expectation_suite.assert_called_once() + context.add_expectation_suite.assert_called_once() context.save_expectation_suite.assert_called_once() context.build_data_docs.assert_called_once() context.open_data_docs.assert_called_once() @@ -78,7 +78,7 @@ def test_to_expectations_suite_context_no_save_and_no_build_data_docs(mod, conte mod.data_context.DataContext.assert_not_called() mod.dataset.PandasDataset.assert_called_once() - context.create_expectation_suite.assert_called_once() + context.add_expectation_suite.assert_called_once() context.save_expectation_suite.assert_not_called() context.build_data_docs.assert_not_called() context.open_data_docs.assert_not_called() @@ -92,7 +92,7 @@ def test_to_expectations_suite_title(context, df): run_validation=False, ) - context.create_expectation_suite.assert_called_once_with( + context.add_expectation_suite.assert_called_once_with( "expectations-dataset", overwrite_existing=True )