From 513cc2cf5026faa4751c26aef01faae3668b9959 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Robert?= Date: Wed, 12 Jan 2022 00:09:48 +0100 Subject: [PATCH] Revert "ENH: Directly open compressed files" --- doc/source/examining/loading_data.rst | 26 ----- nose_unit.cfg | 2 +- setup.cfg | 1 - tests/ci_install.sh | 5 +- tests/tests.yaml | 1 - yt/__init__.py | 1 - yt/loaders.py | 138 +------------------------- yt/tests/test_load_archive.py | 120 ---------------------- yt/utilities/exceptions.py | 5 - yt/utilities/on_demand_imports.py | 35 ------- 10 files changed, 5 insertions(+), 329 deletions(-) delete mode 100644 yt/tests/test_load_archive.py diff --git a/doc/source/examining/loading_data.rst b/doc/source/examining/loading_data.rst index 884e9391c6e..c1f02ad546a 100644 --- a/doc/source/examining/loading_data.rst +++ b/doc/source/examining/loading_data.rst @@ -36,32 +36,6 @@ any arguments, and it will return a list of the names that can be supplied: This will return a list of possible filenames; more information can be accessed on the data catalog. - -.. _loading-archived-data: - -Archived Data -------------- - -If your data is stored as a (compressed) tar file, you can access the contained -dataset directly without extracting the tar file. -This can be achieved using the ``load_archive`` function: - -.. code-block:: python - - import yt - - ds = yt.load_archive("IsolatedGalaxy.tar.gz", "IsolatedGalaxy/galaxy0030/galaxy0030") - -The first argument is the path to the archive file, the second one is the path to the file to load -in the archive. Subsequent arguments are passed to ``yt.load``. - -The functionality requires the package `ratarmount `_ to be installed. -Under the hood, yt will mount the archive as a (read-only) filesystem. Note that this requires the -entire archive to be read once to compute the location of each file in the archive; subsequent accesses -will be much faster. -All archive formats supported by `ratarmount `_ should be loadable, provided -the dependencies are installed; this includes ``tar``, ``tar.gz`` and tar.bz2`` formats. - .. _loading-amrvac-data: AMRVAC Data diff --git a/nose_unit.cfg b/nose_unit.cfg index 795bcc37fb4..23056b5ddc3 100644 --- a/nose_unit.cfg +++ b/nose_unit.cfg @@ -6,5 +6,5 @@ nologcapture=1 verbosity=2 where=yt with-timer=1 -ignore-files=(test_load_errors.py|test_load_sample.py|test_commons.py|test_ambiguous_fields.py|test_field_access_pytest.py|test_save.py|test_line_annotation_unit.py|test_eps_writer.py|test_registration.py|test_invalid_origin.py|test_outputs_pytest\.py|test_normal_plot_api\.py|test_load_archive\.py) +ignore-files=(test_load_errors.py|test_load_sample.py|test_commons.py|test_ambiguous_fields.py|test_field_access_pytest.py|test_save.py|test_line_annotation_unit.py|test_eps_writer.py|test_registration.py|test_invalid_origin.py|test_outputs_pytest\.py|test_normal_plot_api\.py) exclude-test=yt.frontends.gdf.tests.test_outputs.TestGDF diff --git a/setup.cfg b/setup.cfg index e75acc46164..e441dac6555 100644 --- a/setup.cfg +++ b/setup.cfg @@ -91,7 +91,6 @@ full = requests>=2.20.0 scipy>=1.5.0 xarray>=0.16.1 - ratarmount~=0.8.1;platform_system!="Windows" mapserver = bottle minimal = diff --git a/tests/ci_install.sh b/tests/ci_install.sh index 977f4d6c4c2..15820a3cd8e 100644 --- a/tests/ci_install.sh +++ b/tests/ci_install.sh @@ -10,14 +10,13 @@ linux|Linux) proj-data \ proj-bin \ libgeos-dev \ - libopenmpi-dev \ - libfuse2 + libopenmpi-dev ;; osx|macOS) sudo mkdir -p /usr/local/man sudo chown -R "${USER}:admin" /usr/local/man brew update - HOMEBREW_NO_AUTO_UPDATE=1 brew install hdf5 proj geos open-mpi netcdf ccache osxfuse + HOMEBREW_NO_AUTO_UPDATE=1 brew install hdf5 proj geos open-mpi netcdf ccache ;; esac diff --git a/tests/tests.yaml b/tests/tests.yaml index 8d9fd4c4592..053307feeae 100644 --- a/tests/tests.yaml +++ b/tests/tests.yaml @@ -193,7 +193,6 @@ other_tests: - "--ignore-files=test_save.py" - "--ignore-files=test_registration.py" - "--ignore-files=test_invalid_origin.py" - - "--ignore-files=test_load_archive\\.py" - "--ignore-files=test_outputs_pytest\\.py" - "--ignore-files=test_normal_plot_api\\.py" - "--exclude-test=yt.frontends.gdf.tests.test_outputs.TestGDF" diff --git a/yt/__init__.py b/yt/__init__.py index 21a21fd1803..d9727ff465e 100644 --- a/yt/__init__.py +++ b/yt/__init__.py @@ -80,7 +80,6 @@ from yt.loaders import ( load, load_amr_grids, - load_archive, load_hexahedral_mesh, load_octree, load_particles, diff --git a/yt/loaders.py b/yt/loaders.py index efb90eaf55a..e63069d2202 100644 --- a/yt/loaders.py +++ b/yt/loaders.py @@ -2,28 +2,20 @@ This module gathers all user-facing functions with a `load_` prefix. """ -import atexit import os import sys import tarfile -import time -import types -import warnings -from multiprocessing import Pipe, Process -from multiprocessing.connection import Connection from pathlib import Path -from typing import Dict, List, Optional, Tuple, Union +from typing import List, Optional, Tuple from urllib.parse import urlsplit import numpy as np from more_itertools import always_iterable -from yt.data_objects.static_output import Dataset from yt.funcs import levenshtein_distance from yt.sample_data.api import lookup_on_disk_data from yt.utilities.decompose import decompose_array, get_psize from yt.utilities.exceptions import ( - MountError, YTAmbiguousDataType, YTIllDefinedAMR, YTSimulationNotIdentified, @@ -36,7 +28,7 @@ output_type_registry, simulation_time_series_registry, ) -from yt.utilities.on_demand_imports import _pooch as pooch, _ratarmount as ratarmount +from yt.utilities.on_demand_imports import _pooch as pooch # --- Loaders for known data formats --- @@ -1433,129 +1425,3 @@ def load_sample( loadable_path = loadable_path.joinpath(load_name, specific_file) return load(loadable_path, **kwargs) - - -def _mount_helper( - archive: str, mountPoint: str, ratarmount_kwa: Dict, conn: Connection -): - try: - fuseOperationsObject = ratarmount.TarMount( - pathToMount=archive, - mountPoint=mountPoint, - lazyMounting=True, - **ratarmount_kwa, - ) - fuseOperationsObject.use_ns = True - conn.send(True) - except Exception: - conn.send(False) - raise - - ratarmount.fuse.FUSE( - operations=fuseOperationsObject, - mountpoint=mountPoint, - foreground=True, - nothreads=True, - ) - - -# --- Loader for tar-based datasets --- -def load_archive( - fn: Union[str, Path], - path: str, - ratarmount_kwa: Optional[Dict] = None, - mount_timeout: float = 1.0, - *args, - **kwargs, -) -> Dataset: - r""" - Load archived data with yt. - - This is a wrapper around :func:`~yt.loaders.load` to include mounting - and unmounting the archive as a read-only filesystem and load it. - - Parameters - ---------- - - fn: str - The `filename` of the archive containing the dataset. - - path: str - The path to the dataset in the archive. - - ratarmount_kwa: dict, optional - Optional parameters to pass to ratarmount to mount the archive. - - mount_timeout: float, optional - The timeout to wait for ratarmount to mount the archive. Default is 1s. - - Notes - ----- - - - The function is experimental and may work or not depending on your setup. - - Any additional keyword argument is passed down to :func:`~yt.loaders.load`. - - This function requires ratarmount to be installed. - - This function does not work on Windows system. - """ - - warnings.warn( - "The 'load_archive' function is still experimental and may be unstable." - ) - - fn = os.path.expanduser(fn) - - # This will raise FileNotFoundError if the path isn't matched - # either in the current dir or yt.config.ytcfg['data_dir_directory'] - if not fn.startswith("http"): - fn = str(lookup_on_disk_data(fn)) - - if ratarmount_kwa is None: - ratarmount_kwa = {} - - try: - tarfile.open(fn) - except tarfile.ReadError: - raise YTUnidentifiedDataType(fn, *args, **kwargs) - - # Note: the temporary directory will be created by ratarmount - tempdir = fn + ".mount" - tempdir_base = tempdir - i = 0 - while os.path.exists(tempdir): - i += 1 - tempdir = f"{tempdir_base}.{i}" - - parent_conn, child_conn = Pipe() - proc = Process(target=_mount_helper, args=(fn, tempdir, ratarmount_kwa, child_conn)) - proc.start() - if not parent_conn.recv(): - raise MountError(f"An error occured while mounting {fn} in {tempdir}") - - # Note: the mounting needs to happen in another process which - # needs be run in the foreground (otherwise it may - # unmount). To prevent a race-condition here, we wait - # for the folder to be mounted within a reasonable time. - t = 0.0 - while t < mount_timeout: - if os.path.ismount(tempdir): - break - time.sleep(0.1) - t += 0.1 - else: - raise MountError(f"Folder {tempdir} does not appear to be mounted") - - # We need to kill the process at exit (to force unmounting) - def umount_callback(): - proc.terminate() - - atexit.register(umount_callback) - - # Alternatively, can dismount manually - def del_callback(self): - proc.terminate() - atexit.unregister(umount_callback) - - ds = load(os.path.join(tempdir, path), *args, **kwargs) - ds.dismount = types.MethodType(del_callback, ds) - - return ds diff --git a/yt/tests/test_load_archive.py b/yt/tests/test_load_archive.py deleted file mode 100644 index f8db2a47c21..00000000000 --- a/yt/tests/test_load_archive.py +++ /dev/null @@ -1,120 +0,0 @@ -import os -import sys -import tarfile -import time - -import pytest - -from yt.config import ytcfg -from yt.loaders import load_archive -from yt.sample_data.api import _download_sample_data_file, get_data_registry_table -from yt.testing import requires_module_pytest -from yt.utilities.exceptions import YTUnidentifiedDataType - - -@pytest.fixture() -def data_registry(): - yield get_data_registry_table() - - -@pytest.fixture() -def tmp_data_dir(tmp_path): - pre_test_data_dir = ytcfg["yt", "test_data_dir"] - ytcfg.set("yt", "test_data_dir", str(tmp_path)) - - yield tmp_path - - ytcfg.set("yt", "test_data_dir", pre_test_data_dir) - - -# Note: ratarmount cannot currently be installed on Windows as of v0.8.1 -@pytest.mark.skipif( - sys.platform.startswith("win"), - reason="ratarmount cannot currently be installed on Windows as of v0.8.1", -) -@pytest.mark.skipif( - os.environ.get("JENKINS_HOME") is not None, - reason="Archive mounting times out on Jenkins.", -) -@requires_module_pytest("pooch", "ratarmount") -@pytest.mark.parametrize( - "fn, exact_loc, class_", - [ - ( - "ToroShockTube.tar.gz", - "ToroShockTube/DD0001/data0001", - "EnzoDataset", - ), - ( - "ramses_sink_00016.tar.gz", - "ramses_sink_00016/output_00016", - "RAMSESDataset", - ), - ], -) -@pytest.mark.parametrize("archive_suffix", ["", ".gz"]) -def test_load_archive( - fn, exact_loc, class_: str, archive_suffix, tmp_data_dir, data_registry -): - # Download the sample .tar.gz'd file - targz_path = _download_sample_data_file(filename=fn) - tar_path = targz_path.with_suffix(archive_suffix) - - if tar_path != targz_path: - # Open the tarfile and uncompress it to .tar, .tar.gz, and .tar.bz2 files - with tarfile.open(targz_path, mode="r:*") as targz: - mode = "w" + archive_suffix.replace(".", ":") - with tarfile.open(tar_path, mode=mode) as tar: - for member in targz.getmembers(): - content = targz.extractfile(member) - tar.addfile(member, fileobj=content) - - # Now try to open the .tar.* files - warn_msg = "The 'load_archive' function is still experimental and may be unstable." - with pytest.warns(UserWarning, match=warn_msg): - ds = load_archive(tar_path, exact_loc, mount_timeout=10) - assert type(ds).__name__ == class_ - - # Make sure the index is readable - ds.index - - # Check cleanup - mount_path = tar_path.with_name(tar_path.name + ".mount") - assert mount_path.is_mount() - - ## Manually dismount - ds.dismount() - - ## The dismounting happens concurrently, wait a few sec. - time.sleep(2) - - ## Mount path should not exist anymore *and* have been deleted - assert not mount_path.is_mount() - assert not mount_path.exists() - - -@pytest.mark.skipif( - sys.platform.startswith("win"), - reason="ratarmount cannot currently be installed on Windows as of v0.8.1", -) -@pytest.mark.skipif( - os.environ.get("JENKINS_HOME") is not None, - reason="Archive mounting times out on Jenkins.", -) -@pytest.mark.filterwarnings( - "ignore:The 'load_archive' function is still experimental and may be unstable." -) -@requires_module_pytest("pooch", "ratarmount") -def test_load_invalid_archive(tmp_data_dir, data_registry): - # Archive does not exist - with pytest.raises(FileNotFoundError): - load_archive("this_file_does_not_exist.tar.gz", "invalid_location") - - targz_path = _download_sample_data_file(filename="ToroShockTube.tar.gz") - # File does not exist - with pytest.raises(FileNotFoundError): - load_archive(targz_path, "invalid_location") - - # File exists but is not recognized - with pytest.raises(YTUnidentifiedDataType): - load_archive(targz_path, "ToroShockTube/DD0001/data0001.memorymap") diff --git a/yt/utilities/exceptions.py b/yt/utilities/exceptions.py index b1aa09e0e5c..7f6472daec4 100644 --- a/yt/utilities/exceptions.py +++ b/yt/utilities/exceptions.py @@ -922,8 +922,3 @@ class GenerationInProgress(Exception): def __init__(self, fields): self.fields = fields super().__init__() - - -class MountError(Exception): - def __init__(self, message): - self.message = message diff --git a/yt/utilities/on_demand_imports.py b/yt/utilities/on_demand_imports.py index e1eaa3c6823..8d520dac6f2 100644 --- a/yt/utilities/on_demand_imports.py +++ b/yt/utilities/on_demand_imports.py @@ -677,38 +677,3 @@ def server(self): _firefly = firefly_imports() - - -# Note: ratarmount may fail with an OSError on import if libfuse is missing -# In this case, we want the on-demand-import to fail _where_ ratarmount -# is being used, rather than at startup. -# We could catch the OSError and throw it again when we try to access -# ratarmount. Instead here, we delay as much as possible the actual import of -# the package which thus raises an exception where expected. -# -# Note 2: we need to store the imported module in __module, as _module plays -# a special role in on-demand-imports (e.g. used for testing purposes to know -# if the package has been installed). -class ratarmount_imports: - _name = "ratarmount" - __module = None - - @property - def _module(self): - if self.__module is not None: - return self.__module - - try: - import ratarmount as myself - - self.__module = myself - except ImportError: - self.__module = NotAModule(self._name) - - return self.__module - - def __getattr__(self, attr): - return getattr(self._module, attr) - - -_ratarmount = ratarmount_imports()