diff --git a/doc/source/examining/loading_data.rst b/doc/source/examining/loading_data.rst
index 884e9391c6e..c1f02ad546a 100644
--- a/doc/source/examining/loading_data.rst
+++ b/doc/source/examining/loading_data.rst
@@ -36,32 +36,6 @@ any arguments, and it will return a list of the names that can be supplied:
This will return a list of possible filenames; more information can be accessed on the data catalog.
-
-.. _loading-archived-data:
-
-Archived Data
--------------
-
-If your data is stored as a (compressed) tar file, you can access the contained
-dataset directly without extracting the tar file.
-This can be achieved using the ``load_archive`` function:
-
-.. code-block:: python
-
- import yt
-
- ds = yt.load_archive("IsolatedGalaxy.tar.gz", "IsolatedGalaxy/galaxy0030/galaxy0030")
-
-The first argument is the path to the archive file, the second one is the path to the file to load
-in the archive. Subsequent arguments are passed to ``yt.load``.
-
-The functionality requires the package `ratarmount `_ to be installed.
-Under the hood, yt will mount the archive as a (read-only) filesystem. Note that this requires the
-entire archive to be read once to compute the location of each file in the archive; subsequent accesses
-will be much faster.
-All archive formats supported by `ratarmount `_ should be loadable, provided
-the dependencies are installed; this includes ``tar``, ``tar.gz`` and tar.bz2`` formats.
-
.. _loading-amrvac-data:
AMRVAC Data
diff --git a/nose_unit.cfg b/nose_unit.cfg
index 795bcc37fb4..23056b5ddc3 100644
--- a/nose_unit.cfg
+++ b/nose_unit.cfg
@@ -6,5 +6,5 @@ nologcapture=1
verbosity=2
where=yt
with-timer=1
-ignore-files=(test_load_errors.py|test_load_sample.py|test_commons.py|test_ambiguous_fields.py|test_field_access_pytest.py|test_save.py|test_line_annotation_unit.py|test_eps_writer.py|test_registration.py|test_invalid_origin.py|test_outputs_pytest\.py|test_normal_plot_api\.py|test_load_archive\.py)
+ignore-files=(test_load_errors.py|test_load_sample.py|test_commons.py|test_ambiguous_fields.py|test_field_access_pytest.py|test_save.py|test_line_annotation_unit.py|test_eps_writer.py|test_registration.py|test_invalid_origin.py|test_outputs_pytest\.py|test_normal_plot_api\.py)
exclude-test=yt.frontends.gdf.tests.test_outputs.TestGDF
diff --git a/setup.cfg b/setup.cfg
index e75acc46164..e441dac6555 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -91,7 +91,6 @@ full =
requests>=2.20.0
scipy>=1.5.0
xarray>=0.16.1
- ratarmount~=0.8.1;platform_system!="Windows"
mapserver =
bottle
minimal =
diff --git a/tests/ci_install.sh b/tests/ci_install.sh
index 977f4d6c4c2..15820a3cd8e 100644
--- a/tests/ci_install.sh
+++ b/tests/ci_install.sh
@@ -10,14 +10,13 @@ linux|Linux)
proj-data \
proj-bin \
libgeos-dev \
- libopenmpi-dev \
- libfuse2
+ libopenmpi-dev
;;
osx|macOS)
sudo mkdir -p /usr/local/man
sudo chown -R "${USER}:admin" /usr/local/man
brew update
- HOMEBREW_NO_AUTO_UPDATE=1 brew install hdf5 proj geos open-mpi netcdf ccache osxfuse
+ HOMEBREW_NO_AUTO_UPDATE=1 brew install hdf5 proj geos open-mpi netcdf ccache
;;
esac
diff --git a/tests/tests.yaml b/tests/tests.yaml
index 8d9fd4c4592..053307feeae 100644
--- a/tests/tests.yaml
+++ b/tests/tests.yaml
@@ -193,7 +193,6 @@ other_tests:
- "--ignore-files=test_save.py"
- "--ignore-files=test_registration.py"
- "--ignore-files=test_invalid_origin.py"
- - "--ignore-files=test_load_archive\\.py"
- "--ignore-files=test_outputs_pytest\\.py"
- "--ignore-files=test_normal_plot_api\\.py"
- "--exclude-test=yt.frontends.gdf.tests.test_outputs.TestGDF"
diff --git a/yt/__init__.py b/yt/__init__.py
index 21a21fd1803..d9727ff465e 100644
--- a/yt/__init__.py
+++ b/yt/__init__.py
@@ -80,7 +80,6 @@
from yt.loaders import (
load,
load_amr_grids,
- load_archive,
load_hexahedral_mesh,
load_octree,
load_particles,
diff --git a/yt/loaders.py b/yt/loaders.py
index efb90eaf55a..e63069d2202 100644
--- a/yt/loaders.py
+++ b/yt/loaders.py
@@ -2,28 +2,20 @@
This module gathers all user-facing functions with a `load_` prefix.
"""
-import atexit
import os
import sys
import tarfile
-import time
-import types
-import warnings
-from multiprocessing import Pipe, Process
-from multiprocessing.connection import Connection
from pathlib import Path
-from typing import Dict, List, Optional, Tuple, Union
+from typing import List, Optional, Tuple
from urllib.parse import urlsplit
import numpy as np
from more_itertools import always_iterable
-from yt.data_objects.static_output import Dataset
from yt.funcs import levenshtein_distance
from yt.sample_data.api import lookup_on_disk_data
from yt.utilities.decompose import decompose_array, get_psize
from yt.utilities.exceptions import (
- MountError,
YTAmbiguousDataType,
YTIllDefinedAMR,
YTSimulationNotIdentified,
@@ -36,7 +28,7 @@
output_type_registry,
simulation_time_series_registry,
)
-from yt.utilities.on_demand_imports import _pooch as pooch, _ratarmount as ratarmount
+from yt.utilities.on_demand_imports import _pooch as pooch
# --- Loaders for known data formats ---
@@ -1433,129 +1425,3 @@ def load_sample(
loadable_path = loadable_path.joinpath(load_name, specific_file)
return load(loadable_path, **kwargs)
-
-
-def _mount_helper(
- archive: str, mountPoint: str, ratarmount_kwa: Dict, conn: Connection
-):
- try:
- fuseOperationsObject = ratarmount.TarMount(
- pathToMount=archive,
- mountPoint=mountPoint,
- lazyMounting=True,
- **ratarmount_kwa,
- )
- fuseOperationsObject.use_ns = True
- conn.send(True)
- except Exception:
- conn.send(False)
- raise
-
- ratarmount.fuse.FUSE(
- operations=fuseOperationsObject,
- mountpoint=mountPoint,
- foreground=True,
- nothreads=True,
- )
-
-
-# --- Loader for tar-based datasets ---
-def load_archive(
- fn: Union[str, Path],
- path: str,
- ratarmount_kwa: Optional[Dict] = None,
- mount_timeout: float = 1.0,
- *args,
- **kwargs,
-) -> Dataset:
- r"""
- Load archived data with yt.
-
- This is a wrapper around :func:`~yt.loaders.load` to include mounting
- and unmounting the archive as a read-only filesystem and load it.
-
- Parameters
- ----------
-
- fn: str
- The `filename` of the archive containing the dataset.
-
- path: str
- The path to the dataset in the archive.
-
- ratarmount_kwa: dict, optional
- Optional parameters to pass to ratarmount to mount the archive.
-
- mount_timeout: float, optional
- The timeout to wait for ratarmount to mount the archive. Default is 1s.
-
- Notes
- -----
-
- - The function is experimental and may work or not depending on your setup.
- - Any additional keyword argument is passed down to :func:`~yt.loaders.load`.
- - This function requires ratarmount to be installed.
- - This function does not work on Windows system.
- """
-
- warnings.warn(
- "The 'load_archive' function is still experimental and may be unstable."
- )
-
- fn = os.path.expanduser(fn)
-
- # This will raise FileNotFoundError if the path isn't matched
- # either in the current dir or yt.config.ytcfg['data_dir_directory']
- if not fn.startswith("http"):
- fn = str(lookup_on_disk_data(fn))
-
- if ratarmount_kwa is None:
- ratarmount_kwa = {}
-
- try:
- tarfile.open(fn)
- except tarfile.ReadError:
- raise YTUnidentifiedDataType(fn, *args, **kwargs)
-
- # Note: the temporary directory will be created by ratarmount
- tempdir = fn + ".mount"
- tempdir_base = tempdir
- i = 0
- while os.path.exists(tempdir):
- i += 1
- tempdir = f"{tempdir_base}.{i}"
-
- parent_conn, child_conn = Pipe()
- proc = Process(target=_mount_helper, args=(fn, tempdir, ratarmount_kwa, child_conn))
- proc.start()
- if not parent_conn.recv():
- raise MountError(f"An error occured while mounting {fn} in {tempdir}")
-
- # Note: the mounting needs to happen in another process which
- # needs be run in the foreground (otherwise it may
- # unmount). To prevent a race-condition here, we wait
- # for the folder to be mounted within a reasonable time.
- t = 0.0
- while t < mount_timeout:
- if os.path.ismount(tempdir):
- break
- time.sleep(0.1)
- t += 0.1
- else:
- raise MountError(f"Folder {tempdir} does not appear to be mounted")
-
- # We need to kill the process at exit (to force unmounting)
- def umount_callback():
- proc.terminate()
-
- atexit.register(umount_callback)
-
- # Alternatively, can dismount manually
- def del_callback(self):
- proc.terminate()
- atexit.unregister(umount_callback)
-
- ds = load(os.path.join(tempdir, path), *args, **kwargs)
- ds.dismount = types.MethodType(del_callback, ds)
-
- return ds
diff --git a/yt/tests/test_load_archive.py b/yt/tests/test_load_archive.py
deleted file mode 100644
index f8db2a47c21..00000000000
--- a/yt/tests/test_load_archive.py
+++ /dev/null
@@ -1,120 +0,0 @@
-import os
-import sys
-import tarfile
-import time
-
-import pytest
-
-from yt.config import ytcfg
-from yt.loaders import load_archive
-from yt.sample_data.api import _download_sample_data_file, get_data_registry_table
-from yt.testing import requires_module_pytest
-from yt.utilities.exceptions import YTUnidentifiedDataType
-
-
-@pytest.fixture()
-def data_registry():
- yield get_data_registry_table()
-
-
-@pytest.fixture()
-def tmp_data_dir(tmp_path):
- pre_test_data_dir = ytcfg["yt", "test_data_dir"]
- ytcfg.set("yt", "test_data_dir", str(tmp_path))
-
- yield tmp_path
-
- ytcfg.set("yt", "test_data_dir", pre_test_data_dir)
-
-
-# Note: ratarmount cannot currently be installed on Windows as of v0.8.1
-@pytest.mark.skipif(
- sys.platform.startswith("win"),
- reason="ratarmount cannot currently be installed on Windows as of v0.8.1",
-)
-@pytest.mark.skipif(
- os.environ.get("JENKINS_HOME") is not None,
- reason="Archive mounting times out on Jenkins.",
-)
-@requires_module_pytest("pooch", "ratarmount")
-@pytest.mark.parametrize(
- "fn, exact_loc, class_",
- [
- (
- "ToroShockTube.tar.gz",
- "ToroShockTube/DD0001/data0001",
- "EnzoDataset",
- ),
- (
- "ramses_sink_00016.tar.gz",
- "ramses_sink_00016/output_00016",
- "RAMSESDataset",
- ),
- ],
-)
-@pytest.mark.parametrize("archive_suffix", ["", ".gz"])
-def test_load_archive(
- fn, exact_loc, class_: str, archive_suffix, tmp_data_dir, data_registry
-):
- # Download the sample .tar.gz'd file
- targz_path = _download_sample_data_file(filename=fn)
- tar_path = targz_path.with_suffix(archive_suffix)
-
- if tar_path != targz_path:
- # Open the tarfile and uncompress it to .tar, .tar.gz, and .tar.bz2 files
- with tarfile.open(targz_path, mode="r:*") as targz:
- mode = "w" + archive_suffix.replace(".", ":")
- with tarfile.open(tar_path, mode=mode) as tar:
- for member in targz.getmembers():
- content = targz.extractfile(member)
- tar.addfile(member, fileobj=content)
-
- # Now try to open the .tar.* files
- warn_msg = "The 'load_archive' function is still experimental and may be unstable."
- with pytest.warns(UserWarning, match=warn_msg):
- ds = load_archive(tar_path, exact_loc, mount_timeout=10)
- assert type(ds).__name__ == class_
-
- # Make sure the index is readable
- ds.index
-
- # Check cleanup
- mount_path = tar_path.with_name(tar_path.name + ".mount")
- assert mount_path.is_mount()
-
- ## Manually dismount
- ds.dismount()
-
- ## The dismounting happens concurrently, wait a few sec.
- time.sleep(2)
-
- ## Mount path should not exist anymore *and* have been deleted
- assert not mount_path.is_mount()
- assert not mount_path.exists()
-
-
-@pytest.mark.skipif(
- sys.platform.startswith("win"),
- reason="ratarmount cannot currently be installed on Windows as of v0.8.1",
-)
-@pytest.mark.skipif(
- os.environ.get("JENKINS_HOME") is not None,
- reason="Archive mounting times out on Jenkins.",
-)
-@pytest.mark.filterwarnings(
- "ignore:The 'load_archive' function is still experimental and may be unstable."
-)
-@requires_module_pytest("pooch", "ratarmount")
-def test_load_invalid_archive(tmp_data_dir, data_registry):
- # Archive does not exist
- with pytest.raises(FileNotFoundError):
- load_archive("this_file_does_not_exist.tar.gz", "invalid_location")
-
- targz_path = _download_sample_data_file(filename="ToroShockTube.tar.gz")
- # File does not exist
- with pytest.raises(FileNotFoundError):
- load_archive(targz_path, "invalid_location")
-
- # File exists but is not recognized
- with pytest.raises(YTUnidentifiedDataType):
- load_archive(targz_path, "ToroShockTube/DD0001/data0001.memorymap")
diff --git a/yt/utilities/exceptions.py b/yt/utilities/exceptions.py
index b1aa09e0e5c..7f6472daec4 100644
--- a/yt/utilities/exceptions.py
+++ b/yt/utilities/exceptions.py
@@ -922,8 +922,3 @@ class GenerationInProgress(Exception):
def __init__(self, fields):
self.fields = fields
super().__init__()
-
-
-class MountError(Exception):
- def __init__(self, message):
- self.message = message
diff --git a/yt/utilities/on_demand_imports.py b/yt/utilities/on_demand_imports.py
index e1eaa3c6823..8d520dac6f2 100644
--- a/yt/utilities/on_demand_imports.py
+++ b/yt/utilities/on_demand_imports.py
@@ -677,38 +677,3 @@ def server(self):
_firefly = firefly_imports()
-
-
-# Note: ratarmount may fail with an OSError on import if libfuse is missing
-# In this case, we want the on-demand-import to fail _where_ ratarmount
-# is being used, rather than at startup.
-# We could catch the OSError and throw it again when we try to access
-# ratarmount. Instead here, we delay as much as possible the actual import of
-# the package which thus raises an exception where expected.
-#
-# Note 2: we need to store the imported module in __module, as _module plays
-# a special role in on-demand-imports (e.g. used for testing purposes to know
-# if the package has been installed).
-class ratarmount_imports:
- _name = "ratarmount"
- __module = None
-
- @property
- def _module(self):
- if self.__module is not None:
- return self.__module
-
- try:
- import ratarmount as myself
-
- self.__module = myself
- except ImportError:
- self.__module = NotAModule(self._name)
-
- return self.__module
-
- def __getattr__(self, attr):
- return getattr(self._module, attr)
-
-
-_ratarmount = ratarmount_imports()