From cf993618fd9d051f6a87592177a61da44b0f7442 Mon Sep 17 00:00:00 2001 From: Dan Nowacki Date: Wed, 24 Apr 2019 20:57:20 -0700 Subject: [PATCH] Implement load_dataset() and load_dataarray() BUG: Fixes #2887 by adding @shoyer solution for load_dataset and load_dataarray, wrappers around open_dataset and open_dataarray which open, load, and close the file and return the Dataset/DataArray TST: Add tests for sequentially opening and writing to files using new functions DOC: Add to whats-new.rst. Also a tiny change to the open_dataset docstring Update docstrings and check for cache in kwargs Undeprecate load_dataset Add to api.rst, fix whats-new.rst typo, raise error instead of warning --- doc/api.rst | 2 ++ doc/whats-new.rst | 12 ++++++-- xarray/__init__.py | 2 +- xarray/backends/api.py | 57 +++++++++++++++++++++++++++++++++-- xarray/tests/test_backends.py | 19 +++++++++++- xarray/tutorial.py | 15 +++------ 6 files changed, 90 insertions(+), 17 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 00b33959eed..0e766f2cf9a 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -460,6 +460,7 @@ Dataset methods :toctree: generated/ open_dataset + load_dataset open_mfdataset open_rasterio open_zarr @@ -487,6 +488,7 @@ DataArray methods :toctree: generated/ open_dataarray + load_dataarray DataArray.to_dataset DataArray.to_netcdf DataArray.to_pandas diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ac1b5269bfa..d904a3814f1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -29,6 +29,12 @@ Enhancements By `James McCreight `_. - Clean up Python 2 compatibility in code (:issue:`2950`) By `Guido Imperiale `_. +- Implement ``load_dataset()`` and ``load_dataarray()`` as alternatives to + ``open_dataset()`` and ``open_dataarray()`` to open, load into memory, + and close files, returning the Dataset or DataArray. These functions are + helpful for avoiding file-lock errors when trying to write to files opened + using ``open_dataset()`` or ``open_dataarray()``. (:issue:`2887`) + By `Dan Nowacki `_. Bug fixes ~~~~~~~~~ @@ -153,9 +159,9 @@ Other enhancements By `Keisuke Fujii `_. - Added :py:meth:`~xarray.Dataset.drop_dims` (:issue:`1949`). By `Kevin Squire `_. -- ``xr.open_zarr`` now accepts manually specified chunks with the ``chunks=`` - parameter. ``auto_chunk=True`` is equivalent to ``chunks='auto'`` for - backwards compatibility. The ``overwrite_encoded_chunks`` parameter is +- ``xr.open_zarr`` now accepts manually specified chunks with the ``chunks=`` + parameter. ``auto_chunk=True`` is equivalent to ``chunks='auto'`` for + backwards compatibility. The ``overwrite_encoded_chunks`` parameter is added to remove the original zarr chunk encoding. By `Lily Wang `_. diff --git a/xarray/__init__.py b/xarray/__init__.py index 773dfe19d01..506cb46de26 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -17,7 +17,7 @@ from .core.options import set_options from .backends.api import (open_dataset, open_dataarray, open_mfdataset, - save_mfdataset) + save_mfdataset, load_dataset, load_dataarray) from .backends.rasterio_ import open_rasterio from .backends.zarr import open_zarr diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 7c5040580fe..01188e92752 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -185,12 +185,64 @@ def _finalize_store(write, store): store.close() +def load_dataset(filename_or_obj, **kwargs): + """Open, load into memory, and close a Dataset from a file or file-like + object. + + This is a thin wrapper around :py:meth:`~xarray.open_dataset`. It differs + from `open_dataset` in that it loads the Dataset into memory, closes the + file, and returns the Dataset. In contrast, `open_dataset` keeps the file + handle open and lazy loads its contents. All parameters are passed directly + to `open_dataset`. See that documentation for further details. + + Returns + ------- + dataset : Dataset + The newly created Dataset. + + See Also + -------- + open_dataset + """ + if 'cache' in kwargs: + raise TypeError('cache has no effect in this context') + + with open_dataset(filename_or_obj, **kwargs) as ds: + return ds.load() + + +def load_dataarray(filename_or_obj, **kwargs): + """Open, load into memory, and close a DataArray from a file or file-like + object containing a single data variable. + + This is a thin wrapper around :py:meth:`~xarray.open_dataarray`. It differs + from `open_dataarray` in that it loads the Dataset into memory, closes the + file, and returns the Dataset. In contrast, `open_dataarray` keeps the file + handle open and lazy loads its contents. All parameters are passed directly + to `open_dataarray`. See that documentation for further details. + + Returns + ------- + datarray : DataArray + The newly created DataArray. + + See Also + -------- + open_dataarray + """ + if 'cache' in kwargs: + raise TypeError('cache has no effect in this context') + + with open_dataarray(filename_or_obj, **kwargs) as da: + return da.load() + + def open_dataset(filename_or_obj, group=None, decode_cf=True, mask_and_scale=None, decode_times=True, autoclose=None, concat_characters=True, decode_coords=True, engine=None, chunks=None, lock=None, cache=None, drop_variables=None, backend_kwargs=None, use_cftime=None): - """Load and decode a dataset from a file or file-like object. + """Open and decode a dataset from a file or file-like object. Parameters ---------- @@ -406,7 +458,8 @@ def open_dataarray(filename_or_obj, group=None, decode_cf=True, concat_characters=True, decode_coords=True, engine=None, chunks=None, lock=None, cache=None, drop_variables=None, backend_kwargs=None, use_cftime=None): - """Open an DataArray from a netCDF file containing a single data variable. + """Open an DataArray from a file or file-like object containing a single + data variable. This is designed to read netCDF files with only one data variable. If multiple variables are present then a ValueError is raised. diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index a4c0374e158..f31d3bf4f9b 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -19,7 +19,7 @@ import xarray as xr from xarray import ( DataArray, Dataset, backends, open_dataarray, open_dataset, open_mfdataset, - save_mfdataset) + save_mfdataset, load_dataset, load_dataarray) from xarray.backends.common import robust_getitem from xarray.backends.netCDF4_ import _extract_nc4_variable_encoding from xarray.backends.pydap_ import PydapDataStore @@ -2641,6 +2641,23 @@ def test_save_mfdataset_compute_false_roundtrip(self): with open_mfdataset([tmp1, tmp2]) as actual: assert_identical(actual, original) + def test_load_dataset(self): + with create_tmp_file() as tmp: + original = Dataset({'foo': ('x', np.random.randn(10))}) + original.to_netcdf(tmp) + ds = load_dataset(tmp) + # this would fail if we used open_dataset instead of load_dataset + ds.to_netcdf(tmp) + + def test_load_dataarray(self): + with create_tmp_file() as tmp: + original = Dataset({'foo': ('x', np.random.randn(10))}) + original.to_netcdf(tmp) + ds = load_dataarray(tmp) + # this would fail if we used open_dataarray instead of + # load_dataarray + ds.to_netcdf(tmp) + @requires_scipy_or_netCDF4 @requires_pydap diff --git a/xarray/tutorial.py b/xarray/tutorial.py index f54cf7b3889..1a977450ed6 100644 --- a/xarray/tutorial.py +++ b/xarray/tutorial.py @@ -27,7 +27,7 @@ def open_dataset(name, cache=True, cache_dir=_default_cache_dir, github_url='https://github.com/pydata/xarray-data', branch='master', **kws): """ - Load a dataset from the online repository (requires internet). + Open a dataset from the online repository (requires internet). If a local copy is found then always use that to avoid network traffic. @@ -91,17 +91,12 @@ def open_dataset(name, cache=True, cache_dir=_default_cache_dir, def load_dataset(*args, **kwargs): """ - `load_dataset` will be removed a future version of xarray. The current - behavior of this function can be achived by using - `tutorial.open_dataset(...).load()`. + Open, load into memory, and close a dataset from the online repository + (requires internet). See Also -------- open_dataset """ - warnings.warn( - "load_dataset` will be removed in a future version of xarray. The " - "current behavior of this function can be achived by using " - "`tutorial.open_dataset(...).load()`.", - DeprecationWarning, stacklevel=2) - return open_dataset(*args, **kwargs).load() + with open_dataset(*args, **kwargs) as ds: + return ds.load()