Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement load_dataset() and load_dataarray() #2917

Merged
merged 5 commits into from
May 16, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,7 @@ Dataset methods
:toctree: generated/

open_dataset
load_dataset
open_mfdataset
open_rasterio
open_zarr
Expand Down Expand Up @@ -487,6 +488,7 @@ DataArray methods
:toctree: generated/

open_dataarray
load_dataarray
DataArray.to_dataset
DataArray.to_netcdf
DataArray.to_pandas
Expand Down
12 changes: 9 additions & 3 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ Enhancements
By `James McCreight <https://github.com/jmccreight>`_.
- Clean up Python 2 compatibility in code (:issue:`2950`)
By `Guido Imperiale <https://github.com/crusaderky>`_.
- Implement ``load_dataset()`` and ``load_dataarray()`` as alternatives to
``open_dataset()`` and ``open_dataarray()`` to open, load into memory,
and close files, returning the Dataset or DataArray. These functions are
helpful for avoiding file-lock errors when trying to write to files opened
using ``open_dataset()`` or ``open_dataarray()``. (:issue:`2887`)
By `Dan Nowacki <https://github.com/dnowacki-usgs>`_.

Bug fixes
~~~~~~~~~
Expand Down Expand Up @@ -153,9 +159,9 @@ Other enhancements
By `Keisuke Fujii <https://github.com/fujiisoup>`_.
- Added :py:meth:`~xarray.Dataset.drop_dims` (:issue:`1949`).
By `Kevin Squire <https://github.com/kmsquire>`_.
- ``xr.open_zarr`` now accepts manually specified chunks with the ``chunks=``
parameter. ``auto_chunk=True`` is equivalent to ``chunks='auto'`` for
backwards compatibility. The ``overwrite_encoded_chunks`` parameter is
- ``xr.open_zarr`` now accepts manually specified chunks with the ``chunks=``
parameter. ``auto_chunk=True`` is equivalent to ``chunks='auto'`` for
backwards compatibility. The ``overwrite_encoded_chunks`` parameter is
added to remove the original zarr chunk encoding.
By `Lily Wang <https://github.com/lilyminium>`_.

Expand Down
2 changes: 1 addition & 1 deletion xarray/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from .core.options import set_options

from .backends.api import (open_dataset, open_dataarray, open_mfdataset,
save_mfdataset)
save_mfdataset, load_dataset, load_dataarray)
from .backends.rasterio_ import open_rasterio
from .backends.zarr import open_zarr

Expand Down
57 changes: 55 additions & 2 deletions xarray/backends/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,12 +185,64 @@ def _finalize_store(write, store):
store.close()


def load_dataset(filename_or_obj, **kwargs):
    """Open, load into memory, and close a Dataset from a file or file-like
    object.

    This is a thin wrapper around :py:meth:`~xarray.open_dataset`. It differs
    from `open_dataset` in that it loads the Dataset into memory, closes the
    file, and returns the Dataset. In contrast, `open_dataset` keeps the file
    handle open and lazy loads its contents. All parameters are passed directly
    to `open_dataset`. See that documentation for further details.

    Returns
    -------
    dataset : Dataset
        The newly created Dataset.

    See Also
    --------
    open_dataset
    """
    # 'cache' controls lazy caching of variables in open_dataset; it is
    # meaningless here because the data is always loaded eagerly and the
    # file is closed, so reject it explicitly rather than silently ignore it.
    if 'cache' in kwargs:
        raise TypeError('cache has no effect in this context')

    # The context manager guarantees the underlying file handle is closed
    # even if load() raises.
    with open_dataset(filename_or_obj, **kwargs) as ds:
        return ds.load()


def load_dataarray(filename_or_obj, **kwargs):
    """Open, load into memory, and close a DataArray from a file or file-like
    object containing a single data variable.

    This is a thin wrapper around :py:meth:`~xarray.open_dataarray`. It differs
    from `open_dataarray` in that it loads the DataArray into memory, closes
    the file, and returns the DataArray. In contrast, `open_dataarray` keeps
    the file handle open and lazy loads its contents. All parameters are
    passed directly to `open_dataarray`. See that documentation for further
    details.

    Returns
    -------
    dataarray : DataArray
        The newly created DataArray.

    See Also
    --------
    open_dataarray
    """
    # 'cache' controls lazy caching of variables in open_dataarray; it is
    # meaningless here because the data is always loaded eagerly and the
    # file is closed, so reject it explicitly rather than silently ignore it.
    if 'cache' in kwargs:
        raise TypeError('cache has no effect in this context')

    # The context manager guarantees the underlying file handle is closed
    # even if load() raises.
    with open_dataarray(filename_or_obj, **kwargs) as da:
        return da.load()


def open_dataset(filename_or_obj, group=None, decode_cf=True,
mask_and_scale=None, decode_times=True, autoclose=None,
concat_characters=True, decode_coords=True, engine=None,
chunks=None, lock=None, cache=None, drop_variables=None,
backend_kwargs=None, use_cftime=None):
"""Load and decode a dataset from a file or file-like object.
"""Open and decode a dataset from a file or file-like object.

Parameters
----------
Expand Down Expand Up @@ -406,7 +458,8 @@ def open_dataarray(filename_or_obj, group=None, decode_cf=True,
concat_characters=True, decode_coords=True, engine=None,
chunks=None, lock=None, cache=None, drop_variables=None,
backend_kwargs=None, use_cftime=None):
"""Open an DataArray from a netCDF file containing a single data variable.
"""Open an DataArray from a file or file-like object containing a single
data variable.

This is designed to read netCDF files with only one data variable. If
multiple variables are present then a ValueError is raised.
Expand Down
19 changes: 18 additions & 1 deletion xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import xarray as xr
from xarray import (
DataArray, Dataset, backends, open_dataarray, open_dataset, open_mfdataset,
save_mfdataset)
save_mfdataset, load_dataset, load_dataarray)
from xarray.backends.common import robust_getitem
from xarray.backends.netCDF4_ import _extract_nc4_variable_encoding
from xarray.backends.pydap_ import PydapDataStore
Expand Down Expand Up @@ -2641,6 +2641,23 @@ def test_save_mfdataset_compute_false_roundtrip(self):
with open_mfdataset([tmp1, tmp2]) as actual:
assert_identical(actual, original)

def test_load_dataset(self):
    """Round-trip a netCDF file through load_dataset and overwrite it."""
    with create_tmp_file() as tmp:
        expected = Dataset({'foo': ('x', np.random.randn(10))})
        expected.to_netcdf(tmp)
        loaded = load_dataset(tmp)
        # Writing back to the same path would raise a file-lock error if
        # the file handle were still open (i.e. if open_dataset were used).
        loaded.to_netcdf(tmp)

def test_load_dataarray(self):
    """Round-trip a single-variable netCDF file through load_dataarray."""
    with create_tmp_file() as tmp:
        expected = Dataset({'foo': ('x', np.random.randn(10))})
        expected.to_netcdf(tmp)
        loaded = load_dataarray(tmp)
        # Writing back to the same path would raise a file-lock error if
        # the file handle were still open (i.e. if open_dataarray were used).
        loaded.to_netcdf(tmp)


@requires_scipy_or_netCDF4
@requires_pydap
Expand Down
15 changes: 5 additions & 10 deletions xarray/tutorial.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def open_dataset(name, cache=True, cache_dir=_default_cache_dir,
github_url='https://github.com/pydata/xarray-data',
branch='master', **kws):
"""
Load a dataset from the online repository (requires internet).
Open a dataset from the online repository (requires internet).

If a local copy is found then always use that to avoid network traffic.

Expand Down Expand Up @@ -91,17 +91,12 @@ def open_dataset(name, cache=True, cache_dir=_default_cache_dir,

def load_dataset(*args, **kwargs):
    """
    Open, load into memory, and close a dataset from the online repository
    (requires internet).

    All arguments are forwarded to ``tutorial.open_dataset``; see that
    function's documentation for details.

    See Also
    --------
    open_dataset
    """
    # The context manager guarantees the cached local file is closed even if
    # load() raises, so callers can freely rewrite or delete the file.
    with open_dataset(*args, **kwargs) as ds:
        return ds.load()