Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't set encoding attributes on bounds variables. #2965

Merged
merged 18 commits into from
Jun 25, 2019
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ Enhancements
Bug fixes
~~~~~~~~~

- Don't set attributes on bounds variables when writing to netCDF.
:py:meth:`xr.open_mfdataset` sets variable encodings to that of variables
in first file.(:issue:`2436`, :issue:`2921`)
By `Deepak Cherian <https://github.com/dcherian>`_.
- indexing with an empty list creates an object with zero-length axis (:issue:`2882`)
By `Mayeul d'Avezac <https://github.com/mdavezac>`_.
- Return correct count for scalar datetime64 arrays (:issue:`2770`)
Expand Down
3 changes: 3 additions & 0 deletions xarray/backends/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -724,6 +724,9 @@ def open_mfdataset(paths, chunks=None, concat_dim=_CONCAT_DIM_DEFAULT,

combined._file_obj = _MultiFileCloser(file_objs)
combined.attrs = datasets[0].attrs
for v in combined.variables:
if v in datasets[0]:
combined[v].encoding = datasets[0][v].encoding
dcherian marked this conversation as resolved.
Show resolved Hide resolved
return combined


Expand Down
35 changes: 25 additions & 10 deletions xarray/conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -491,8 +491,6 @@ def cf_decoder(variables, attributes,
"""
Decode a set of CF encoded variables and attributes.

See Also, decode_cf_variable

Parameters
----------
variables : dict
Expand All @@ -514,6 +512,10 @@ def cf_decoder(variables, attributes,
A dictionary mapping from variable name to xarray.Variable objects.
decoded_attributes : dict
A dictionary mapping from attribute name to values.

See also
--------
decode_cf_variable
"""
variables, attributes, _ = decode_cf_variables(
variables, attributes, concat_characters, mask_and_scale, decode_times)
Expand Down Expand Up @@ -594,14 +596,12 @@ def encode_dataset_coordinates(dataset):

def cf_encoder(variables, attributes):
"""
A function which takes a dicts of variables and attributes
and encodes them to conform to CF conventions as much
as possible. This includes masking, scaling, character
array handling, and CF-time encoding.

Decode a set of CF encoded variables and attributes.
Encode a set of CF encoded variables and attributes.
Takes a dicts of variables and attributes and encodes them
to conform to CF conventions as much as possible.
This includes masking, scaling, character array handling,
and CF-time encoding.

See Also, decode_cf_variable

Parameters
----------
Expand All @@ -617,8 +617,23 @@ def cf_encoder(variables, attributes):
encoded_attributes : dict
A dictionary mapping from attribute name to value

See also: encode_cf_variable
See also
--------
decode_cf_variable, encode_cf_variable
"""
new_vars = OrderedDict((k, encode_cf_variable(v, name=k))
for k, v in variables.items())

# Remove attrs from bounds variables (issue #2921)
for var in new_vars.values():
bounds = var.attrs['bounds'] if 'bounds' in var.attrs else None
if bounds and bounds in new_vars:
# see http://cfconventions.org/cf-conventions/cf-conventions.html#cell-boundaries # noqa
for attr in ['units', 'standard_name', 'axis', 'positive',
'calendar', 'long_name', 'leap_month', 'leap_year',
'month_lengths']:
if attr in new_vars[bounds].attrs and attr in var.attrs:
if new_vars[bounds].attrs[attr] == var.attrs[attr]:
new_vars[bounds].attrs.pop(attr)

Copy link
Collaborator

@mathause mathause May 17, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we issue a warning here?

else:
    warning.warn("The attribute 'units' is not the same in the variable "
"'time' and it's associated bounds 'time_bnds',"
" which is not cf-compliant.")

or some such

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to? xarray allows writing CF-non-compliant files anyway...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the warning you added is the perfect approach. It will still be issued in the case of @mathause's example, but will still allow a user to write a non-CF-compliant file without a warning if the encoding attributes do not need to be computed on the fly.

return new_vars, attributes
20 changes: 20 additions & 0 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -2474,6 +2474,26 @@ def test_attrs_mfdataset(self):
'no attribute'):
actual.test2

def test_encoding_mfdataset(self):
original = Dataset({'foo': ('t', np.random.randn(10)),
't': ('t', pd.date_range(start='2010-01-01',
periods=10,
freq='1D'))})
original.t.encoding['units'] = 'days since 2010-01-01'

with create_tmp_file() as tmp1:
with create_tmp_file() as tmp2:
ds1 = original.isel(t=slice(5))
ds2 = original.isel(t=slice(5, 10))
ds1.t.encoding['units'] = 'days since 2010-01-01'
ds2.t.encoding['units'] = 'days since 2000-01-01'
ds1.to_netcdf(tmp1)
ds2.to_netcdf(tmp2)
with open_mfdataset([tmp1, tmp2]) as actual:
assert actual.t.encoding['units'] == original.t.encoding['units'] # noqa
assert actual.t.encoding['units'] == ds1.t.encoding['units'] # noqa
assert actual.t.encoding['units'] != ds2.t.encoding['units'] # noqa

def test_preprocess_mfdataset(self):
original = Dataset({'foo': ('x', np.random.randn(10))})
with create_tmp_file() as tmp:
Expand Down
40 changes: 38 additions & 2 deletions xarray/tests/test_coding_times.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@
import pandas as pd
import pytest

from xarray import DataArray, Variable, coding, decode_cf
from xarray import DataArray, Dataset, Variable, coding, decode_cf
from xarray.backends.api import open_dataset, open_mfdataset
from xarray.coding.times import (
_import_cftime, cftime_to_nptime, decode_cf_datetime, encode_cf_datetime)
from xarray.coding.variables import SerializationWarning
from xarray.conventions import _update_bounds_attributes
from xarray.conventions import _update_bounds_attributes, cf_encoder
from xarray.core.common import contains_cftime_datetimes
from xarray.testing import assert_equal

Expand Down Expand Up @@ -671,6 +672,41 @@ def test_decode_cf_time_bounds():
_update_bounds_attributes(ds.variables)


def test_encode_time_bounds():
dcherian marked this conversation as resolved.
Show resolved Hide resolved
# create time and time_bounds DataArrays for Jan-1850 and Feb-1850
time_bounds_vals = np.array([[0.0, 31.0], [31.0, 59.0]],
dtype=np.int64)
time_vals = time_bounds_vals.mean(axis=1)

time_var = DataArray(time_vals, dims='time',
coords={'time': time_vals})
time_bounds_var = DataArray(time_bounds_vals, dims=('time', 'd2'),
coords={'time': time_vals})

# create Dataset of time and time_bounds
ds = Dataset(coords={'time': time_var},
data_vars={'time_bounds': time_bounds_var})
ds.time.attrs = {'bounds': 'time_bounds', 'calendar': 'noleap',
'units': 'days since 1850-01-01'}

# if time_bounds attrs are same as time attrs, pop them.
ds.time_bounds.attrs = {'calendar': 'noleap',
'units': 'days since 1850-01-01'}
encoded, _ = cf_encoder({k: ds[k] for k in ds.variables},
ds.attrs)
assert 'calendar' not in encoded['time_bounds'].attrs
assert 'units' not in encoded['time_bounds'].attrs

# for CF-noncompliant case of time_bounds attrs being different from
# time attrs; preserve them for faithful roundtrip
ds.time_bounds.attrs = {'calendar': 'noleap',
'units': 'days since 1849-01-01'}
encoded, _ = cf_encoder({k: ds[k] for k in ds.variables},
ds.attrs)
assert 'calendar' not in encoded['time_bounds'].attrs
assert encoded['time_bounds'].attrs['units'] == ds.time_bounds.attrs['units'] # noqa


@pytest.fixture(params=_ALL_CALENDARS)
def calendar(request):
return request.param
Expand Down