From b61deebecbeba978c820f1ca87fd679b66c6059c Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 8 Nov 2019 17:51:41 -0700 Subject: [PATCH 1/6] Replace `equivalent()` with `allclose_or_equiv()` --- xarray/coding/variables.py | 9 +++++++-- xarray/tests/test_coding.py | 13 +++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 5f9c8932b6b..782ed100c96 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -8,7 +8,6 @@ from ..core import dtypes, duck_array_ops, indexing from ..core.pycompat import dask_array_type -from ..core.utils import equivalent from ..core.variable import Variable @@ -152,18 +151,24 @@ def encode(self, variable, name=None): fv = encoding.get("_FillValue") mv = encoding.get("missing_value") - if fv is not None and mv is not None and not equivalent(fv, mv): + if ( + fv is not None + and mv is not None + and not duck_array_ops.allclose_or_equiv(fv, mv) + ): raise ValueError( "Variable {!r} has multiple fill values {}. " "Cannot encode data. ".format(name, [fv, mv]) ) if fv is not None: + # TODO: Cast _FillValue to the same dtype as the data fill_value = pop_to(encoding, attrs, "_FillValue", name=name) if not pd.isnull(fill_value): data = duck_array_ops.fillna(data, fill_value) if mv is not None: + # TODO: Cast missing_value to the same dtype as the data fill_value = pop_to(encoding, attrs, "missing_value", name=name) if not pd.isnull(fill_value) and fv is None: data = duck_array_ops.fillna(data, fill_value) diff --git a/xarray/tests/test_coding.py b/xarray/tests/test_coding.py index 6cd584daa96..ecee2f420cc 100644 --- a/xarray/tests/test_coding.py +++ b/xarray/tests/test_coding.py @@ -20,6 +20,19 @@ def test_CFMaskCoder_decode(): assert_identical(expected, encoded) +def test_CFMaskCoder_encode_missing_fill_values_conflict(): + original = xr.Variable( + ("x",), + [0.0, -1.0, 1.0], + encoding={"_FillValue": np.float32(1e20), "missing_value": np.float64(1e20)}, + ) + coder = variables.CFMaskCoder() + encoded = coder.encode(original) + + assert encoded.dtype == encoded.attrs["missing_value"].dtype + assert encoded.dtype == encoded.attrs["_FillValue"].dtype + + def test_CFMaskCoder_missing_value(): expected = xr.DataArray( np.array([[26915, 27755, -9999, 27705], [25595, -9999, 28315, -9999]]), From 0740853df1f873fad460520e1c57e7405f1f2bd4 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 8 Nov 2019 18:24:23 -0700 Subject: [PATCH 2/6] Ensure _FillValue & missing_value are cast to same dtype as data's --- xarray/coding/variables.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 782ed100c96..db7647f8569 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -162,13 +162,17 @@ def encode(self, variable, name=None): ) if fv is not None: - # TODO: Cast _FillValue to the same dtype as the data + encoding["_FillValue"] = np.asarray(encoding["_FillValue"]).astype( + data.dtype + ) fill_value = pop_to(encoding, attrs, "_FillValue", name=name) if not pd.isnull(fill_value): data = duck_array_ops.fillna(data, fill_value) if mv is not None: - # TODO: Cast missing_value to the same dtype as the data + encoding["missing_value"] = np.asarray(encoding["missing_value"]).astype( + data.dtype + ) fill_value = pop_to(encoding, attrs, "missing_value", name=name) if not pd.isnull(fill_value) and fv is None: data = duck_array_ops.fillna(data, fill_value) From 404bd75a392a650abd2abf484e58bb9c2c3fc593 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Mon, 11 Nov 2019 22:42:51 -0700 Subject: [PATCH 3/6] Use Numpy scalar during type casting --- xarray/coding/variables.py | 10 ++++------ xarray/tests/test_coding.py | 3 +++ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index db7647f8569..e0b597e222d 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -162,17 +162,15 @@ def encode(self, variable, name=None): ) if fv is not None: - encoding["_FillValue"] = np.asarray(encoding["_FillValue"]).astype( - data.dtype - ) + # Ensure _FillValue is cast to same dtype as data's + encoding["_FillValue"] = data.dtype.type(encoding["_FillValue"]) fill_value = pop_to(encoding, attrs, "_FillValue", name=name) if not pd.isnull(fill_value): data = duck_array_ops.fillna(data, fill_value) if mv is not None: - encoding["missing_value"] = np.asarray(encoding["missing_value"]).astype( - data.dtype - ) + # Ensure missing_value is cast to same dtype as data's + encoding["missing_value"] = data.dtype.type(encoding["missing_value"]) fill_value = pop_to(encoding, attrs, "missing_value", name=name) if not pd.isnull(fill_value) and fv is None: data = duck_array_ops.fillna(data, fill_value) diff --git a/xarray/tests/test_coding.py b/xarray/tests/test_coding.py index ecee2f420cc..202461926ca 100644 --- a/xarray/tests/test_coding.py +++ b/xarray/tests/test_coding.py @@ -32,6 +32,9 @@ def test_CFMaskCoder_encode_missing_fill_values_conflict(): assert encoded.dtype == encoded.attrs["missing_value"].dtype assert encoded.dtype == encoded.attrs["_FillValue"].dtype + roundtripped = coder.decode(coder.encode(original)) + assert_identical(roundtripped, original) + def test_CFMaskCoder_missing_value(): expected = xr.DataArray( From d3e9562d7f33353a0906d4ba1eb2201bd92bf4c9 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 12 Nov 2019 10:35:14 -0700 Subject: [PATCH 4/6] Update ValueError message --- xarray/coding/variables.py | 7 +++---- xarray/tests/test_coding.py | 7 ++++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index e0b597e222d..2b5f87ab0cd 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -157,20 +157,19 @@ def encode(self, variable, name=None): and not duck_array_ops.allclose_or_equiv(fv, mv) ): raise ValueError( - "Variable {!r} has multiple fill values {}. " - "Cannot encode data. ".format(name, [fv, mv]) + f"Variable {name!r} has conflicting _FillValue ({fv}) and missing_value ({mv}). Cannot encode data." ) if fv is not None: # Ensure _FillValue is cast to same dtype as data's - encoding["_FillValue"] = data.dtype.type(encoding["_FillValue"]) + encoding["_FillValue"] = data.dtype.type(fv) fill_value = pop_to(encoding, attrs, "_FillValue", name=name) if not pd.isnull(fill_value): data = duck_array_ops.fillna(data, fill_value) if mv is not None: # Ensure missing_value is cast to same dtype as data's - encoding["missing_value"] = data.dtype.type(encoding["missing_value"]) + encoding["missing_value"] = data.dtype.type(mv) fill_value = pop_to(encoding, attrs, "missing_value", name=name) if not pd.isnull(fill_value) and fv is None: data = duck_array_ops.fillna(data, fill_value) diff --git a/xarray/tests/test_coding.py b/xarray/tests/test_coding.py index 202461926ca..c8608e869a1 100644 --- a/xarray/tests/test_coding.py +++ b/xarray/tests/test_coding.py @@ -31,9 +31,10 @@ def test_CFMaskCoder_encode_missing_fill_values_conflict(): assert encoded.dtype == encoded.attrs["missing_value"].dtype assert encoded.dtype == encoded.attrs["_FillValue"].dtype - - roundtripped = coder.decode(coder.encode(original)) - assert_identical(roundtripped, original) + + with pytest.warns(variables.SerializationWarning): + roundtripped = coder.decode(coder.encode(original)) + assert_identical(roundtripped, original) def test_CFMaskCoder_missing_value(): From 3aa18f000d488f60996b1599d72036af5e613f53 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 12 Nov 2019 10:36:06 -0700 Subject: [PATCH 5/6] Formatting only --- xarray/tests/test_coding.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_coding.py b/xarray/tests/test_coding.py index c8608e869a1..3e0474e7b60 100644 --- a/xarray/tests/test_coding.py +++ b/xarray/tests/test_coding.py @@ -31,7 +31,7 @@ def test_CFMaskCoder_encode_missing_fill_values_conflict(): assert encoded.dtype == encoded.attrs["missing_value"].dtype assert encoded.dtype == encoded.attrs["_FillValue"].dtype - + with pytest.warns(variables.SerializationWarning): roundtripped = coder.decode(coder.encode(original)) assert_identical(roundtripped, original) From 43b84d4b8688f7e9e5dcf036ab4e22922b9dabe6 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 12 Nov 2019 12:10:14 -0700 Subject: [PATCH 6/6] Update whats-new.rst --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 96f0ba9a4a6..268669f9dd3 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -79,6 +79,8 @@ New Features Bug fixes ~~~~~~~~~ +- Harmonize `_FillValue`, `missing_value` during encoding and decoding steps. (:pull:`3502`) + By `Anderson Banihirwe `_. - Fix regression introduced in v0.14.0 that would cause a crash if dask is installed but cloudpickle isn't (:issue:`3401`) by `Rhys Doyle `_ - Fix grouping over variables with NaNs. (:issue:`2383`, :pull:`3406`).