Skip to content

Commit

Permalink
transform numpy object-dtype strings (vlen) to numpy unicode strings
Browse files (browse the repository at this point in the history)
  • Loading branch information
kmuehlbauer committed Mar 21, 2023
1 parent 2041f6e commit 4b65ffd
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 0 deletions.
9 changes: 9 additions & 0 deletions xarray/conventions.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -389,6 +389,15 @@ def decode_cf_variable(
if decode_times:
var = times.CFDatetimeCoder(use_cftime=use_cftime).decode(var, name=name)

# transform numpy object-dtype strings to numpy unicode strings
if (
"dtype" in var.encoding
and var.encoding["dtype"] == str
and original_dtype == object
):
original_dtype = var.encoding["dtype"]
var = var.astype(var.encoding["dtype"])

dimensions, data, attributes, encoding = variables.unpack_for_decoding(var)
# TODO(shoyer): convert everything below to use coders

Expand Down
1 change: 1 addition & 0 deletions xarray/tests/test_backends.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1360,6 +1360,7 @@ def test_encoding_kwarg_vlen_string(self) -> None:
with self.roundtrip(original, save_kwargs=kwargs) as actual:
assert actual["x"].encoding["dtype"] is str
assert_identical(actual, expected)
assert actual["x"].dtype == "<U3"

def test_roundtrip_string_with_fill_value_vlen(self) -> None:
values = np.array(["ab", "cdef", np.nan], dtype=object)
Expand Down

0 comments on commit 4b65ffd

Please sign in to comment.