Skip to content

Commit

Permalink
Fix overflow issue in decode_cf_datetime for dtypes <= np.uint32 (pyd…
Browse files Browse the repository at this point in the history
  • Loading branch information
spencerkclark authored May 15, 2022
1 parent e02b1c3 commit 8de7061
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 3 deletions.
3 changes: 3 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,9 @@ Bug fixes
- :py:meth:`isel` with `drop=True` works as intended with scalar :py:class:`DataArray` indexers.
(:issue:`6554`, :pull:`6579`)
By `Michael Niklas <https://github.com/headtr1ck>`_.
- Fixed silent overflow issue when decoding times encoded with 32-bit and below
unsigned integer data types (:issue:`6589`, :pull:`6598`). By `Spencer Clark
<https://github.com/spencerkclark>`_.

Documentation
~~~~~~~~~~~~~
Expand Down
9 changes: 6 additions & 3 deletions xarray/coding/times.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,9 +218,12 @@ def _decode_datetime_with_pandas(flat_num_dates, units, calendar):
pd.to_timedelta(flat_num_dates.max(), delta) + ref_date

# To avoid integer overflow when converting to nanosecond units for integer
# dtypes smaller than np.int64 cast all integer-dtype arrays to np.int64
# (GH 2002).
if flat_num_dates.dtype.kind == "i":
# dtypes smaller than np.int64 cast all integer and unsigned integer dtype
# arrays to np.int64 (GH 2002, GH 6589). Note this is safe even in the case
# of np.uint64 values, because any np.uint64 value that would lead to
# overflow when converting to np.int64 would not be representable with a
# timedelta64 value, and therefore would raise an error in the lines above.
if flat_num_dates.dtype.kind in "iu":
flat_num_dates = flat_num_dates.astype(np.int64)

# Cast input ordinals to integers of nanoseconds because pd.to_timedelta
Expand Down
27 changes: 27 additions & 0 deletions xarray/tests/test_coding_times.py
Original file line number Diff line number Diff line change
Expand Up @@ -1121,3 +1121,30 @@ def test_should_cftime_be_used_target_not_npable():
ValueError, match="Calendar 'noleap' is only valid with cftime."
):
_should_cftime_be_used(src, "noleap", False)


@pytest.mark.parametrize("dtype", [np.uint8, np.uint16, np.uint32, np.uint64])
def test_decode_cf_datetime_uint(dtype):
units = "seconds since 2018-08-22T03:23:03Z"
num_dates = dtype(50)
result = decode_cf_datetime(num_dates, units)
expected = np.asarray(np.datetime64("2018-08-22T03:23:53", "ns"))
np.testing.assert_equal(result, expected)


@requires_cftime
def test_decode_cf_datetime_uint64_with_cftime():
units = "days since 1700-01-01"
num_dates = np.uint64(182621)
result = decode_cf_datetime(num_dates, units)
expected = np.asarray(np.datetime64("2200-01-01", "ns"))
np.testing.assert_equal(result, expected)


@requires_cftime
def test_decode_cf_datetime_uint64_with_cftime_overflow_error():
units = "microseconds since 1700-01-01"
calendar = "360_day"
num_dates = np.uint64(1_000_000 * 86_400 * 360 * 500_000)
with pytest.raises(OverflowError):
decode_cf_datetime(num_dates, units, calendar)

0 comments on commit 8de7061

Please sign in to comment.