From ca5050d0c713689750fe39f8e4f1a018a5e9a785 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Mon, 14 Oct 2024 16:03:09 +0200 Subject: [PATCH] set default resolution to "s", which actually means, use pandas lowest resolution, fix code and tests to allow this --- xarray/coding/times.py | 11 +++++++- xarray/core/options.py | 2 +- xarray/tests/__init__.py | 7 +++-- xarray/tests/test_backends.py | 7 ++--- xarray/tests/test_coding_times.py | 44 ++++++++++++++++++++++--------- xarray/tests/test_conventions.py | 7 ++--- xarray/tests/test_dataset.py | 14 +++++----- 7 files changed, 63 insertions(+), 29 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 82e15b6ab35..96641a83687 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -24,6 +24,7 @@ from xarray.core.common import contains_cftime_datetimes, is_np_datetime_like from xarray.core.duck_array_ops import asarray, ravel, reshape from xarray.core.formatting import first_n_items, format_timestamp, last_item +from xarray.core.options import _get_datetime_resolution from xarray.core.pdcompat import _timestamp_as_unit, default_precision_timestamp from xarray.core.utils import emit_user_level_warning from xarray.core.variable import Variable @@ -98,6 +99,13 @@ def _is_numpy_compatible_time_range(times): tmin = times.min() tmax = times.max() try: + # before relaxing the nanosecond constraint + # this raised OutOfBoundsDatetime for + # times < 1678 and times > 2262 + # this isn't the case anymore for other resolutions like "s" + # now, we raise for dates before 1582-10-15 + _check_date_is_after_shift(tmin, "standard") + _check_date_is_after_shift(tmax, "standard") convert_time_or_go_back(tmin, pd.Timestamp) convert_time_or_go_back(tmax, pd.Timestamp) except pd.errors.OutOfBoundsDatetime: @@ -290,7 +298,7 @@ def _check_date_is_after_shift(date: pd.Timestamp, calendar: str) -> None: # proleptic_gregorian and standard/gregorian are only equivalent # if reference 
date and date range is >= 1582-10-15 if calendar != "proleptic_gregorian": - if date < pd.Timestamp("1582-10-15"): + if date < type(date)(1582, 10, 15): raise OutOfBoundsDatetime( f"Dates before 1582-10-15 cannot be decoded " f"with pandas using {calendar!r} calendar." @@ -318,6 +326,7 @@ def _decode_datetime_with_pandas( try: time_unit, ref_date = _unpack_time_unit_and_ref_date(units) ref_date = _align_reference_date_and_unit(ref_date, time_unit) + ref_date = _align_reference_date_and_unit(ref_date, _get_datetime_resolution()) except ValueError as err: # ValueError is raised by pd.Timestamp for non-ISO timestamp # strings, in which case we fall back to using cftime diff --git a/xarray/core/options.py b/xarray/core/options.py index dd6a1620061..f185987a88e 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -267,7 +267,7 @@ class set_options: warn_for_unclosed_files : bool, default: False Whether or not to issue a warning when unclosed files are deallocated. This is mostly useful for debugging. - time_resolution : {"s", "ms", "us", "ns"}, default: "ns" + time_resolution : {"s", "ms", "us", "ns"}, default: "s" Time resolution used for CF encoding/decoding. 
Examples diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index a55b377d2c0..5d17624cc9d 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -18,7 +18,7 @@ from xarray import Dataset from xarray.core.duck_array_ops import allclose_or_equiv # noqa: F401 from xarray.core.extension_array import PandasExtensionArray -from xarray.core.options import set_options +from xarray.core.options import _get_datetime_resolution, set_options from xarray.core.variable import IndexVariable from xarray.testing import ( # noqa: F401 assert_chunks_equal, @@ -323,7 +323,10 @@ def create_test_data( f'Not enough letters for filling this dimension size ({_dims["dim3"]})' ) obj["dim3"] = ("dim3", list(string.ascii_lowercase[0 : _dims["dim3"]])) - obj["time"] = ("time", pd.date_range("2000-01-01", periods=20, unit="s")) + obj["time"] = ( + "time", + pd.date_range("2000-01-01", periods=20, unit=f"{_get_datetime_resolution()}"), + ) for v, dims in sorted(_vars.items()): data = rs.normal(size=tuple(_dims[d] for d in dims)) obj[v] = (dims, data) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 45d56b63e52..5d85e6c04e4 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -53,7 +53,7 @@ from xarray.coding.variables import SerializationWarning from xarray.conventions import encode_dataset_coordinates from xarray.core import indexing -from xarray.core.options import set_options +from xarray.core.options import _get_datetime_resolution, set_options from xarray.core.utils import module_available from xarray.namedarray.pycompat import array_type from xarray.tests import ( @@ -1590,8 +1590,9 @@ def test_open_encodings(self) -> None: expected = Dataset() - # todo: check, if specifying "s" is enough - time = pd.date_range("1999-01-05", periods=10, unit="s") + time = pd.date_range( + "1999-01-05", periods=10, unit=f"{_get_datetime_resolution()}" + ) encoding = {"units": units, "dtype": np.dtype("int32")} 
expected["time"] = ("time", time, {}, encoding) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 10de701412e..dbb014c14ff 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -39,6 +39,7 @@ from xarray.coding.variables import SerializationWarning from xarray.conventions import _update_bounds_attributes, cf_encoder from xarray.core.common import contains_cftime_datetimes +from xarray.core.options import _get_datetime_resolution from xarray.core.utils import is_duck_dask_array from xarray.testing import assert_equal, assert_identical from xarray.tests import ( @@ -134,7 +135,9 @@ def test_cf_datetime(num_dates, units, calendar) -> None: max_y = np.ravel(np.atleast_1d(expected))[np.nanargmax(num_dates)] # .year typ = type(min_y) border = typ(1582, 10, 15) - if calendar == "proleptic_gregorian" or (min_y >= border and max_y >= border): + if (calendar == "proleptic_gregorian" and _get_datetime_resolution() != "ns") or ( + min_y >= border and max_y >= border + ): expected = cftime_to_nptime(expected) with warnings.catch_warnings(): @@ -214,12 +217,15 @@ def test_decode_standard_calendar_inside_timestamp_range(calendar) -> None: import cftime units = "days since 0001-01-01" - unit = cast(Literal["s", "ms", "us", "ns"], "us") + unit = cast(Literal["s", "ms", "us", "ns"], _get_datetime_resolution()) times = pd.date_range("2001-04-01-00", end="2001-04-30-23", unit=unit, freq="h") + # to_pydatetime() will return microsecond time = cftime.date2num(times.to_pydatetime(), units, calendar=calendar) expected = times.values - if calendar == "proleptic_gregorian": - unit = "s" + # for cftime we get "us" resolution + # ns resolution is handled by cftime, too (OutOfBounds) + if calendar != "proleptic_gregorian" or _get_datetime_resolution() == "ns": + unit = "us" expected_dtype = np.dtype(f"M8[{unit}]") actual = decode_cf_datetime(time, units, calendar=calendar) assert actual.dtype == expected_dtype @@ -268,7 
+274,7 @@ def test_decode_dates_outside_timestamp_range(calendar) -> None: time, units, calendar=calendar, only_use_cftime_datetimes=True ) # special case proleptic_gregorian - if calendar == "proleptic_gregorian": + if calendar == "proleptic_gregorian" and _get_datetime_resolution() != "ns": expected = expected.astype("=M8[us]") expected_date_type = type(expected[0]) @@ -289,7 +295,11 @@ def test_decode_standard_calendar_single_element_inside_timestamp_range( calendar, ) -> None: units = "days since 0001-01-01" - unit = "s" if calendar == "proleptic_gregorian" else "us" + unit = ( + _get_datetime_resolution() + if (calendar == "proleptic_gregorian" and _get_datetime_resolution() != "ns") + else "us" + ) for num_time in [735368, [735368], [[735368]]]: with warnings.catch_warnings(): warnings.filterwarnings("ignore", "Unable to decode time axis") @@ -337,7 +347,11 @@ def test_decode_standard_calendar_multidim_time_inside_timestamp_range( import cftime units = "days since 0001-01-01" - unit = "s" if calendar == "proleptic_gregorian" else "us" + unit = ( + _get_datetime_resolution() + if (calendar == "proleptic_gregorian" and _get_datetime_resolution() != "ns") + else "us" + ) times1 = pd.date_range("2001-04-01", end="2001-04-05", freq="D") times2 = pd.date_range("2001-05-01", end="2001-05-05", freq="D") time1 = cftime.date2num(times1.to_pydatetime(), units, calendar=calendar) @@ -426,8 +440,8 @@ def test_decode_multidim_time_outside_timestamp_range(calendar) -> None: actual = decode_cf_datetime(mdim_time, units, calendar=calendar) dtype: np.dtype - if calendar == "proleptic_gregorian": - dtype = np.dtype("=M8[s]") + if calendar == "proleptic_gregorian" and _get_datetime_resolution() != "ns": + dtype = np.dtype(f"=M8[{_get_datetime_resolution()}]") expected1 = expected1.astype(dtype) expected2 = expected2.astype(dtype) else: @@ -528,7 +542,7 @@ def test_decoded_cf_datetime_array_2d() -> None: ("x", "y"), np.array([[0, 1], [2, 3]]), {"units": "days since 2000-01-01"} ) 
result = CFDatetimeCoder().decode(variable) - assert result.dtype == "datetime64[s]" + assert result.dtype == f"datetime64[{_get_datetime_resolution()}]" expected = pd.date_range("2000-01-01", periods=4).values.reshape(2, 2) assert_array_equal(np.asarray(result), expected) @@ -697,7 +711,7 @@ def test_decode_cf(calendar) -> None: if calendar not in _STANDARD_CALENDARS: assert ds.test.dtype == np.dtype("O") else: - assert ds.test.dtype == np.dtype("M8[s]") + assert ds.test.dtype == np.dtype(f"M8[{_get_datetime_resolution()}]") def test_decode_cf_time_bounds() -> None: @@ -722,7 +736,7 @@ def test_decode_cf_time_bounds() -> None: "calendar": "standard", } dsc = decode_cf(ds) - assert dsc.time_bnds.dtype == np.dtype("M8[s]") + assert dsc.time_bnds.dtype == np.dtype(f"M8[{_get_datetime_resolution()}]") dsc = decode_cf(ds, decode_times=False) assert dsc.time_bnds.dtype == np.dtype("int64") @@ -1299,7 +1313,11 @@ def test_roundtrip_datetime64_nanosecond_precision( assert encoded_var.data.dtype == dtype decoded_var = conventions.decode_cf_variable("foo", encoded_var) - assert decoded_var.dtype == np.dtype(f"=M8[{timeunit}]") + if _get_datetime_resolution() == "ns": + dtypeunit = "ns" + else: + dtypeunit = timeunit + assert decoded_var.dtype == np.dtype(f"=M8[{dtypeunit}]") assert ( decoded_var.encoding["units"] == f"{_numpy_to_netcdf_timeunit(timeunit)} since 1970-01-01 00:00:00" diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index 7d86cb7c036..bcd49f1b608 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -19,6 +19,7 @@ from xarray.backends.common import WritableCFDataStore from xarray.backends.memory import InMemoryDataStore from xarray.conventions import decode_cf +from xarray.core.options import _get_datetime_resolution from xarray.testing import assert_identical from xarray.tests import ( assert_array_equal, @@ -364,7 +365,7 @@ def test_dataset_repr_with_netcdf4_datetimes(self) -> None: attrs = 
{"units": "days since 1900-01-01"} ds = decode_cf(Dataset({"time": ("time", [0, 1], attrs)})) - assert "(time) datetime64[s]" in repr(ds) + assert f"(time) datetime64[{_get_datetime_resolution()}]" in repr(ds) @requires_cftime def test_decode_cf_datetime_transition_to_invalid(self) -> None: @@ -447,13 +448,13 @@ def test_decode_cf_time_kwargs(self) -> None: dsc = conventions.decode_cf(ds) assert dsc.timedelta.dtype == np.dtype("m8[ns]") - assert dsc.time.dtype == np.dtype("M8[s]") + assert dsc.time.dtype == np.dtype(f"M8[{_get_datetime_resolution()}]") dsc = conventions.decode_cf(ds, decode_times=False) assert dsc.timedelta.dtype == np.dtype("int64") assert dsc.time.dtype == np.dtype("int64") dsc = conventions.decode_cf(ds, decode_times=True, decode_timedelta=False) assert dsc.timedelta.dtype == np.dtype("int64") - assert dsc.time.dtype == np.dtype("M8[s]") + assert dsc.time.dtype == np.dtype(f"M8[{_get_datetime_resolution()}]") dsc = conventions.decode_cf(ds, decode_times=False, decode_timedelta=True) assert dsc.timedelta.dtype == np.dtype("m8[ns]") assert dsc.time.dtype == np.dtype("int64") diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 053acd34b1e..d4a19883a54 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -39,6 +39,7 @@ from xarray.core.common import duck_array_ops, full_like from xarray.core.coordinates import Coordinates, DatasetCoordinates from xarray.core.indexes import Index, PandasIndex +from xarray.core.options import _get_datetime_resolution from xarray.core.types import ArrayLike from xarray.core.utils import is_scalar from xarray.groupers import TimeResampler @@ -290,7 +291,7 @@ def test_repr(self) -> None: Coordinates: * dim2 (dim2) float64 72B 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 * dim3 (dim3) {} 40B 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j' - * time (time) datetime64[s] 160B 2000-01-01 2000-01-02 ... 2000-01-20 + * time (time) datetime64[{}] 160B 2000-01-01 2000-01-02 ... 
2000-01-20 numbers (dim3) int64 80B 0 1 2 0 0 1 1 2 2 3 Dimensions without coordinates: dim1 Data variables: @@ -299,7 +300,8 @@ def test_repr(self) -> None: var3 (dim3, dim1) float64 640B 0.5565 -0.2121 0.4563 ... -0.2452 -0.3616 Attributes: foo: bar""".format( - data["dim3"].dtype + data["dim3"].dtype, + _get_datetime_resolution(), ) ) actual = "\n".join(x.rstrip() for x in repr(data).split("\n")) @@ -442,8 +444,8 @@ def test_info(self) -> None: ds.info(buf=buf) expected = dedent( - """\ - xarray.Dataset { + f"""\ + xarray.Dataset {{ dimensions: \tdim2 = 9 ; \ttime = 20 ; @@ -452,7 +454,7 @@ def test_info(self) -> None: variables: \tfloat64 dim2(dim2) ; - \tdatetime64[s] time(time) ; + \tdatetime64[{_get_datetime_resolution()}] time(time) ; \tfloat64 var1(dim1, dim2) ; \t\tvar1:foo = variable ; \tfloat64 var2(dim1, dim2) ; @@ -464,7 +466,7 @@ def test_info(self) -> None: // global attributes: \t:unicode_attr = ba® ; \t:string_attr = bar ; - }""" + }}""" ) actual = buf.getvalue() assert expected == actual