From b896c8a44afafadf72cb63df73026d2dc1c89442 Mon Sep 17 00:00:00 2001 From: Tom Vo Date: Thu, 11 May 2023 12:10:51 -0700 Subject: [PATCH] Update DevOps to cache conda and fix attributes not being preserved with `xarray > 2023.3.0` (#465) --- .github/workflows/build_workflow.yml | 40 +- conda-env/ci.yml | 4 +- conda-env/dev.yml | 2 +- conda-env/readthedocs.yml | 2 +- tests/test_bounds.py | 91 ++- tests/test_dataset.py | 955 ++++----------------------- tests/test_regrid.py | 4 +- tests/test_temporal.py | 66 +- xcdat/{logger.py => _logger.py} | 28 +- xcdat/bounds.py | 4 +- xcdat/dataset.py | 4 +- xcdat/regridder/base.py | 4 +- xcdat/temporal.py | 12 +- 13 files changed, 285 insertions(+), 931 deletions(-) rename xcdat/{logger.py => _logger.py} (64%) diff --git a/.github/workflows/build_workflow.yml b/.github/workflows/build_workflow.yml index ddf0baf0..875b6395 100644 --- a/.github/workflows/build_workflow.yml +++ b/.github/workflows/build_workflow.yml @@ -62,32 +62,44 @@ jobs: - if: ${{ steps.skip_check.outputs.should_skip != 'true' }} uses: actions/checkout@v3 - - if: ${{ steps.skip_check.outputs.should_skip != 'true' }} - name: Cache Conda - uses: actions/cache@v3 - env: - # Increase this value to reset cache if conda-env/ci.yml has not changed in the workflow - CACHE_NUMBER: 0 - with: - path: ~/conda_pkgs_dir - key: ${{ runner.os }}-${{ matrix.python-version }}-conda-${{ env.CACHE_NUMBER }} - - if: ${{ steps.skip_check.outputs.should_skip != 'true' }} name: Set up Conda Environment uses: conda-incubator/setup-miniconda@v2 with: - activate-environment: "xcdat_ci" miniforge-variant: Mambaforge miniforge-version: latest + activate-environment: "xcdat_ci" use-mamba: true mamba-version: "*" - environment-file: conda-env/ci.yml channel-priority: strict auto-update-conda: true - # IMPORTANT: This needs to be set for caching to work properly! - use-only-tar-bz2: true python-version: ${{ matrix.python-version }} + # Refresh the cache every 24 hours to avoid inconsistencies of package versions + # between the CI pipeline and local installations. 
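+      # For illustration only (hypothetical values, not part of this workflow):
+      # on a Linux X64 runner with Python 3.9 on 2023-05-11, the composite key
+      # built in the "Cache Conda env" step below resolves to something like
+      #   conda-Linux-X64-3.9-20230511-<sha256 of conda-env/ci.yml>-0
+      # so a fresh cache entry is created whenever the OS, architecture, Python
+      # version, UTC date, ci.yml contents, or manual CACHE_NUMBER changes.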
+      - if: ${{ steps.skip_check.outputs.should_skip != 'true' }}
+        id: get-date
+        name: Get Date
+        run: echo "today=$(/bin/date -u '+%Y%m%d')" >> $GITHUB_OUTPUT
+        shell: bash
+
+      - if: ${{ steps.skip_check.outputs.should_skip != 'true' }}
+        id: cache
+        name: Cache Conda env
+        uses: actions/cache@v3
+        with:
+          path: ${{ env.CONDA }}/envs
+          key:
+            conda-${{ runner.os }}-${{ runner.arch }}-${{ matrix.python-version }}-${{
+            steps.get-date.outputs.today }}-${{ hashFiles('conda-env/ci.yml') }}-${{ env.CACHE_NUMBER }}
+        env:
+          # Increase this value to reset cache if conda-env/ci.yml has not changed in the workflow
+          CACHE_NUMBER: 0
+
+      - if: ${{ steps.skip_check.outputs.should_skip != 'true' && steps.cache.outputs.cache-hit != 'true' }}
+        name: Update environment
+        run: mamba env update -n xcdat_ci -f conda-env/ci.yml
+
       - if: ${{ steps.skip_check.outputs.should_skip != 'true' }}
         name: Install xcdat
         # Source: https://github.com/conda/conda-build/issues/4251#issuecomment-1053460542
diff --git a/conda-env/ci.yml b/conda-env/ci.yml
index e5a128a6..af73a5df 100644
--- a/conda-env/ci.yml
+++ b/conda-env/ci.yml
@@ -19,7 +19,9 @@ dependencies:
   - pandas
   - python-dateutil
   - xarray
-  - xesmf
+  # Constrained because 0.6.3 breaks with `import ESMF`
+  # Source: https://github.com/pangeo-data/xESMF/issues/212
+  - xesmf >0.6.3
   # Quality Assurance
   # ==================
   - types-python-dateutil
diff --git a/conda-env/dev.yml b/conda-env/dev.yml
index eff2480d..e5bec3b8 100644
--- a/conda-env/dev.yml
+++ b/conda-env/dev.yml
@@ -18,7 +18,7 @@ dependencies:
   - numpy=1.23.5
   - pandas=1.5.3
   - python-dateutil=2.8.2
-  - xarray=2023.3.0
+  - xarray=2023.4.2
   # ==================
   # Optional
   # ==================
diff --git a/conda-env/readthedocs.yml b/conda-env/readthedocs.yml
index 5204854c..a6a2896f 100644
--- a/conda-env/readthedocs.yml
+++ b/conda-env/readthedocs.yml
@@ -17,7 +17,7 @@ dependencies:
   - numpy=1.23.5
   - pandas=1.5.3
   - python-dateutil=2.8.2
-  - xarray=2023.3.0
+  - xarray=2023.4.2
   # ==================
   # Optional
   # ==================
diff --git a/tests/test_bounds.py b/tests/test_bounds.py
index 389e1267..38151642 100644
--- a/tests/test_bounds.py
+++ b/tests/test_bounds.py
@@ -72,62 +72,89 @@ def setup(self):
     def test_adds_bounds_to_the_dataset(self):
         ds = self.ds_with_bnds.copy()
-
         ds = ds.drop_vars(["lat_bnds", "lon_bnds"])
 
+        # Compare the result against the expected.
         result = ds.bounds.add_missing_bounds(axes=["X", "Y"])
         assert result.identical(self.ds_with_bnds)
 
     def test_skips_adding_bounds_for_coords_that_are_1_dim_singleton(self, caplog):
-        # Length <=1
-        lon = xr.DataArray(
-            data=np.array([0]),
-            dims=["lon"],
-            attrs={"units": "degrees_east", "axis": "X"},
+        # NOTE: Suppress logger warning to avoid polluting test suite.
+        caplog.set_level(logging.CRITICAL)
+
+        # Create the input dataset.
+        ds = xr.Dataset(
+            coords={
+                "lon": xr.DataArray(
+                    data=np.array([0]),
+                    dims=["lon"],
+                    attrs={"units": "degrees_east", "axis": "X"},
+                )
+            }
         )
-        ds = xr.Dataset(coords={"lon": lon})
 
+        # Compare the result against the expected.
         result = ds.bounds.add_missing_bounds(axes=["X"])
-
         assert result.identical(ds)
 
     def test_skips_adding_bounds_for_coords_that_are_0_dim_singleton(self, caplog):
-        # 0-dimensional array
-        lon = xr.DataArray(
-            data=float(0),
-            attrs={"units": "degrees_east", "axis": "X"},
+        # NOTE: Suppress logger warning to avoid polluting test suite.
+ caplog.set_level(logging.CRITICAL) + + # Create the input dataset. + ds = xr.Dataset( + coords={ + "lon": xr.DataArray( + data=float(0), + attrs={"units": "degrees_east", "axis": "X"}, + ) + } ) - ds = xr.Dataset(coords={"lon": lon}) + # Compare the result against the expected. result = ds.bounds.add_missing_bounds(axes=["X"]) - assert result.identical(ds) - def test_skips_adding_time_bounds_for_coords_that_are_1_dim_singleton(self): - # Length <=1 - time = xr.DataArray( - data=np.array(["2000-01-01T12:00:00.000000000"], dtype="datetime64[ns]"), - dims=["time"], - attrs={"calendar": "standard", "units": "days since 1850-01-01"}, + def test_skips_adding_time_bounds_for_coords_that_are_1_dim_singleton(self, caplog): + # NOTE: Suppress logger warning to avoid polluting test suite. + caplog.set_level(logging.CRITICAL) + + # Create the input dataset. + ds = xr.Dataset( + coords={ + "time": xr.DataArray( + data=np.array( + ["2000-01-01T12:00:00.000000000"], dtype="datetime64[ns]" + ), + dims=["time"], + attrs={"calendar": "standard", "units": "days since 1850-01-01"}, + ) + } ) - ds = xr.Dataset(coords={"time": time}) + # Compare the result against the expected. result = ds.bounds.add_missing_bounds(axes=["T"]) - assert result.identical(ds) def test_skips_adding_time_bounds_for_coords_that_are_not_datetime_like_objects( - self, + self, caplog ): - time = xr.DataArray( - data=np.array([0, 1, 2]), - dims=["time"], - attrs={"calendar": "standard", "units": "days since 1850-01-01"}, + # NOTE: Suppress logger warning to avoid polluting test suite. + caplog.set_level(logging.CRITICAL) + + # Create the input dataset. + ds = xr.Dataset( + coords={ + "time": xr.DataArray( + data=np.array([0, 1, 2]), + dims=["time"], + attrs={"calendar": "standard", "units": "days since 1850-01-01"}, + ) + } ) - ds = xr.Dataset(coords={"time": time}) + # Compare the result against the expected. result = ds.bounds.add_missing_bounds(axes=["T"]) - assert result.identical(ds) @@ -332,7 +359,7 @@ def test_raises_error_if_lat_coord_var_units_is_not_in_degrees(self): def test_adds_bounds_and_sets_units_to_degrees_north_if_lat_coord_var_is_missing_units_attr( self, caplog ): - # Suppress the warning + # NOTE: Suppress logger warning to avoid polluting test suite. caplog.set_level(logging.CRITICAL) ds = self.ds.copy() diff --git a/tests/test_dataset.py b/tests/test_dataset.py index eaa667de..fd70734f 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -8,6 +8,7 @@ from lxml import etree from tests.fixtures import generate_dataset +from xcdat._logger import _setup_custom_logger from xcdat.dataset import ( _keep_single_var, _postprocess_dataset, @@ -15,9 +16,8 @@ open_dataset, open_mfdataset, ) -from xcdat.logger import setup_custom_logger -logger = setup_custom_logger("xcdat.dataset", propagate=True) +logger = _setup_custom_logger("xcdat.dataset", propagate=True) class TestOpenDataset: @@ -29,6 +29,9 @@ def setup(self, tmp_path): self.file_path = f"{dir}/file.nc" def test_raises_warning_if_decode_times_but_no_time_coords_found(self, caplog): + # Silence warning to not pollute test suite output + caplog.set_level(logging.CRITICAL) + ds = generate_dataset(decode_times=False, cf_compliant=True, has_bounds=True) ds = ds.drop_dims("time") ds.to_netcdf(self.file_path) @@ -42,14 +45,6 @@ def test_raises_warning_if_decode_times_but_no_time_coords_found(self, caplog): expected = expected.drop_dims("time") assert result.identical(expected) - assert ( - "No time coordinates were found in this dataset to decode. 
If time " - "coordinates were expected to exist, make sure they are detectable by " - "setting the CF 'axis' or 'standard_name' attribute (e.g., " - "ds['time'].attrs['axis'] = 'T' or " - "ds['time'].attrs['standard_name'] = 'time'). Afterwards, try decoding " - "again with `xcdat.decode_time`." - ) in caplog.text def test_skip_decoding_time_explicitly(self): ds = generate_dataset(decode_times=False, cf_compliant=True, has_bounds=True) @@ -89,12 +84,12 @@ def test_skips_adding_bounds(self): assert result.identical(ds) def test_decode_time_in_days(self): - ds = generate_dataset(decode_times=False, cf_compliant=True, has_bounds=True) + ds = generate_dataset( + decode_times=False, cf_compliant=True, has_bounds=True + ).isel(time=slice(0, 3)) ds.to_netcdf(self.file_path) - result = open_dataset(self.file_path, data_var="ts", decode_times=True) - - # Generate an expected dataset with decoded CF compliant time units. + # Create the expected dataset. expected = ds.copy() expected["time"] = xr.DataArray( name="time", @@ -109,42 +104,6 @@ def test_decode_time_in_days(self): cftime.DatetimeGregorian( 2000, 1, 3, 0, 0, 0, 0, has_year_zero=False ), - cftime.DatetimeGregorian( - 2000, 1, 4, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 5, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 6, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 7, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 8, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 9, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 10, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 11, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 12, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 13, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 14, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 15, 0, 0, 0, 0, has_year_zero=False - ), ], dtype="object", ), @@ -178,102 +137,6 @@ def test_decode_time_in_days(self): 2000, 1, 3, 0, 0, 0, 0, has_year_zero=False ), ], - [ - cftime.DatetimeGregorian( - 2000, 1, 3, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 4, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 1, 4, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 5, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 1, 5, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 6, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 1, 6, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 7, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 1, 7, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 8, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 1, 8, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 9, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 1, 9, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 10, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 1, 10, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 11, 0, 0, 0, 0, 
has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 1, 11, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 12, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 1, 12, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 13, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 1, 13, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 14, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 1, 14, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 15, 0, 0, 0, 0, has_year_zero=False - ), - ], ], dtype="object", ), @@ -287,9 +150,11 @@ def test_decode_time_in_days(self): "bounds": "time_bnds", } + # Compare the result against the expected. + result = open_dataset(self.file_path, data_var="ts", decode_times=True) assert result.identical(expected) - # Check encoding is preserved. + # Compare time encoding. expected.time.encoding = { "zlib": False, "szip": False, @@ -303,7 +168,7 @@ def test_decode_time_in_days(self): "chunksizes": None, # Set source as result source because it changes every test run. "source": result.time.encoding["source"], - "original_shape": (15,), + "original_shape": expected.time.shape, "dtype": np.dtype("int64"), "units": "days since 2000-01-01", "calendar": "standard", @@ -320,22 +185,21 @@ def test_decode_time_in_days(self): "contiguous": True, "chunksizes": None, "source": result.time.encoding["source"], - "original_shape": (15, 2), + "original_shape": expected.time_bnds.shape, "dtype": np.dtype("int64"), "units": "days since 2000-01-01", "calendar": "standard", } - assert result.time.encoding == expected.time.encoding assert result.time_bnds.encoding == expected.time_bnds.encoding def test_decode_time_in_months(self): - ds = generate_dataset(decode_times=False, cf_compliant=False, has_bounds=True) + ds = generate_dataset( + decode_times=False, cf_compliant=False, has_bounds=True + ).isel(time=slice(0, 3)) ds.to_netcdf(self.file_path) - result = open_dataset(self.file_path, data_var="ts") - - # Generate an expected dataset with decoded non-CF compliant time units. + # Create the expected dataset. 
expected = ds.copy() expected["time"] = xr.DataArray( name="time", @@ -350,42 +214,6 @@ def test_decode_time_in_months(self): cftime.DatetimeGregorian( 2000, 3, 1, 0, 0, 0, 0, has_year_zero=False ), - cftime.DatetimeGregorian( - 2000, 4, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 5, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 6, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 7, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 8, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 9, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 10, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 11, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 12, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2001, 1, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2001, 2, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2001, 3, 1, 0, 0, 0, 0, has_year_zero=False - ), ], dtype="object", ), @@ -420,102 +248,6 @@ def test_decode_time_in_months(self): 2000, 3, 1, 0, 0, 0, 0, has_year_zero=False ), ], - [ - cftime.DatetimeGregorian( - 2000, 3, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 4, 1, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 4, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 5, 1, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 5, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 6, 1, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 6, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 7, 1, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 7, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 8, 1, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 8, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 9, 1, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 9, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 10, 1, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 10, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 11, 1, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 11, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 12, 1, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 12, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2001, 1, 1, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2001, 1, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2001, 2, 1, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2001, 2, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2001, 3, 1, 0, 0, 0, 0, has_year_zero=False - ), - ], ], dtype="object", ), @@ -530,9 +262,11 @@ def test_decode_time_in_months(self): "bounds": "time_bnds", } + # Compare the result against the expected. 
+ result = open_dataset(self.file_path, data_var="ts") assert result.identical(expected) - # Check encoding is preserved. + # Compare time encoding. expected.time.encoding = { "zlib": False, "szip": False, @@ -546,7 +280,7 @@ def test_decode_time_in_months(self): "chunksizes": None, # Set source as result source because it changes every test run. "source": result.time.encoding["source"], - "original_shape": (15,), + "original_shape": expected.time.shape, "dtype": np.dtype("int64"), "units": "months since 2000-01-01", "calendar": "standard", @@ -564,24 +298,23 @@ def test_decode_time_in_months(self): "chunksizes": None, # Set source as result source because it changes every test run. "source": result.time.encoding["source"], - "original_shape": (15, 2), + "original_shape": expected.time_bnds.shape, "dtype": np.dtype("int64"), "units": "months since 2000-01-01", "calendar": "standard", } - assert result.time.encoding == expected.time.encoding assert result.time_bnds.encoding == expected.time_bnds.encoding def test_keeps_specified_var_and_preserves_bounds(self): ds = generate_dataset(decode_times=True, cf_compliant=True, has_bounds=True) - # Create a modified version of the Dataset with a new var + # Create a modified version of the Dataset with a new var. ds_mod = ds.copy() ds_mod["tas"] = ds_mod.ts.copy() - # Suppress UserWarning regarding missing time.encoding "units" because - # it is not relevant to this test. + # NOTE: Suppress UserWarning regarding missing time.encoding "units" + # because it is not relevant to this test. with warnings.catch_warnings(): warnings.simplefilter("ignore") ds_mod.to_netcdf(self.file_path) @@ -602,6 +335,9 @@ def setUp(self, tmp_path): self.file_path2 = f"{self.dir}/file2.nc" def test_raises_warning_if_decode_times_but_no_time_coords_found(self, caplog): + # Silence warning to not pollute test suite output + caplog.set_level(logging.CRITICAL) + ds = generate_dataset(decode_times=False, cf_compliant=True, has_bounds=True) ds = ds.drop_dims("time") ds.to_netcdf(self.file_path1) @@ -615,14 +351,6 @@ def test_raises_warning_if_decode_times_but_no_time_coords_found(self, caplog): expected = expected.drop_dims("time") assert result.identical(expected) - assert ( - "No time coordinates were found in this dataset to decode. If time " - "coordinates were expected to exist, make sure they are detectable by " - "setting the CF 'axis' or 'standard_name' attribute (e.g., " - "ds['time'].attrs['axis'] = 'T' or " - "ds['time'].attrs['standard_name'] = 'time'). " - "Afterwards, try decoding again with `xcdat.decode_time`." - ) in caplog.text def test_skip_decoding_times_explicitly(self): ds1 = generate_dataset(decode_times=False, cf_compliant=False, has_bounds=True) @@ -748,20 +476,21 @@ def callable(ds): def test_decode_time_in_months(self): # Generate two dataset files with different variables. - ds1 = generate_dataset(decode_times=False, cf_compliant=False, has_bounds=True) + ds1 = generate_dataset( + decode_times=False, cf_compliant=False, has_bounds=True + ).isel(time=slice(0, 3)) ds1.to_netcdf(self.file_path1) - ds2 = generate_dataset(decode_times=False, cf_compliant=False, has_bounds=True) + ds2 = generate_dataset( + decode_times=False, cf_compliant=False, has_bounds=True + ).isel(time=slice(0, 3)) ds2 = ds2.rename_vars({"ts": "tas"}) ds2.to_netcdf(self.file_path2) - # Open both dataset files as a single Dataset object. - result = open_mfdataset([self.file_path1, self.file_path2], data_var="ts") - - # Create an expected Dataset object. 
+ # Create the expected dataset. expected = generate_dataset( decode_times=True, cf_compliant=False, has_bounds=True - ) + ).isel(time=slice(0, 3)) expected["time"] = xr.DataArray( name="time", data=np.array( @@ -775,42 +504,6 @@ def test_decode_time_in_months(self): cftime.DatetimeGregorian( 2000, 3, 1, 0, 0, 0, 0, has_year_zero=False ), - cftime.DatetimeGregorian( - 2000, 4, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 5, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 6, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 7, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 8, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 9, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 10, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 11, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 12, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2001, 1, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2001, 2, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2001, 3, 1, 0, 0, 0, 0, has_year_zero=False - ), ], dtype="object", ), @@ -822,7 +515,6 @@ def test_decode_time_in_months(self): "bounds": "time_bnds", }, ) - expected["time_bnds"] = xr.DataArray( name="time_bnds", data=np.array( @@ -851,114 +543,21 @@ def test_decode_time_in_months(self): 2000, 3, 1, 0, 0, 0, 0, has_year_zero=False ), ], - [ - cftime.DatetimeGregorian( - 2000, 3, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 4, 1, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 4, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 5, 1, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 5, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 6, 1, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 6, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 7, 1, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 7, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 8, 1, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 8, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 9, 1, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 9, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 10, 1, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 10, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 11, 1, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 11, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 12, 1, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 12, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2001, 1, 1, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2001, 1, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2001, 2, 1, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2001, 2, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2001, 3, 1, 0, 
0, 0, 0, has_year_zero=False
-                        ),
-                    ],
                 ],
                 dtype="object",
             ),
             dims=["time", "bnds"],
             attrs={"xcdat_bounds": "True"},
         )
-        # Make sure the expected is chunked.
-        expected = expected.chunk(chunks={"time": 15, "bnds": 2})
+        expected = expected.chunk(chunks={"time": 3, "bnds": 2})
+
+        # Compare the result against the expected.
+        result = open_mfdataset([self.file_path1, self.file_path2], data_var="ts")
+        assert result.identical(expected)
 
-        # Check encoding is preserved. The extra metadata like "zlib" are from
-        # the netCDF4 files.
+        # Compare the time encoding.
+        # The extra metadata like "zlib" are from the netCDF4 files.
         expected.time.encoding = {
             "zlib": False,
             "szip": False,
@@ -972,7 +571,7 @@ def test_decode_time_in_months(self):
             "chunksizes": None,
             # Set source as result source because it changes every test run.
             "source": result.time.encoding["source"],
-            "original_shape": (15,),
+            "original_shape": expected.time.shape,
             "dtype": np.dtype("int64"),
             "units": "months since 2000-01-01",
             "calendar": "standard",
@@ -981,8 +580,6 @@
             "units": "months since 2000-01-01",
             "calendar": "standard",
         }
-
-        assert result.identical(expected)
         assert result.time.encoding == expected.time.encoding
 
         # FIXME: For some reason the encoding attributes get dropped only in
@@ -1024,6 +621,7 @@ def setup(self):
                 "axis": "T",
                 "long_name": "time",
                 "standard_name": "time",
+                "calendar": "standard",
             },
         )
         time_bnds = xr.DataArray(
@@ -1059,14 +657,15 @@ def test_skips_decoding_time_coords_if_units_is_not_supported(self, caplog):
         # Update logger level to silence the logger warning during test runs.
         caplog.set_level(logging.ERROR)
 
+        # Create the input dataset and update the units.
         ds = generate_dataset(decode_times=False, cf_compliant=False, has_bounds=True)
-
         ds.time.attrs["units"] = "year AD"
 
         result = decode_time(ds)
         assert ds.identical(result)
 
     def test_skips_decoding_time_bounds_if_bounds_dont_exist(self):
+        # Create the input dataset.
         ds = xr.Dataset(
             coords={
                 "time": xr.DataArray(
@@ -1098,7 +697,7 @@
             },
         )
 
-        result = decode_time(ds)
+        # Create the expected dataset.
         expected = xr.Dataset(
             coords={
                 "time": xr.DataArray(
@@ -1151,10 +750,6 @@
                 ),
             },
         )
-
-        assert result.identical(expected)
-
-        # Check encoding is preserved.
         expected.time.encoding = {
             "units": "months since 2000-01-01",
             "calendar": "standard",
         }
         expected.time2.encoding = {
             "units": "months since 2000-01-01",
             "calendar": "standard",
         }
 
+        # Compare the result against the expected.
+        result = decode_time(ds)
+        assert result.identical(expected)
         assert result.time.encoding == expected.time.encoding
         assert result.time2.encoding == expected.time.encoding
 
     def test_decodes_all_time_coordinates_and_time_bounds(self):
+        # Create the input dataset.
         ds = xr.Dataset(
             coords={
                 "time": xr.DataArray(
@@ -1206,7 +805,7 @@
             },
         )
 
-        result = decode_time(ds)
+        # Create the expected dataset.
         expected = xr.Dataset(
             coords={
                 "time": xr.DataArray(
@@ -1295,10 +894,6 @@
                 ),
             },
         )
-
-        assert result.identical(expected)
-
-        # Check the encoding is preserved.
expected.time.encoding = { "units": "months since 2000-01-01", "calendar": "standard", @@ -1312,6 +907,9 @@ def test_decodes_all_time_coordinates_and_time_bounds(self): "calendar": "standard", } + # Compare the result against the expected. + result = decode_time(ds) + assert result.identical(expected) assert result.time.encoding == expected.time.encoding assert result.time2.encoding == expected.time2.encoding assert result.time_bnds.encoding == expected.time_bnds.encoding @@ -1344,7 +942,7 @@ def test_decodes_time_coords_and_bounds_without_calendar_attr_set(self, caplog): }, ) - result = decode_time(ds) + # Create the expected dataset expected = xr.Dataset( coords={ "time": xr.DataArray( @@ -1408,10 +1006,6 @@ def test_decodes_time_coords_and_bounds_without_calendar_attr_set(self, caplog): ), }, ) - - assert result.identical(expected) - - # Check the encoding is preserved. expected.time.encoding = { "units": "months since 2000-01-01", "calendar": "standard", @@ -1421,64 +1015,31 @@ def test_decodes_time_coords_and_bounds_without_calendar_attr_set(self, caplog): "calendar": "standard", } + # Compare the result against the expected. + result = decode_time(ds) + assert result.identical(expected) assert result.time.encoding == expected.time.encoding assert result.time_bnds.encoding == expected.time_bnds.encoding def test_decode_time_in_days(self): - ds = generate_dataset(decode_times=False, cf_compliant=True, has_bounds=True) + ds = generate_dataset( + decode_times=False, cf_compliant=True, has_bounds=True + ).isel(time=slice(0, 3)) - result = decode_time(ds) - - # Generate an expected dataset with decoded CF compliant time units. + # Create the expected dataset expected = ds.copy() expected["time"] = xr.DataArray( name="time", - data=np.array( - [ - cftime.DatetimeGregorian( - 2000, 1, 1, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 2, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 3, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 4, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 5, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 6, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 7, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 8, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 9, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 10, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 11, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 12, 0, 0, 0, 0, has_year_zero=False - ), + data=np.array( + [ cftime.DatetimeGregorian( - 2000, 1, 13, 0, 0, 0, 0, has_year_zero=False + 2000, 1, 1, 0, 0, 0, 0, has_year_zero=False ), cftime.DatetimeGregorian( - 2000, 1, 14, 0, 0, 0, 0, has_year_zero=False + 2000, 1, 2, 0, 0, 0, 0, has_year_zero=False ), cftime.DatetimeGregorian( - 2000, 1, 15, 0, 0, 0, 0, has_year_zero=False + 2000, 1, 3, 0, 0, 0, 0, has_year_zero=False ), ], dtype="object", @@ -1513,102 +1074,6 @@ def test_decode_time_in_days(self): 2000, 1, 3, 0, 0, 0, 0, has_year_zero=False ), ], - [ - cftime.DatetimeGregorian( - 2000, 1, 3, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 4, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 1, 4, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 5, 0, 0, 0, 0, 
has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 1, 5, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 6, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 1, 6, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 7, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 1, 7, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 8, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 1, 8, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 9, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 1, 9, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 10, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 1, 10, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 11, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 1, 11, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 12, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 1, 12, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 13, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 1, 13, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 14, 0, 0, 0, 0, has_year_zero=False - ), - ], - [ - cftime.DatetimeGregorian( - 2000, 1, 14, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 1, 15, 0, 0, 0, 0, has_year_zero=False - ), - ], ], dtype="object", ), @@ -1621,10 +1086,6 @@ def test_decode_time_in_days(self): "standard_name": "time", "bounds": "time_bnds", } - - assert result.identical(expected) - - # Check encoding is preserved. expected.time.encoding = { "units": "days since 2000-01-01", "calendar": "standard", @@ -1634,6 +1095,9 @@ def test_decode_time_in_days(self): "calendar": "standard", } + # Compare the result against the expected. + result = decode_time(ds) + assert result.identical(expected) assert result.time.encoding == expected.time.encoding assert result.time_bnds.encoding == expected.time_bnds.encoding @@ -1645,7 +1109,7 @@ def test_decodes_time_coords_and_bounds_in_months_with_a_reference_date_at_the_s ds.time.attrs["calendar"] = calendar ds.time.attrs["units"] = "months since 2000-01-01" - result = decode_time(ds) + # Create the expected dataset expected = xr.Dataset( { "time": xr.DataArray( @@ -1707,9 +1171,6 @@ def test_decodes_time_coords_and_bounds_in_months_with_a_reference_date_at_the_s ), } ) - assert result.identical(expected) - - # Check the encoding is preserved. expected.time.encoding = { "units": "months since 2000-01-01", "calendar": "standard", @@ -1719,6 +1180,9 @@ def test_decodes_time_coords_and_bounds_in_months_with_a_reference_date_at_the_s "calendar": "standard", } + # Compare the result against the expected. 
+ result = decode_time(ds) + assert result.identical(expected) assert result.time.encoding == expected.time.encoding assert result.time_bnds.encoding == expected.time_bnds.encoding @@ -1730,7 +1194,7 @@ def test_decodes_time_coords_and_bounds_in_months_with_a_reference_date_at_the_m ds.time.attrs["calendar"] = calendar ds.time.attrs["units"] = "months since 2000-01-15" - result = decode_time(ds) + # Create the expected dataset expected = xr.Dataset( { "time": xr.DataArray( @@ -1773,9 +1237,6 @@ def test_decodes_time_coords_and_bounds_in_months_with_a_reference_date_at_the_m ), } ) - assert result.identical(expected) - - # Check the encoding is preserved. expected.time.encoding = { "units": "months since 2000-01-15", "calendar": "standard", @@ -1785,6 +1246,9 @@ def test_decodes_time_coords_and_bounds_in_months_with_a_reference_date_at_the_m "calendar": "standard", } + # Compare the result against the expected. + result = decode_time(ds) + assert result.identical(expected) assert result.time.encoding == expected.time.encoding assert result.time_bnds.encoding == expected.time_bnds.encoding @@ -1796,7 +1260,7 @@ def test_decodes_time_coords_and_bounds_in_months_with_a_reference_date_at_the_e ds.time.attrs["calendar"] = calendar ds.time.attrs["units"] = "months since 1999-12-31" - result = decode_time(ds) + # Create the expected dataset expected = xr.Dataset( { "time": xr.DataArray( @@ -1839,9 +1303,6 @@ def test_decodes_time_coords_and_bounds_in_months_with_a_reference_date_at_the_e ), } ) - assert result.identical(expected) - - # Check the encoding is preserved. expected.time.encoding = { "units": "months since 1999-12-31", "calendar": "standard", @@ -1851,6 +1312,9 @@ def test_decodes_time_coords_and_bounds_in_months_with_a_reference_date_at_the_e "calendar": "standard", } + # Compare the result against the expected. + result = decode_time(ds) + assert result.identical(expected) assert result.time.encoding == expected.time.encoding assert result.time_bnds.encoding == expected.time_bnds.encoding @@ -1862,8 +1326,7 @@ def test_decodes_time_coords_and_bounds_in_months_with_a_reference_date_on_a_lea ds.time.attrs["calendar"] = calendar ds.time.attrs["units"] = "months since 2000-02-29" - result = decode_time(ds) - + # Create the expected dataset expected = xr.Dataset( { "time": xr.DataArray( @@ -1906,9 +1369,6 @@ def test_decodes_time_coords_and_bounds_in_months_with_a_reference_date_on_a_lea ), } ) - assert result.identical(expected) - - # Check the encoding is preserved. expected.time.encoding = { "units": "months since 2000-02-29", "calendar": "standard", @@ -1918,6 +1378,9 @@ def test_decodes_time_coords_and_bounds_in_months_with_a_reference_date_on_a_lea "calendar": "standard", } + # Compare the result against the expected. + result = decode_time(ds) + assert result.identical(expected) assert result.time.encoding == expected.time.encoding assert result.time_bnds.encoding == expected.time_bnds.encoding @@ -1930,8 +1393,7 @@ def test_decodes_time_coords_and_bounds_in_years_with_a_reference_date_in_the_mi ds.time.attrs["calendar"] = calendar ds.time.attrs["units"] = "years since 2000-06-01" - result = decode_time(ds) - + # Create the expected dataset expected = xr.Dataset( { "time": xr.DataArray( @@ -1974,9 +1436,6 @@ def test_decodes_time_coords_and_bounds_in_years_with_a_reference_date_in_the_mi ), } ) - assert result.identical(expected) - - # Check the encoding is preserved. 
expected.time.encoding = { "units": "years since 2000-06-01", "calendar": "standard", @@ -1986,6 +1445,9 @@ def test_decodes_time_coords_and_bounds_in_years_with_a_reference_date_in_the_mi "calendar": "standard", } + # Compare the result against the expected. + result = decode_time(ds) + assert result.identical(expected) assert result.time.encoding == expected.time.encoding assert result.time_bnds.encoding == expected.time_bnds.encoding @@ -1998,8 +1460,7 @@ def test_decodes_time_coords_and_bounds_in_years_with_a_reference_date_on_a_leap ds.time.attrs["calendar"] = calendar ds.time.attrs["units"] = "years since 2000-02-29" - result = decode_time(ds) - + # Create the expected dataset expected = xr.Dataset( { "time": xr.DataArray( @@ -2042,9 +1503,6 @@ def test_decodes_time_coords_and_bounds_in_years_with_a_reference_date_on_a_leap ), } ) - assert result.identical(expected) - - # Check the encoding is preserved. expected.time.encoding = { "units": "years since 2000-02-29", "calendar": "standard", @@ -2054,6 +1512,9 @@ def test_decodes_time_coords_and_bounds_in_years_with_a_reference_date_on_a_leap "calendar": "standard", } + # Compare the result against the expected. + result = decode_time(ds) + assert result.identical(expected) assert result.time.encoding == expected.time.encoding assert result.time_bnds.encoding == expected.time_bnds.encoding @@ -2065,159 +1526,30 @@ def setup(self): decode_times=True, cf_compliant=False, has_bounds=True ) - def test_centers_time_coords_and_converts_datetime_dtype_to_cftime_object_type( - self, - ): - ds = generate_dataset(decode_times=True, cf_compliant=False, has_bounds=True) - - # Create a dataset with uncentered time coordinates that are decoded as - # dtype="datetime[ns]" - ds_uncentered = ds.copy() - ds_uncentered["time"] = xr.DataArray( - data=np.array( - [ - "2000-01-31T12:00:00.000000000", - "2000-02-29T12:00:00.000000000", - "2000-03-31T12:00:00.000000000", - "2000-04-30T00:00:00.000000000", - "2000-05-31T12:00:00.000000000", - "2000-06-30T00:00:00.000000000", - "2000-07-31T12:00:00.000000000", - "2000-08-31T12:00:00.000000000", - "2000-09-30T00:00:00.000000000", - "2000-10-16T12:00:00.000000000", - "2000-11-30T00:00:00.000000000", - "2000-12-31T12:00:00.000000000", - "2001-01-31T12:00:00.000000000", - "2001-02-28T00:00:00.000000000", - "2001-12-31T12:00:00.000000000", - ], - dtype="datetime64[ns]", - ), - dims=ds.time.dims, - attrs=ds.time.attrs, - ) - ds_uncentered.time.encoding = { - "source": None, - "original_shape": ds.time.data.shape, - "dtype": np.dtype("float64"), - "units": "days since 2000-01-01", - "calendar": "standard", - "_FillValue": False, - } - - # Compare result of the method against the expected. 
- result = _postprocess_dataset(ds_uncentered, center_times=True) - expected = ds.copy() - expected["time"] = xr.DataArray( - name="time", - data=np.array( - [ - cftime.DatetimeGregorian( - 2000, 1, 16, 12, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 2, 15, 12, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 3, 16, 12, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 4, 16, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 5, 16, 12, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 6, 16, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 7, 16, 12, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 8, 16, 12, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 9, 16, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 10, 16, 12, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 11, 16, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 12, 16, 12, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2001, 1, 16, 12, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2001, 2, 15, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2001, 12, 16, 12, 0, 0, 0, has_year_zero=False - ), - ], - dtype="object", - ), - dims="time", - attrs={ - "long_name": "time", - "standard_name": "time", - "axis": "T", - "bounds": "time_bnds", - }, - ) - - expected.time.encoding = { - "source": None, - "original_shape": (15,), - "dtype": np.dtype("float64"), - "units": "days since 2000-01-01", - "calendar": "standard", - "_FillValue": False, - } - - # Compare result of the function against the expected. - assert result.identical(expected) - assert result.time.encoding == expected.time.encoding - def test_centers_time_coordinates_and_maintains_cftime_object_type(self): - # Create a dataset with uncentered time coordinates - ds = generate_dataset(decode_times=True, cf_compliant=False, has_bounds=True) + # Create the input dataset with uncentered time coordinates + ds = generate_dataset( + decode_times=True, cf_compliant=False, has_bounds=True + ).isel(time=slice(0, 3)) uncentered_time = np.array( [ cftime.DatetimeGregorian(2000, 1, 31, 12, 0, 0, 0), cftime.DatetimeGregorian(2000, 2, 29, 12, 0, 0, 0), cftime.DatetimeGregorian(2000, 3, 31, 12, 0, 0, 0), - cftime.DatetimeGregorian(2000, 4, 30, 0, 0, 0, 0), - cftime.DatetimeGregorian(2000, 5, 31, 12, 0, 0, 0), - cftime.DatetimeGregorian(2000, 6, 30, 0, 0, 0, 0), - cftime.DatetimeGregorian(2000, 7, 31, 12, 0, 0, 0), - cftime.DatetimeGregorian(2000, 8, 31, 12, 0, 0, 0), - cftime.DatetimeGregorian(2000, 9, 30, 0, 0, 0, 0), - cftime.DatetimeGregorian(2000, 10, 16, 12, 0, 0, 0), - cftime.DatetimeGregorian(2000, 11, 30, 0, 0, 0, 0), - cftime.DatetimeGregorian(2000, 12, 31, 12, 0, 0, 0), - cftime.DatetimeGregorian(2001, 1, 31, 12, 0, 0, 0), - cftime.DatetimeGregorian(2001, 2, 28, 0, 0, 0, 0), - cftime.DatetimeGregorian(2001, 12, 31, 12, 0, 0, 0), ], dtype="object", ) ds.time.data[:] = uncentered_time ds.time.encoding = { "source": None, - "original_shape": ds.time.data.shape, + "original_shape": ds.time.shape, "dtype": np.dtype("float64"), "units": "days since 2000-01-01", "calendar": "standard", "_FillValue": False, } - # Compare result of the method against the expected. - result = _postprocess_dataset(ds, center_times=True) + # Create the expected dataset. 
expected = ds.copy() expected["time"] = xr.DataArray( name="time", @@ -2232,42 +1564,6 @@ def test_centers_time_coordinates_and_maintains_cftime_object_type(self): cftime.DatetimeGregorian( 2000, 3, 16, 12, 0, 0, 0, has_year_zero=False ), - cftime.DatetimeGregorian( - 2000, 4, 16, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 5, 16, 12, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 6, 16, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 7, 16, 12, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 8, 16, 12, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 9, 16, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 10, 16, 12, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 11, 16, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2000, 12, 16, 12, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2001, 1, 16, 12, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2001, 2, 15, 0, 0, 0, 0, has_year_zero=False - ), - cftime.DatetimeGregorian( - 2001, 12, 16, 12, 0, 0, 0, has_year_zero=False - ), ], dtype="object", ), @@ -2282,18 +1578,49 @@ def test_centers_time_coordinates_and_maintains_cftime_object_type(self): expected.time.encoding = { "source": None, - "original_shape": (15,), + "original_shape": expected.time.shape, "dtype": np.dtype("float64"), "units": "days since 2000-01-01", "calendar": "standard", "_FillValue": False, } + expected["time_bnds"] = xr.DataArray( + name="time_bnds", + data=np.array( + [ + [ + cftime.DatetimeGregorian( + 2000, 1, 1, 0, 0, 0, 0, has_year_zero=False + ), + cftime.DatetimeGregorian( + 2000, 2, 1, 0, 0, 0, 0, has_year_zero=False + ), + ], + [ + cftime.DatetimeGregorian( + 2000, 2, 1, 0, 0, 0, 0, has_year_zero=False + ), + cftime.DatetimeGregorian( + 2000, 3, 1, 0, 0, 0, 0, has_year_zero=False + ), + ], + [ + cftime.DatetimeGregorian( + 2000, 3, 1, 0, 0, 0, 0, has_year_zero=False + ), + cftime.DatetimeGregorian( + 2000, 4, 1, 0, 0, 0, 0, has_year_zero=False + ), + ], + ], + dtype="object", + ), + dims=["time", "bnds"], + attrs=ds.time_bnds.attrs, + ) - # Update time bounds with centered time coordinates. - expected["time_bnds"] = ds.time_bnds.copy() - expected["time_bnds"]["time"] = expected.time - - # Compare result of the function against the expected. + # Compare result of the method against the expected. + result = _postprocess_dataset(ds, center_times=True) assert result.identical(expected) assert result.time.encoding == expected.time.encoding @@ -2337,7 +1664,7 @@ def test_adds_missing_lat_and_lon_and_time_bounds(self): def test_orients_longitude_bounds_from_180_to_360_and_sorts_with_prime_meridian_cell( self, ): - # Chunk the dataset to test method also works with Dask. + # Chunk the input dataset to test method also works with Dask. 
ds = xr.Dataset( coords={ "lon": xr.DataArray( diff --git a/tests/test_regrid.py b/tests/test_regrid.py index c5219b21..e2ee69ff 100644 --- a/tests/test_regrid.py +++ b/tests/test_regrid.py @@ -139,7 +139,7 @@ def setup(self): } ) - @pytest.mark.filterwarnings("ignore:.*invalid value.*true_divide.*:RuntimeWarning") + @pytest.mark.filterwarnings("ignore:.*invalid value.*divide.*:RuntimeWarning") def test_output_bounds(self): ds = fixtures.generate_dataset( decode_times=True, cf_compliant=False, has_bounds=True @@ -796,7 +796,7 @@ def test_invalid_tool(self): self.ac.horizontal("ts", mock.MagicMock(), "test") # type: ignore @requires_xesmf - @pytest.mark.filterwarnings("ignore:.*invalid value.*true_divide.*:RuntimeWarning") + @pytest.mark.filterwarnings("ignore:.*invalid value.*divide.*:RuntimeWarning") def test_convenience_methods(self): ds = fixtures.generate_dataset( decode_times=True, cf_compliant=False, has_bounds=True diff --git a/tests/test_temporal.py b/tests/test_temporal.py index 866ffb7c..6e2d6049 100644 --- a/tests/test_temporal.py +++ b/tests/test_temporal.py @@ -1,3 +1,5 @@ +import logging + import cftime import numpy as np import pytest @@ -6,14 +8,14 @@ from xarray.tests import requires_dask from tests.fixtures import generate_dataset -from xcdat.logger import setup_custom_logger +from xcdat._logger import _setup_custom_logger from xcdat.temporal import ( TemporalAccessor, _contains_datetime_like_objects, _get_datetime_like_type, ) -logger = setup_custom_logger("xcdat.temporal", propagate=True) +logger = _setup_custom_logger("xcdat.temporal", propagate=True) class TestTemporalAccessor: @@ -41,9 +43,10 @@ def test_raises_error_if_time_coords_are_not_decoded(self): with pytest.raises(TypeError): ds.temporal.average("ts") - def test_raises_warning_if_calendar_encoding_attr_not_found_on_data_var_time_coords( - self, caplog - ): + def test_defaults_calendar_attribute_to_standard_if_missing(self, caplog): + # Silence warning to not pollute test suite output + caplog.set_level(logging.CRITICAL) + ds: xr.Dataset = generate_dataset( decode_times=True, cf_compliant=False, has_bounds=True ) @@ -51,13 +54,7 @@ def test_raises_warning_if_calendar_encoding_attr_not_found_on_data_var_time_coo ds.temporal.average("ts") - assert ( - "'time' does not have a calendar encoding attribute set, " - "which is used to determine the `cftime.datetime` object type for the " - "output time coordinates. Defaulting to CF 'standard' calendar. " - "Otherwise, set the calendar type (e.g., " - "ds['time'].encoding['calendar'] = 'noleap') and try again." - ) in caplog.text + assert ds.temporal.calendar == "standard" def test_averages_for_yearly_time_series(self): ds = xr.Dataset( @@ -460,9 +457,10 @@ def test_raises_error_if_time_coords_are_not_decoded(self): with pytest.raises(TypeError): ds.temporal.group_average("ts", freq="year") - def test_raises_warning_if_calendar_encoding_attr_not_found_on_data_var_time_coords( - self, caplog - ): + def test_defaults_calendar_attribute_to_standard_if_missing(self, caplog): + # Silence warning to not pollute test suite output + caplog.set_level(logging.CRITICAL) + ds: xr.Dataset = generate_dataset( decode_times=True, cf_compliant=False, has_bounds=True ) @@ -470,13 +468,7 @@ def test_raises_warning_if_calendar_encoding_attr_not_found_on_data_var_time_coo ds.temporal.group_average("ts", freq="year") - assert ( - "'time' does not have a calendar encoding attribute set, " - "which is used to determine the `cftime.datetime` object type for the " - "output time coordinates. 
Defaulting to CF 'standard' calendar. " - "Otherwise, set the calendar type (e.g., " - "ds['time'].encoding['calendar'] = 'noleap') and try again." - ) in caplog.text + assert ds.temporal.calendar == "standard" def test_weighted_annual_averages(self): ds = self.ds.copy() @@ -1039,9 +1031,10 @@ def test_raises_error_if_time_coords_are_not_decoded(self): with pytest.raises(TypeError): ds.temporal.climatology("ts", freq="year") - def test_raises_warning_if_calendar_encoding_attr_not_found_on_data_var_time_coords( - self, caplog - ): + def test_defaults_calendar_attribute_to_standard_if_missing(self, caplog): + # Silence warning to not pollute test suite output + caplog.set_level(logging.CRITICAL) + ds: xr.Dataset = generate_dataset( decode_times=True, cf_compliant=False, has_bounds=True ) @@ -1049,13 +1042,7 @@ def test_raises_warning_if_calendar_encoding_attr_not_found_on_data_var_time_coo ds.temporal.climatology("ts", freq="season") - assert ( - "'time' does not have a calendar encoding attribute set, " - "which is used to determine the `cftime.datetime` object type for the " - "output time coordinates. Defaulting to CF 'standard' calendar. " - "Otherwise, set the calendar type (e.g., " - "ds['time'].encoding['calendar'] = 'noleap') and try again." - ) in caplog.text + assert ds.temporal.calendar == "standard" def test_raises_error_if_reference_period_arg_is_incorrect(self): ds = self.ds.copy() @@ -1742,9 +1729,10 @@ def test_raises_error_if_time_coords_are_not_decoded(self): with pytest.raises(TypeError): ds.temporal.departures("ts", freq="season") - def test_raises_warning_if_calendar_encoding_attr_not_found_on_data_var_time_coords( - self, caplog - ): + def test_defaults_calendar_attribute_to_standard_if_missing(self, caplog): + # Silence warning to not pollute test suite output + caplog.set_level(logging.CRITICAL) + ds: xr.Dataset = generate_dataset( decode_times=True, cf_compliant=False, has_bounds=True ) @@ -1752,13 +1740,7 @@ def test_raises_warning_if_calendar_encoding_attr_not_found_on_data_var_time_coo ds.temporal.departures("ts", freq="season") - assert ( - "'time' does not have a calendar encoding attribute set, " - "which is used to determine the `cftime.datetime` object type for the " - "output time coordinates. Defaulting to CF 'standard' calendar. " - "Otherwise, set the calendar type (e.g., " - "ds['time'].encoding['calendar'] = 'noleap') and try again." - ) in caplog.text + assert ds.temporal.calendar == "standard" def test_raises_error_if_reference_period_arg_is_incorrect(self): ds = self.ds.copy() diff --git a/xcdat/logger.py b/xcdat/_logger.py similarity index 64% rename from xcdat/logger.py rename to xcdat/_logger.py index f4cec95d..f7c30218 100644 --- a/xcdat/logger.py +++ b/xcdat/_logger.py @@ -2,10 +2,25 @@ import logging import logging.handlers +# Logging module setup +log_format = ( + "%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s" +) +logging.basicConfig(format=log_format, filemode="w", level=logging.INFO) -def setup_custom_logger(name: str, propagate: bool = False) -> logging.Logger: +# Console handler setup +console_handler = logging.StreamHandler() +console_handler.setLevel(logging.INFO) +logFormatter = logging.Formatter(log_format) +console_handler.setFormatter(logFormatter) +logging.getLogger().addHandler(console_handler) + + +def _setup_custom_logger(name, propagate=True) -> logging.Logger: """Sets up a custom logger. 
+ Documentation on logging: https://docs.python.org/3/library/logging.html + Parameters ---------- name : str @@ -43,18 +58,7 @@ def setup_custom_logger(name: str, propagate: bool = False) -> logging.Logger: >>> logger.critical("") """ - log_format = "%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s" - log_filemode = "w" # w: overwrite; a: append - - # Setup - logging.basicConfig(format=log_format, filemode=log_filemode, level=logging.INFO) logger = logging.getLogger(name) logger.propagate = propagate - # Console output - consoleHandler = logging.StreamHandler() - logFormatter = logging.Formatter(log_format) - consoleHandler.setFormatter(logFormatter) - logger.addHandler(consoleHandler) - return logger diff --git a/xcdat/bounds.py b/xcdat/bounds.py index 942d0251..388053c9 100644 --- a/xcdat/bounds.py +++ b/xcdat/bounds.py @@ -12,16 +12,16 @@ from xarray.coding.cftime_offsets import get_date_type from xarray.core.common import contains_cftime_datetimes +from xcdat._logger import _setup_custom_logger from xcdat.axis import CF_ATTR_MAP, CFAxisKey, get_dim_coords from xcdat.dataset import _get_data_var -from xcdat.logger import setup_custom_logger from xcdat.temporal import ( _contains_datetime_like_objects, _get_datetime_like_type, _infer_freq, ) -logger = setup_custom_logger(__name__) +logger = _setup_custom_logger(__name__) @xr.register_dataset_accessor("bounds") diff --git a/xcdat/dataset.py b/xcdat/dataset.py index ece06c27..5103a68a 100644 --- a/xcdat/dataset.py +++ b/xcdat/dataset.py @@ -16,12 +16,12 @@ from xarray.core.variable import as_variable from xcdat import bounds as bounds_accessor # noqa: F401 +from xcdat._logger import _setup_custom_logger from xcdat.axis import CFAxisKey, _get_all_coord_keys from xcdat.axis import center_times as center_times_func from xcdat.axis import swap_lon_axis -from xcdat.logger import setup_custom_logger -logger = setup_custom_logger(__name__) +logger = _setup_custom_logger(__name__) #: List of non-CF compliant time units. NON_CF_TIME_UNITS: List[str] = ["month", "months", "year", "years"] diff --git a/xcdat/regridder/base.py b/xcdat/regridder/base.py index e3419b09..9fcf7925 100644 --- a/xcdat/regridder/base.py +++ b/xcdat/regridder/base.py @@ -4,9 +4,9 @@ import xarray as xr import xcdat.bounds # noqa: F401 -from xcdat.logger import setup_custom_logger +from xcdat._logger import _setup_custom_logger -logger = setup_custom_logger(__name__) +logger = _setup_custom_logger(__name__) def preserve_bounds( diff --git a/xcdat/temporal.py b/xcdat/temporal.py index 7711fe17..33ad8c2a 100644 --- a/xcdat/temporal.py +++ b/xcdat/temporal.py @@ -14,11 +14,11 @@ from xarray.core.groupby import DataArrayGroupBy from xcdat import bounds # noqa: F401 +from xcdat._logger import _setup_custom_logger from xcdat.axis import get_dim_coords from xcdat.dataset import _get_data_var -from xcdat.logger import setup_custom_logger -logger = setup_custom_logger(__name__) +logger = _setup_custom_logger(__name__) # Type alias for supported time averaging modes. Mode = Literal["average", "group_average", "climatology", "departures"] @@ -816,10 +816,10 @@ def _set_data_var_attrs(self, data_var: str): # Get the `cftime` date type based on the CF calendar attribute. # The date type is used to get the correct cftime.datetime sub-class # type for creating new grouped time coordinates for averaging. 
- try: - self.calendar = dv[self.dim].encoding["calendar"] - except KeyError: + self.calendar = dv[self.dim].encoding.get("calendar", None) + if self.calendar is None: self.calendar = "standard" + logger.warning( f"'{self.dim}' does not have a calendar encoding attribute set, " "which is used to determine the `cftime.datetime` object type for the " @@ -1138,7 +1138,7 @@ def _group_average( # dimension) shape of the `weights` DataArray to the # multi-dimensional shape of its corresponding data variable. weights, _ = xr.broadcast(self._weights, dv) - weights = xr.where(np.isnan(dv), 0.0, weights) + weights = xr.where(dv.copy().isnull(), 0.0, weights) # Perform weighted average using the formula # WA = sum(data*weights) / sum(weights). The denominator must be
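
The `xr.where` change above is the core of the attribute-preservation fix named in the commit subject. A minimal, self-contained sketch of that masking step follows (hypothetical values, not xcdat code): `DataArray.isnull()` is the xarray-native missing-value test and, unlike `np.isnan`, it also handles datetime-like and object dtypes.

import numpy as np
import xarray as xr

# Data variable with a missing value and attrs that should survive averaging.
dv = xr.DataArray([1.0, np.nan, 3.0], dims="time", name="ts", attrs={"units": "K"})
weights = xr.DataArray([31.0, 28.0, 31.0], dims="time")

# Zero out the weight of missing data points so they do not contribute
# to the weighted-average denominator, sum(weights).
masked_weights = xr.where(dv.isnull(), 0.0, weights)
print(masked_weights.values)  # [31.  0. 31.]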
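The calendar fallback added in `_set_data_var_attrs` can also be exercised on its own. A short sketch under assumed standalone usage (not xcdat internals): `get_date_type` is the same xarray helper imported in `xcdat/bounds.py` above, mapping a calendar name to the matching `cftime.datetime` subclass used for output time coordinates.

import xarray as xr
from xarray.coding.cftime_offsets import get_date_type

time = xr.DataArray([0, 1, 2], dims="time")  # hypothetical coord, no encoding set
calendar = time.encoding.get("calendar", None)
if calendar is None:
    # CF default when no calendar encoding is present.
    calendar = "standard"

print(get_date_type(calendar))  # <class 'cftime._cftime.DatetimeGregorian'>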
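The test changes in this patch repeatedly apply the same suppression pattern for expected log warnings. A hedged sketch of that pattern (the test body is hypothetical): `_setup_custom_logger(..., propagate=True)` lets records reach pytest's root-level capture, and raising the capture threshold keeps intentional warnings out of the suite output.

import logging

from xcdat._logger import _setup_custom_logger

logger = _setup_custom_logger("xcdat.temporal", propagate=True)


def test_silences_expected_warning(caplog):
    # Raise the threshold so the expected warning is neither captured nor shown.
    caplog.set_level(logging.CRITICAL)

    logger.warning("an intentional warning that would otherwise be noisy")
    assert caplog.text == ""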
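Finally, the singleton-coordinate behavior covered by the updated `tests/test_bounds.py` cases can be reproduced directly. This sketch mirrors the 1-D singleton test above: bounds cannot be derived from a single coordinate point, so `add_missing_bounds` logs a warning and returns the dataset unchanged.

import numpy as np
import xarray as xr

import xcdat  # noqa: F401  (registers the .bounds accessor)

ds = xr.Dataset(
    coords={
        "lon": xr.DataArray(
            data=np.array([0]),
            dims=["lon"],
            attrs={"units": "degrees_east", "axis": "X"},
        )
    }
)

result = ds.bounds.add_missing_bounds(axes=["X"])
assert result.identical(ds)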