Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add interpolate to resample_hours #2440

Merged
merged 8 commits into from
Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 30 additions & 15 deletions doc/recipe/preprocessor.rst
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ supported too if proper keyword arguments are specified:
``hmean`` :const:`iris.analysis.HMEAN` no
``max`` :const:`iris.analysis.MAX` no
``mean`` :const:`iris.analysis.MEAN` yes
``median`` :const:`iris.analysis.MEDIAN` [#f2]_ no
``median`` :const:`iris.analysis.MEDIAN` [#f2]_ no
``min`` :const:`iris.analysis.MIN` no
``peak`` :const:`iris.analysis.PEAK` no
``percentile`` :const:`iris.analysis.PERCENTILE` no
Expand Down Expand Up @@ -1570,29 +1570,44 @@ resample_hours:
``resample_hours``
------------------

This function changes the frequency of the data in the cube by extracting the
timesteps that belongs to the desired frequency. It is important to note that
it is mainly mean to be used with instantaneous data
Change the frequency of x-hourly data to y-hourly data by either eliminating
extra time steps or interpolation.
This is intended to be used with instantaneous data.

Parameters:
* interval: New frequency of the data. Must be a divisor of 24
* offset: First desired hour. Default 0. Must be lower than the interval
* `interval` (:obj:`int`): New frequency of the data. Must be a divisor of 24.
* `offset` (:obj:`int`): First hour of the desired output data (default:
0). Must be lower than the value of `interval`.
* `interpolate` (``None`` or :obj:`str`): If `interpolate` is ``None``
(default), convert x-hourly data to y-hourly (y > x) by eliminating extra
time steps. If `interpolate` is 'nearest' or 'linear', use
nearest-neighbor or bilinear interpolation to convert general x-hourly
data to general y-hourly data.

Examples:
* Convert to 12-hourly, by getting timesteps at 0:00 and 12:00:

.. code-block:: yaml
* Convert to 12-hourly data by getting time steps at 0:00 and 12:00:

resample_hours:
hours: 12
.. code-block:: yaml

* Convert to 12-hourly, by getting timesteps at 6:00 and 18:00:
resample_hours:
hours: 12

.. code-block:: yaml
* Convert to 12-hourly data by getting time steps at 6:00 and 18:00:

.. code-block:: yaml

resample_hours:
hours: 12
offset: 6

* Convert to 3-hourly data using bilinear interpolation:

.. code-block:: yaml

resample_hours:
hours: 12
offset: 6
resample_hours:
hours: 3
interpolate: linear

See also :func:`esmvalcore.preprocessor.resample_hours`.

Expand Down
12 changes: 10 additions & 2 deletions esmvalcore/_recipe/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,8 +409,8 @@ def _check_literal(
user_value = settings[step].get(option, allowed_values[0])
if user_value not in allowed_values:
raise RecipeError(
f"Expected one of {allowed_values} for `{option}`, got "
f"'{user_value}'"
f"Expected one of {allowed_values} for option `{option}` of "
f"preprocessor `{step}`, got '{user_value}'"
)


Expand All @@ -437,6 +437,14 @@ def _check_literal(
)


resample_hours = partial(
_check_literal,
step='resample_hours',
option='interpolate',
allowed_values=(None, 'nearest', 'linear'),
)


def _check_ref_attributes(products: set, *, step: str, attr_name: str) -> None:
"""Check that exactly one reference dataset is given."""
products = {p for p in products if step in p.settings}
Expand Down
1 change: 1 addition & 0 deletions esmvalcore/_recipe/recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -658,6 +658,7 @@ def _update_preproc_functions(settings, dataset, datasets, missing_vars):
check.regridding_schemes(settings)
check.bias_type(settings)
check.metric_type(settings)
check.resample_hours(settings)


def _get_preprocessor_task(datasets, profiles, task_name):
Expand Down
82 changes: 58 additions & 24 deletions esmvalcore/preprocessor/_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,19 @@
import logging
import warnings
from functools import partial
from typing import Iterable, Optional
from typing import Iterable, Literal, Optional
from warnings import filterwarnings

import dask.array as da
import dask.config
import iris
import iris.analysis
import iris.coord_categorisation
import iris.util
import isodate
import numpy as np
from cf_units import Unit
from cftime import datetime as cf_datetime
from iris.coords import AuxCoord, Coord, DimCoord
from iris.cube import Cube, CubeList
from iris.exceptions import CoordinateMultiDimError, CoordinateNotFoundError
Expand Down Expand Up @@ -1249,31 +1251,38 @@ def timeseries_filter(
return cube


def resample_hours(cube: Cube, interval: int, offset: int = 0) -> Cube:
"""Convert x-hourly data to y-hourly by eliminating extra timesteps.
def resample_hours(
cube: Cube,
interval: int,
offset: int = 0,
interpolate: Optional[Literal['nearest', 'linear']] = None,
) -> Cube:
"""Convert x-hourly data to y-hourly data.

Convert x-hourly data to y-hourly (y > x) by eliminating the extra
timesteps. This is intended to be used only with instantaneous values.
This is intended to be used with instantaneous data.

For example:

- resample_hours(cube, interval=6): Six-hourly intervals at 0:00, 6:00,
Examples
--------
- ``resample_hours(cube, interval=6)``: Six-hourly intervals at 0:00, 6:00,
12:00, 18:00.

- resample_hours(cube, interval=6, offset=3): Six-hourly intervals at
- ``resample_hours(cube, interval=6, offset=3)``: Six-hourly intervals at
3:00, 9:00, 15:00, 21:00.

- resample_hours(cube, interval=12, offset=6): Twelve-hourly intervals
- ``resample_hours(cube, interval=12, offset=6)``: Twelve-hourly intervals
at 6:00, 18:00.

Parameters
----------
cube:
Input cube.
interval:
The period (hours) of the desired data.
offset: optional
The firs hour (hours) of the desired data.
The period (hours) of the desired output data.
offset:
The first hour of the desired output data.
interpolate:
If `interpolate` is ``None`` (default), convert x-hourly data to
y-hourly (y > x) by eliminating extra time steps. If `interpolate` is
'nearest' or 'linear', use nearest-neighbor or bilinear interpolation
to convert general x-hourly data to general y-hourly data.

Returns
-------
Expand All @@ -1283,7 +1292,9 @@ def resample_hours(cube: Cube, interval: int, offset: int = 0) -> Cube:
Raises
------
ValueError:
The specified frequency is not a divisor of 24.
`interval` is not a divisor of 24; invalid `interpolate` given; or
input data does not contain any target hour (if `interpolate` is
``None``).

"""
allowed_intervals = (1, 2, 3, 4, 6, 12)
Expand All @@ -1298,15 +1309,38 @@ def resample_hours(cube: Cube, interval: int, offset: int = 0) -> Cube:
if cube_period.total_seconds() / 3600 > interval:
raise ValueError(f"Data period ({cube_period}) should be lower than "
f"the interval ({interval})")
hours = [PartialDateTime(hour=h) for h in range(0 + offset, 24, interval)]
dates = time.units.num2date(time.points)
select = np.zeros(len(dates), dtype=bool)
for hour in hours:
select |= dates == hour
cube = _select_timeslice(cube, select)
if cube is None:
raise ValueError(
f"Time coordinate {dates} does not contain {hours} for {cube}")

# Interpolate input time to requested hours if desired
if interpolate:
valeriupredoi marked this conversation as resolved.
Show resolved Hide resolved
if interpolate == 'nearest':
interpolation_scheme = iris.analysis.Nearest()
elif interpolate == 'linear':
interpolation_scheme = iris.analysis.Linear()
else:
raise ValueError(
f"Expected `None`, 'nearest' or 'linear' for `interpolate`, "
f"got '{interpolate}'"
)
new_dates = sorted([
cf_datetime(y, m, d, h, calendar=time.units.calendar)
for h in range(0 + offset, 24, interval)
for (y, m, d) in {(d.year, d.month, d.day) for d in dates}
])
valeriupredoi marked this conversation as resolved.
Show resolved Hide resolved
cube = cube.interpolate([('time', new_dates)], interpolation_scheme)
else:
hours = [
PartialDateTime(hour=h) for h in range(0 + offset, 24, interval)
]
dates = time.units.num2date(time.points)
select = np.zeros(len(dates), dtype=bool)
for hour in hours:
select |= dates == hour
cube = _select_timeslice(cube, select)
if cube is None:
raise ValueError(
f"Time coordinate {dates} does not contain {hours} for {cube}"
)

return cube

Expand Down
43 changes: 39 additions & 4 deletions tests/integration/recipe/test_recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3009,8 +3009,8 @@ def test_invalid_bias_type(tmp_path, patched_datafinder, session):
scripts: null
""")
msg = (
"Expected one of ('absolute', 'relative') for `bias_type`, got "
"'INVALID'"
"Expected one of ('absolute', 'relative') for option `bias_type` of "
"preprocessor `bias`, got 'INVALID'"
)
with pytest.raises(RecipeError) as exc:
get_recipe(tmp_path, content, session)
Expand Down Expand Up @@ -3283,8 +3283,43 @@ def test_invalid_metric(tmp_path, patched_datafinder, session):
""")
msg = (
"Expected one of ('rmse', 'weighted_rmse', 'pearsonr', "
"'weighted_pearsonr', 'emd', 'weighted_emd') for `metric`, got "
"'INVALID'"
"'weighted_pearsonr', 'emd', 'weighted_emd') for option `metric` of "
"preprocessor `distance_metric`, got 'INVALID'"
)
with pytest.raises(RecipeError) as exc:
get_recipe(tmp_path, content, session)
assert str(exc.value) == INITIALIZATION_ERROR_MSG
assert exc.value.failed_tasks[0].message == msg


def test_invalid_interpolate(tmp_path, patched_datafinder, session):
content = dedent("""
preprocessors:
test_resample_hours:
resample_hours:
interval: 1
interpolate: INVALID

diagnostics:
diagnostic_name:
variables:
ta:
preprocessor: test_resample_hours
project: CMIP6
mip: Amon
exp: historical
timerange: '20000101/20001231'
ensemble: r1i1p1f1
grid: gn
additional_datasets:
- {dataset: CanESM5}
- {dataset: CESM2, reference_for_bias: true}

scripts: null
""")
msg = (
"Expected one of (None, 'nearest', 'linear') for option `interpolate` "
"of preprocessor `resample_hours`, got 'INVALID'"
)
with pytest.raises(RecipeError) as exc:
get_recipe(tmp_path, content, session)
Expand Down
Loading