Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allowed ignoring of scalar time coords in multi_model_statistics #1961

Merged
merged 8 commits into from
May 19, 2023
68 changes: 45 additions & 23 deletions esmvalcore/preprocessor/_multimodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,9 +198,18 @@ def _map_to_new_time(cube, time_points):
try:
new_cube = cube.interpolate(sample_points, scheme)
except Exception as excinfo:
additional_info = ""
if cube.coords('time', dimensions=()):
additional_info = (
" Note: this alignment does not work for scalar time "
"coordinates. To ignore all scalar coordinates in the input "
"data, use the preprocessor option "
"`ignore_scalar_coords=True`."
)
raise ValueError(
f"Tried to align cubes in multi-model statistics, but failed for "
f"cube {cube}\n and time points {time_points}") from excinfo
f"cube {cube}\n and time points {time_points}.{additional_info}"
) from excinfo

# Change the dtype of int_time_coords to their original values
for coord_name in int_time_coords:
Expand Down Expand Up @@ -313,7 +322,7 @@ def _get_equal_coord_names_metadata(cubes, equal_coords_metadata):
return equal_names_metadata


def _equalise_coordinate_metadata(cubes, ignore_scalar_coords=False):
def _equalise_coordinate_metadata(cubes):
"""Equalise coordinates in cubes (in-place)."""
if not cubes:
return
Expand Down Expand Up @@ -356,17 +365,19 @@ def _equalise_coordinate_metadata(cubes, ignore_scalar_coords=False):
# Note: remaining differences will raise an error at a later stage
coord.long_name = None

# Remove scalar coordinates if desired. In addition, always remove
# specific scalar coordinates which are not expected to be equal in the
# input cubes.
# Remove special scalar coordinates which are not expected to be equal
# in the input cubes. Note: if `ignore_scalar_coords=True` is used for
# `multi_model_statistics`, the cubes do not contain scalar coordinates
# at this point anymore.
scalar_coords_to_always_remove = ['p0', 'ptop']
for scalar_coord in cube.coords(dimensions=()):
remove_coord = (
ignore_scalar_coords or
scalar_coord.var_name in scalar_coords_to_always_remove
)
if remove_coord:
if scalar_coord.var_name in scalar_coords_to_always_remove:
cube.remove_coord(scalar_coord)
schlunma marked this conversation as resolved.
Show resolved Hide resolved
logger.debug(
"Removed scalar coordinate '%s' from cube %s",
scalar_coord.var_name,
cube.summary(shorten=True),
)


def _equalise_fx_variables(cubes):
Expand Down Expand Up @@ -413,7 +424,7 @@ def _equalise_var_metadata(cubes):
setattr(cube, attr, equal_names_metadata[cube_id][attr])


def _combine(cubes, ignore_scalar_coords=False):
def _combine(cubes):
"""Merge iris cubes into a single big cube with new dimension.

This assumes that all input cubes have the same shape.
Expand All @@ -424,9 +435,7 @@ def _combine(cubes, ignore_scalar_coords=False):
equalise_attributes(cubes)
_equalise_var_metadata(cubes)
_equalise_cell_methods(cubes)
_equalise_coordinate_metadata(
cubes, ignore_scalar_coords=ignore_scalar_coords
)
_equalise_coordinate_metadata(cubes)
_equalise_fx_variables(cubes)

for i, cube in enumerate(cubes):
Expand Down Expand Up @@ -487,8 +496,12 @@ def _compute_slices(cubes):
yield slice(start, end)


def _compute_eager(cubes: list, *, operator: iris.analysis.Aggregator,
ignore_scalar_coords=False, **kwargs):
def _compute_eager(
cubes: list,
*,
operator: iris.analysis.Aggregator,
**kwargs,
):
"""Compute statistics one slice at a time."""
_ = [cube.data for cube in cubes] # make sure the cubes' data are realized

Expand All @@ -498,10 +511,7 @@ def _compute_eager(cubes: list, *, operator: iris.analysis.Aggregator,
single_model_slices = cubes # scalar cubes
else:
single_model_slices = [cube[chunk] for cube in cubes]
combined_slice = _combine(
single_model_slices,
ignore_scalar_coords=ignore_scalar_coords,
)
combined_slice = _combine(single_model_slices)
with warnings.catch_warnings():
warnings.filterwarnings(
'ignore',
Expand Down Expand Up @@ -570,9 +580,22 @@ def _multicube_statistics(cubes, statistics, span, ignore_scalar_coords=False):
# Avoid modifying inputs
copied_cubes = [cube.copy() for cube in cubes]

# Remove scalar coordinates in input cubes if desired to ignore them when
# merging
if ignore_scalar_coords:
for cube in copied_cubes:
for scalar_coord in cube.coords(dimensions=()):
cube.remove_coord(scalar_coord)
schlunma marked this conversation as resolved.
Show resolved Hide resolved
logger.debug(
"Removed scalar coordinate '%s' from cube %s since "
"ignore_scalar_coords=True",
scalar_coord.var_name,
cube.summary(shorten=True),
)

# If all cubes contain a time coordinate, align them. If no cube contains a
# time coordinate, do nothing. Else, raise an exception
time_coords = [cube.coords('time') for cube in cubes]
# time coordinate, do nothing. Else, raise an exception.
time_coords = [cube.coords('time') for cube in copied_cubes]
if all(time_coords):
aligned_cubes = _align_time_coord(copied_cubes, span=span)
elif not any(time_coords):
Expand All @@ -592,7 +615,6 @@ def _multicube_statistics(cubes, statistics, span, ignore_scalar_coords=False):

result_cube = _compute_eager(aligned_cubes,
operator=operator,
ignore_scalar_coords=ignore_scalar_coords,
**kwargs)
statistics_cubes[statistic] = result_cube

Expand Down
128 changes: 110 additions & 18 deletions tests/sample_data/multimodel_statistics/test_multimodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import iris
import numpy as np
import pytest
from iris.coords import AuxCoord

from esmvalcore.preprocessor import extract_time
from esmvalcore.preprocessor._multimodel import multi_model_statistics
Expand Down Expand Up @@ -156,13 +157,14 @@ def calendar(cube):
return cube_dict


def multimodel_test(cubes, statistic, span):
def multimodel_test(cubes, statistic, span, **kwargs):
"""Run multimodel test with some simple checks."""
statistics = [statistic]

result = multi_model_statistics(products=cubes,
statistics=statistics,
span=span)
span=span,
**kwargs)
assert isinstance(result, dict)
assert statistic in result

Expand Down Expand Up @@ -279,21 +281,21 @@ def test_multimodel_regression_day_proleptic_gregorian(


@pytest.mark.use_sample_data
def test_multimodel_no_vertical_dimension(timeseries_cubes_month):
@pytest.mark.parametrize('span', SPAN_PARAMS)
def test_multimodel_no_vertical_dimension(timeseries_cubes_month, span):
"""Test statistic without vertical dimension using monthly data."""
span = 'full'
cubes = [cube[:, 0] for cube in timeseries_cubes_month]
multimodel_test(cubes, span=span, statistic='mean')


@pytest.mark.use_sample_data
def test_multimodel_merge_error(timeseries_cubes_month):
@pytest.mark.parametrize('span', SPAN_PARAMS)
def test_multimodel_merge_error(timeseries_cubes_month, span):
"""Test statistic with slightly different vertical coordinates.

See https://github.com/ESMValGroup/ESMValCore/issues/956.

"""
span = 'full'
cubes = timeseries_cubes_month
msg = (
"Multi-model statistics failed to merge input cubes into a single "
Expand All @@ -304,32 +306,32 @@ def test_multimodel_merge_error(timeseries_cubes_month):


@pytest.mark.use_sample_data
def test_multimodel_only_time_dimension(timeseries_cubes_month):
@pytest.mark.parametrize('span', SPAN_PARAMS)
def test_multimodel_only_time_dimension(timeseries_cubes_month, span):
"""Test statistic without only the time dimension using monthly data."""
span = 'full'
cubes = [cube[:, 0, 0, 0] for cube in timeseries_cubes_month]
multimodel_test(cubes, span=span, statistic='mean')


@pytest.mark.use_sample_data
def test_multimodel_no_time_dimension(timeseries_cubes_month):
@pytest.mark.parametrize('span', SPAN_PARAMS)
def test_multimodel_no_time_dimension(timeseries_cubes_month, span):
"""Test statistic without time dimension using monthly data.

Also remove air_pressure dimensions since this slightly differs across
cubes. See https://github.com/ESMValGroup/ESMValCore/issues/956.

"""
span = 'full'
cubes = [cube[0, 0] for cube in timeseries_cubes_month]

result = multimodel_test(cubes, span=span, statistic='mean')['mean']
assert result.shape == (3, 2)


@pytest.mark.use_sample_data
def test_multimodel_scalar_cubes(timeseries_cubes_month):
@pytest.mark.parametrize('span', SPAN_PARAMS)
def test_multimodel_scalar_cubes(timeseries_cubes_month, span):
"""Test statistic with scalar cubes."""
span = 'full'
cubes = [cube[0, 0, 0, 0] for cube in timeseries_cubes_month]

result = multimodel_test(cubes, span=span, statistic='mean')['mean']
Expand All @@ -338,14 +340,14 @@ def test_multimodel_scalar_cubes(timeseries_cubes_month):


@pytest.mark.use_sample_data
def test_multimodel_0d_and_1d_time_dimensions(timeseries_cubes_month):
@pytest.mark.parametrize('span', SPAN_PARAMS)
def test_multimodel_0d_1d_time_no_ignore_scalars(timeseries_cubes_month, span):
"""Test statistic fail on 0D and 1D time dimension using monthly data.

Also remove air_pressure dimensions since this slightly differs across
cubes. See https://github.com/ESMValGroup/ESMValCore/issues/956.

"""
span = 'full'
cubes = [cube[:, 0] for cube in timeseries_cubes_month] # remove Z-dim
cubes[1] = cubes[1][0] # use 0D time dim for one cube

Expand All @@ -355,14 +357,37 @@ def test_multimodel_0d_and_1d_time_dimensions(timeseries_cubes_month):


@pytest.mark.use_sample_data
def test_multimodel_only_some_time_dimensions(timeseries_cubes_month):
@pytest.mark.parametrize('span', SPAN_PARAMS)
def test_multimodel_0d_1d_time_ignore_scalars(timeseries_cubes_month, span):
"""Test statistic fail on 0D and 1D time dimension using monthly data.

Also remove air_pressure dimensions since this slightly differs across
cubes. See https://github.com/ESMValGroup/ESMValCore/issues/956.

"""
cubes = [cube[:, 0] for cube in timeseries_cubes_month] # remove Z-dim
cubes[1] = cubes[1][0] # use 0D time dim for one cube

msg = (
"Multi-model statistics failed to merge input cubes into a single "
"array: some cubes have a 'time' dimension, some do not have a 'time' "
"dimension."
)
with pytest.raises(ValueError, match=msg):
multimodel_test(
cubes, span=span, statistic='mean', ignore_scalar_coords=True
)


@pytest.mark.use_sample_data
@pytest.mark.parametrize('span', SPAN_PARAMS)
def test_multimodel_only_some_time_dimensions(timeseries_cubes_month, span):
"""Test statistic fail if only some cubes have time dimension.

Also remove air_pressure dimensions since this slightly differs across
cubes. See https://github.com/ESMValGroup/ESMValCore/issues/956.

"""
span = 'full'
cubes = [cube[:, 0] for cube in timeseries_cubes_month] # remove Z-dim

# Remove time dimension for one cube
Expand All @@ -379,14 +404,14 @@ def test_multimodel_only_some_time_dimensions(timeseries_cubes_month):


@pytest.mark.use_sample_data
def test_multimodel_0d_different_time_dimensions(timeseries_cubes_month):
@pytest.mark.parametrize('span', SPAN_PARAMS)
def test_multimodel_diff_scalar_time_fail(timeseries_cubes_month, span):
"""Test statistic fail on different scalar time dimensions.

Also remove air_pressure dimensions since this slightly differs across
cubes. See https://github.com/ESMValGroup/ESMValCore/issues/956.

"""
span = 'full'
cubes = [cube[0, 0] for cube in timeseries_cubes_month]

# Use different scalar time point and bounds for one cube
Expand All @@ -396,3 +421,70 @@ def test_multimodel_0d_different_time_dimensions(timeseries_cubes_month):
msg = "Tried to align cubes in multi-model statistics, but failed for cube"
with pytest.raises(ValueError, match=msg):
multimodel_test(cubes, span=span, statistic='mean')


@pytest.mark.use_sample_data
@pytest.mark.parametrize('span', SPAN_PARAMS)
def test_multimodel_diff_scalar_time_ignore(timeseries_cubes_month, span):
"""Ignore different scalar time dimensions.

Also remove air_pressure dimensions since this slightly differs across
cubes. See https://github.com/ESMValGroup/ESMValCore/issues/956.
schlunma marked this conversation as resolved.
Show resolved Hide resolved

"""
cubes = [cube[0, 0] for cube in timeseries_cubes_month]

# Use different scalar time point and bounds for one cube
cubes[1].coord('time').points = 20.0
cubes[1].coord('time').bounds = [0.0, 40.0]

result = multimodel_test(
cubes, span=span, statistic='mean', ignore_scalar_coords=True
)['mean']
assert result.shape == (3, 2)


@pytest.mark.use_sample_data
@pytest.mark.parametrize('span', SPAN_PARAMS)
def test_multimodel_ignore_scalar_coords(timeseries_cubes_month, span):
"""Test statistic does not fail on different scalar coords when ignored.

Also remove air_pressure dimensions since this slightly differs across
cubes. See https://github.com/ESMValGroup/ESMValCore/issues/956.

"""
cubes = [cube[0, 0] for cube in timeseries_cubes_month]
for (idx, cube) in enumerate(cubes):
aux_coord = AuxCoord(0.0, var_name=f'name_{idx}')
cube.add_aux_coord(aux_coord, ())

result = multimodel_test(
cubes, span=span, statistic='mean', ignore_scalar_coords=True
)['mean']
assert result.shape == (3, 2)

# Make sure that the input cubes still contain the scalar coords
for (idx, cube) in enumerate(cubes):
assert cube.coord(var_name=f'name_{idx}', dimensions=())


@pytest.mark.use_sample_data
@pytest.mark.parametrize('span', SPAN_PARAMS)
def test_multimodel_do_not_ignore_scalar_coords(timeseries_cubes_month, span):
"""Test statistic fail on different scalar coords.

Also remove air_pressure dimensions since this slightly differs across
cubes. See https://github.com/ESMValGroup/ESMValCore/issues/956.

"""
cubes = [cube[0, 0] for cube in timeseries_cubes_month]
for (idx, cube) in enumerate(cubes):
aux_coord = AuxCoord(0.0, var_name=f'name_{idx}')
cube.add_aux_coord(aux_coord, ())

msg = (
"Multi-model statistics failed to merge input cubes into a single "
"array"
)
with pytest.raises(ValueError, match=msg):
multimodel_test(cubes, span=span, statistic='mean')
17 changes: 2 additions & 15 deletions tests/unit/preprocessor/_multimodel/test_multimodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -524,21 +524,8 @@ def test_combine_inconsistent_var_names_fail():
mm._combine(cubes)


def test_combine_with_scalar_coords_to_remove():
"""Test _combine with scalar coordinates that should be removed."""
cubes = CubeList(generate_cube_from_dates('monthly') for _ in range(3))
scalar_coord_0 = AuxCoord(0.0, standard_name='height', units='m')
scalar_coord_1 = AuxCoord(1.0, long_name='Test scalar coordinate')
cubes[0].add_aux_coord(scalar_coord_0, ())
cubes[1].add_aux_coord(scalar_coord_1, ())

merged_cube = mm._combine(cubes, ignore_scalar_coords=True)
assert merged_cube.shape == (3, 3)
assert not merged_cube.coords(dimensions=())


def test_combine_with_scalar_coords_to_remove_fail():
"""Test _combine with scalar coordinates that should not be removed."""
def test_combine_differing_scalar_coords_fail():
"""Test _combine with differing scalar coordinates."""
cubes = CubeList(generate_cube_from_dates('monthly') for _ in range(2))
scalar_coord_0 = AuxCoord(0.0, standard_name='height', units='m')
cubes[0].add_aux_coord(scalar_coord_0, ())
Expand Down