Skip to content

Commit

Permalink
Allowed ignoring of scalar time coords in multi_model_statistics (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
schlunma authored May 19, 2023
1 parent 5593c1a commit 8b6be3c
Show file tree
Hide file tree
Showing 3 changed files with 181 additions and 64 deletions.
68 changes: 45 additions & 23 deletions esmvalcore/preprocessor/_multimodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,9 +198,18 @@ def _map_to_new_time(cube, time_points):
try:
new_cube = cube.interpolate(sample_points, scheme)
except Exception as excinfo:
additional_info = ""
if cube.coords('time', dimensions=()):
additional_info = (
" Note: this alignment does not work for scalar time "
"coordinates. To ignore all scalar coordinates in the input "
"data, use the preprocessor option "
"`ignore_scalar_coords=True`."
)
raise ValueError(
f"Tried to align cubes in multi-model statistics, but failed for "
f"cube {cube}\n and time points {time_points}") from excinfo
f"cube {cube}\n and time points {time_points}.{additional_info}"
) from excinfo

# Change the dtype of int_time_coords to their original values
for coord_name in int_time_coords:
Expand Down Expand Up @@ -313,7 +322,7 @@ def _get_equal_coord_names_metadata(cubes, equal_coords_metadata):
return equal_names_metadata


def _equalise_coordinate_metadata(cubes, ignore_scalar_coords=False):
def _equalise_coordinate_metadata(cubes):
"""Equalise coordinates in cubes (in-place)."""
if not cubes:
return
Expand Down Expand Up @@ -356,17 +365,19 @@ def _equalise_coordinate_metadata(cubes, ignore_scalar_coords=False):
# Note: remaining differences will raise an error at a later stage
coord.long_name = None

# Remove scalar coordinates if desired. In addition, always remove
# specific scalar coordinates which are not expected to be equal in the
# input cubes.
# Remove special scalar coordinates which are not expected to be equal
# in the input cubes. Note: if `ignore_scalar_coords=True` is used for
# `multi_model_statistics`, the cubes do not contain scalar coordinates
# at this point anymore.
scalar_coords_to_always_remove = ['p0', 'ptop']
for scalar_coord in cube.coords(dimensions=()):
remove_coord = (
ignore_scalar_coords or
scalar_coord.var_name in scalar_coords_to_always_remove
)
if remove_coord:
if scalar_coord.var_name in scalar_coords_to_always_remove:
cube.remove_coord(scalar_coord)
logger.debug(
"Removed scalar coordinate '%s' from cube %s",
scalar_coord.var_name,
cube.summary(shorten=True),
)


def _equalise_fx_variables(cubes):
Expand Down Expand Up @@ -413,7 +424,7 @@ def _equalise_var_metadata(cubes):
setattr(cube, attr, equal_names_metadata[cube_id][attr])


def _combine(cubes, ignore_scalar_coords=False):
def _combine(cubes):
"""Merge iris cubes into a single big cube with new dimension.
This assumes that all input cubes have the same shape.
Expand All @@ -424,9 +435,7 @@ def _combine(cubes, ignore_scalar_coords=False):
equalise_attributes(cubes)
_equalise_var_metadata(cubes)
_equalise_cell_methods(cubes)
_equalise_coordinate_metadata(
cubes, ignore_scalar_coords=ignore_scalar_coords
)
_equalise_coordinate_metadata(cubes)
_equalise_fx_variables(cubes)

for i, cube in enumerate(cubes):
Expand Down Expand Up @@ -487,8 +496,12 @@ def _compute_slices(cubes):
yield slice(start, end)


def _compute_eager(cubes: list, *, operator: iris.analysis.Aggregator,
ignore_scalar_coords=False, **kwargs):
def _compute_eager(
cubes: list,
*,
operator: iris.analysis.Aggregator,
**kwargs,
):
"""Compute statistics one slice at a time."""
_ = [cube.data for cube in cubes] # make sure the cubes' data are realized

Expand All @@ -498,10 +511,7 @@ def _compute_eager(cubes: list, *, operator: iris.analysis.Aggregator,
single_model_slices = cubes # scalar cubes
else:
single_model_slices = [cube[chunk] for cube in cubes]
combined_slice = _combine(
single_model_slices,
ignore_scalar_coords=ignore_scalar_coords,
)
combined_slice = _combine(single_model_slices)
with warnings.catch_warnings():
warnings.filterwarnings(
'ignore',
Expand Down Expand Up @@ -570,9 +580,22 @@ def _multicube_statistics(cubes, statistics, span, ignore_scalar_coords=False):
# Avoid modifying inputs
copied_cubes = [cube.copy() for cube in cubes]

# Remove scalar coordinates in input cubes if desired to ignore them when
# merging
if ignore_scalar_coords:
for cube in copied_cubes:
for scalar_coord in cube.coords(dimensions=()):
cube.remove_coord(scalar_coord)
logger.debug(
"Removed scalar coordinate '%s' from cube %s since "
"ignore_scalar_coords=True",
scalar_coord.var_name,
cube.summary(shorten=True),
)

# If all cubes contain a time coordinate, align them. If no cube contains a
# time coordinate, do nothing. Else, raise an exception
time_coords = [cube.coords('time') for cube in cubes]
# time coordinate, do nothing. Else, raise an exception.
time_coords = [cube.coords('time') for cube in copied_cubes]
if all(time_coords):
aligned_cubes = _align_time_coord(copied_cubes, span=span)
elif not any(time_coords):
Expand All @@ -592,7 +615,6 @@ def _multicube_statistics(cubes, statistics, span, ignore_scalar_coords=False):

result_cube = _compute_eager(aligned_cubes,
operator=operator,
ignore_scalar_coords=ignore_scalar_coords,
**kwargs)
statistics_cubes[statistic] = result_cube

Expand Down
160 changes: 134 additions & 26 deletions tests/sample_data/multimodel_statistics/test_multimodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import iris
import numpy as np
import pytest
from iris.coords import AuxCoord

from esmvalcore.preprocessor import extract_time
from esmvalcore.preprocessor._multimodel import multi_model_statistics
Expand Down Expand Up @@ -156,13 +157,14 @@ def calendar(cube):
return cube_dict


def multimodel_test(cubes, statistic, span):
def multimodel_test(cubes, statistic, span, **kwargs):
"""Run multimodel test with some simple checks."""
statistics = [statistic]

result = multi_model_statistics(products=cubes,
statistics=statistics,
span=span)
span=span,
**kwargs)
assert isinstance(result, dict)
assert statistic in result

Expand Down Expand Up @@ -279,21 +281,21 @@ def test_multimodel_regression_day_proleptic_gregorian(


@pytest.mark.use_sample_data
def test_multimodel_no_vertical_dimension(timeseries_cubes_month):
@pytest.mark.parametrize('span', SPAN_PARAMS)
def test_multimodel_no_vertical_dimension(timeseries_cubes_month, span):
"""Test statistic without vertical dimension using monthly data."""
span = 'full'
cubes = [cube[:, 0] for cube in timeseries_cubes_month]
multimodel_test(cubes, span=span, statistic='mean')


@pytest.mark.use_sample_data
def test_multimodel_merge_error(timeseries_cubes_month):
@pytest.mark.parametrize('span', SPAN_PARAMS)
def test_multimodel_merge_error(timeseries_cubes_month, span):
"""Test statistic with slightly different vertical coordinates.
See https://github.com/ESMValGroup/ESMValCore/issues/956.
"""
span = 'full'
cubes = timeseries_cubes_month
msg = (
"Multi-model statistics failed to merge input cubes into a single "
Expand All @@ -304,32 +306,34 @@ def test_multimodel_merge_error(timeseries_cubes_month):


@pytest.mark.use_sample_data
def test_multimodel_only_time_dimension(timeseries_cubes_month):
@pytest.mark.parametrize('span', SPAN_PARAMS)
def test_multimodel_only_time_dimension(timeseries_cubes_month, span):
"""Test statistic without only the time dimension using monthly data."""
span = 'full'
cubes = [cube[:, 0, 0, 0] for cube in timeseries_cubes_month]
multimodel_test(cubes, span=span, statistic='mean')


@pytest.mark.use_sample_data
def test_multimodel_no_time_dimension(timeseries_cubes_month):
@pytest.mark.parametrize('span', SPAN_PARAMS)
def test_multimodel_no_time_dimension(timeseries_cubes_month, span):
"""Test statistic without time dimension using monthly data.
Also remove air_pressure dimensions since this slightly differs across
cubes. See https://github.com/ESMValGroup/ESMValCore/issues/956.
Note: we collapse the air_pressure dimension here (by selecting only its
first value) since the original coordinate differs slightly across cubes
and leads to merge errors. See also
https://github.com/ESMValGroup/ESMValCore/issues/956.
"""
span = 'full'
cubes = [cube[0, 0] for cube in timeseries_cubes_month]

result = multimodel_test(cubes, span=span, statistic='mean')['mean']
assert result.shape == (3, 2)


@pytest.mark.use_sample_data
def test_multimodel_scalar_cubes(timeseries_cubes_month):
@pytest.mark.parametrize('span', SPAN_PARAMS)
def test_multimodel_scalar_cubes(timeseries_cubes_month, span):
"""Test statistic with scalar cubes."""
span = 'full'
cubes = [cube[0, 0, 0, 0] for cube in timeseries_cubes_month]

result = multimodel_test(cubes, span=span, statistic='mean')['mean']
Expand All @@ -338,14 +342,16 @@ def test_multimodel_scalar_cubes(timeseries_cubes_month):


@pytest.mark.use_sample_data
def test_multimodel_0d_and_1d_time_dimensions(timeseries_cubes_month):
@pytest.mark.parametrize('span', SPAN_PARAMS)
def test_multimodel_0d_1d_time_no_ignore_scalars(timeseries_cubes_month, span):
"""Test statistic fail on 0D and 1D time dimension using monthly data.
Also remove air_pressure dimensions since this slightly differs across
cubes. See https://github.com/ESMValGroup/ESMValCore/issues/956.
Note: we collapse the air_pressure dimension here (by selecting only its
first value) since the original coordinate differs slightly across cubes
and leads to merge errors. See also
https://github.com/ESMValGroup/ESMValCore/issues/956.
"""
span = 'full'
cubes = [cube[:, 0] for cube in timeseries_cubes_month] # remove Z-dim
cubes[1] = cubes[1][0] # use 0D time dim for one cube

Expand All @@ -355,14 +361,41 @@ def test_multimodel_0d_and_1d_time_dimensions(timeseries_cubes_month):


@pytest.mark.use_sample_data
def test_multimodel_only_some_time_dimensions(timeseries_cubes_month):
@pytest.mark.parametrize('span', SPAN_PARAMS)
def test_multimodel_0d_1d_time_ignore_scalars(timeseries_cubes_month, span):
"""Test statistic fail on 0D and 1D time dimension using monthly data.
Note: we collapse the air_pressure dimension here (by selecting only its
first value) since the original coordinate differs slightly across cubes
and leads to merge errors. See also
https://github.com/ESMValGroup/ESMValCore/issues/956.
"""
cubes = [cube[:, 0] for cube in timeseries_cubes_month] # remove Z-dim
cubes[1] = cubes[1][0] # use 0D time dim for one cube

msg = (
"Multi-model statistics failed to merge input cubes into a single "
"array: some cubes have a 'time' dimension, some do not have a 'time' "
"dimension."
)
with pytest.raises(ValueError, match=msg):
multimodel_test(
cubes, span=span, statistic='mean', ignore_scalar_coords=True
)


@pytest.mark.use_sample_data
@pytest.mark.parametrize('span', SPAN_PARAMS)
def test_multimodel_only_some_time_dimensions(timeseries_cubes_month, span):
"""Test statistic fail if only some cubes have time dimension.
Also remove air_pressure dimensions since this slightly differs across
cubes. See https://github.com/ESMValGroup/ESMValCore/issues/956.
Note: we collapse the air_pressure dimension here (by selecting only its
first value) since the original coordinate differs slightly across cubes
and leads to merge errors. See also
https://github.com/ESMValGroup/ESMValCore/issues/956.
"""
span = 'full'
cubes = [cube[:, 0] for cube in timeseries_cubes_month] # remove Z-dim

# Remove time dimension for one cube
Expand All @@ -379,14 +412,16 @@ def test_multimodel_only_some_time_dimensions(timeseries_cubes_month):


@pytest.mark.use_sample_data
def test_multimodel_0d_different_time_dimensions(timeseries_cubes_month):
@pytest.mark.parametrize('span', SPAN_PARAMS)
def test_multimodel_diff_scalar_time_fail(timeseries_cubes_month, span):
"""Test statistic fail on different scalar time dimensions.
Also remove air_pressure dimensions since this slightly differs across
cubes. See https://github.com/ESMValGroup/ESMValCore/issues/956.
Note: we collapse the air_pressure dimension here (by selecting only its
first value) since the original coordinate differs slightly across cubes
and leads to merge errors. See also
https://github.com/ESMValGroup/ESMValCore/issues/956.
"""
span = 'full'
cubes = [cube[0, 0] for cube in timeseries_cubes_month]

# Use different scalar time point and bounds for one cube
Expand All @@ -396,3 +431,76 @@ def test_multimodel_0d_different_time_dimensions(timeseries_cubes_month):
msg = "Tried to align cubes in multi-model statistics, but failed for cube"
with pytest.raises(ValueError, match=msg):
multimodel_test(cubes, span=span, statistic='mean')


@pytest.mark.use_sample_data
@pytest.mark.parametrize('span', SPAN_PARAMS)
def test_multimodel_diff_scalar_time_ignore(timeseries_cubes_month, span):
"""Ignore different scalar time dimensions.
Note: we collapse the air_pressure dimension here (by selecting only its
first value) since the original coordinate differs slightly across cubes
and leads to merge errors. See also
https://github.com/ESMValGroup/ESMValCore/issues/956.
"""
cubes = [cube[0, 0] for cube in timeseries_cubes_month]

# Use different scalar time point and bounds for one cube
cubes[1].coord('time').points = 20.0
cubes[1].coord('time').bounds = [0.0, 40.0]

result = multimodel_test(
cubes, span=span, statistic='mean', ignore_scalar_coords=True
)['mean']
assert result.shape == (3, 2)


@pytest.mark.use_sample_data
@pytest.mark.parametrize('span', SPAN_PARAMS)
def test_multimodel_ignore_scalar_coords(timeseries_cubes_month, span):
"""Test statistic does not fail on different scalar coords when ignored.
Note: we collapse the air_pressure dimension here (by selecting only its
first value) since the original coordinate differs slightly across cubes
and leads to merge errors. See also
https://github.com/ESMValGroup/ESMValCore/issues/956.
"""
cubes = [cube[0, 0] for cube in timeseries_cubes_month]
for (idx, cube) in enumerate(cubes):
aux_coord = AuxCoord(0.0, var_name=f'name_{idx}')
cube.add_aux_coord(aux_coord, ())

result = multimodel_test(
cubes, span=span, statistic='mean', ignore_scalar_coords=True
)['mean']
assert result.shape == (3, 2)

# Make sure that the input cubes still contain the scalar coords
for (idx, cube) in enumerate(cubes):
assert cube.coord(var_name=f'name_{idx}', dimensions=())


@pytest.mark.use_sample_data
@pytest.mark.parametrize('span', SPAN_PARAMS)
def test_multimodel_do_not_ignore_scalar_coords(timeseries_cubes_month, span):
"""Test statistic fail on different scalar coords.
Note: we collapse the air_pressure dimension here (by selecting only its
first value) since the original coordinate differs slightly across cubes
and leads to merge errors. See also
https://github.com/ESMValGroup/ESMValCore/issues/956.
"""
cubes = [cube[0, 0] for cube in timeseries_cubes_month]
for (idx, cube) in enumerate(cubes):
aux_coord = AuxCoord(0.0, var_name=f'name_{idx}')
cube.add_aux_coord(aux_coord, ())

msg = (
"Multi-model statistics failed to merge input cubes into a single "
"array"
)
with pytest.raises(ValueError, match=msg):
multimodel_test(cubes, span=span, statistic='mean')
Loading

0 comments on commit 8b6be3c

Please sign in to comment.