diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index f808d1d9c5..9bb6d6e2af 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -198,9 +198,18 @@ def _map_to_new_time(cube, time_points): try: new_cube = cube.interpolate(sample_points, scheme) except Exception as excinfo: + additional_info = "" + if cube.coords('time', dimensions=()): + additional_info = ( + " Note: this alignment does not work for scalar time " + "coordinates. To ignore all scalar coordinates in the input " + "data, use the preprocessor option " + "`ignore_scalar_coords=True`." + ) raise ValueError( f"Tried to align cubes in multi-model statistics, but failed for " - f"cube {cube}\n and time points {time_points}") from excinfo + f"cube {cube}\n and time points {time_points}.{additional_info}" + ) from excinfo # Change the dtype of int_time_coords to their original values for coord_name in int_time_coords: @@ -313,7 +322,7 @@ def _get_equal_coord_names_metadata(cubes, equal_coords_metadata): return equal_names_metadata -def _equalise_coordinate_metadata(cubes, ignore_scalar_coords=False): +def _equalise_coordinate_metadata(cubes): """Equalise coordinates in cubes (in-place).""" if not cubes: return @@ -356,17 +365,19 @@ def _equalise_coordinate_metadata(cubes, ignore_scalar_coords=False): # Note: remaining differences will raise an error at a later stage coord.long_name = None - # Remove scalar coordinates if desired. In addition, always remove - # specific scalar coordinates which are not expected to be equal in the - # input cubes. + # Remove special scalar coordinates which are not expected to be equal + # in the input cubes. Note: if `ignore_scalar_coords=True` is used for + # `multi_model_statistics`, the cubes do not contain scalar coordinates + # at this point anymore. scalar_coords_to_always_remove = ['p0', 'ptop'] for scalar_coord in cube.coords(dimensions=()): - remove_coord = ( - ignore_scalar_coords or - scalar_coord.var_name in scalar_coords_to_always_remove - ) - if remove_coord: + if scalar_coord.var_name in scalar_coords_to_always_remove: cube.remove_coord(scalar_coord) + logger.debug( + "Removed scalar coordinate '%s' from cube %s", + scalar_coord.var_name, + cube.summary(shorten=True), + ) def _equalise_fx_variables(cubes): @@ -413,7 +424,7 @@ def _equalise_var_metadata(cubes): setattr(cube, attr, equal_names_metadata[cube_id][attr]) -def _combine(cubes, ignore_scalar_coords=False): +def _combine(cubes): """Merge iris cubes into a single big cube with new dimension. This assumes that all input cubes have the same shape. @@ -424,9 +435,7 @@ def _combine(cubes, ignore_scalar_coords=False): equalise_attributes(cubes) _equalise_var_metadata(cubes) _equalise_cell_methods(cubes) - _equalise_coordinate_metadata( - cubes, ignore_scalar_coords=ignore_scalar_coords - ) + _equalise_coordinate_metadata(cubes) _equalise_fx_variables(cubes) for i, cube in enumerate(cubes): @@ -487,8 +496,12 @@ def _compute_slices(cubes): yield slice(start, end) -def _compute_eager(cubes: list, *, operator: iris.analysis.Aggregator, - ignore_scalar_coords=False, **kwargs): +def _compute_eager( + cubes: list, + *, + operator: iris.analysis.Aggregator, + **kwargs, +): """Compute statistics one slice at a time.""" _ = [cube.data for cube in cubes] # make sure the cubes' data are realized @@ -498,10 +511,7 @@ def _compute_eager(cubes: list, *, operator: iris.analysis.Aggregator, single_model_slices = cubes # scalar cubes else: single_model_slices = [cube[chunk] for cube in cubes] - combined_slice = _combine( - single_model_slices, - ignore_scalar_coords=ignore_scalar_coords, - ) + combined_slice = _combine(single_model_slices) with warnings.catch_warnings(): warnings.filterwarnings( 'ignore', @@ -570,9 +580,22 @@ def _multicube_statistics(cubes, statistics, span, ignore_scalar_coords=False): # Avoid modifying inputs copied_cubes = [cube.copy() for cube in cubes] + # Remove scalar coordinates in input cubes if desired to ignore them when + # merging + if ignore_scalar_coords: + for cube in copied_cubes: + for scalar_coord in cube.coords(dimensions=()): + cube.remove_coord(scalar_coord) + logger.debug( + "Removed scalar coordinate '%s' from cube %s since " + "ignore_scalar_coords=True", + scalar_coord.var_name, + cube.summary(shorten=True), + ) + # If all cubes contain a time coordinate, align them. If no cube contains a - # time coordinate, do nothing. Else, raise an exception - time_coords = [cube.coords('time') for cube in cubes] + # time coordinate, do nothing. Else, raise an exception. + time_coords = [cube.coords('time') for cube in copied_cubes] if all(time_coords): aligned_cubes = _align_time_coord(copied_cubes, span=span) elif not any(time_coords): @@ -592,7 +615,6 @@ def _multicube_statistics(cubes, statistics, span, ignore_scalar_coords=False): result_cube = _compute_eager(aligned_cubes, operator=operator, - ignore_scalar_coords=ignore_scalar_coords, **kwargs) statistics_cubes[statistic] = result_cube diff --git a/tests/sample_data/multimodel_statistics/test_multimodel.py b/tests/sample_data/multimodel_statistics/test_multimodel.py index f69c424d3d..28b7640e6c 100644 --- a/tests/sample_data/multimodel_statistics/test_multimodel.py +++ b/tests/sample_data/multimodel_statistics/test_multimodel.py @@ -10,6 +10,7 @@ import iris import numpy as np import pytest +from iris.coords import AuxCoord from esmvalcore.preprocessor import extract_time from esmvalcore.preprocessor._multimodel import multi_model_statistics @@ -156,13 +157,14 @@ def calendar(cube): return cube_dict -def multimodel_test(cubes, statistic, span): +def multimodel_test(cubes, statistic, span, **kwargs): """Run multimodel test with some simple checks.""" statistics = [statistic] result = multi_model_statistics(products=cubes, statistics=statistics, - span=span) + span=span, + **kwargs) assert isinstance(result, dict) assert statistic in result @@ -279,21 +281,21 @@ def test_multimodel_regression_day_proleptic_gregorian( @pytest.mark.use_sample_data -def test_multimodel_no_vertical_dimension(timeseries_cubes_month): +@pytest.mark.parametrize('span', SPAN_PARAMS) +def test_multimodel_no_vertical_dimension(timeseries_cubes_month, span): """Test statistic without vertical dimension using monthly data.""" - span = 'full' cubes = [cube[:, 0] for cube in timeseries_cubes_month] multimodel_test(cubes, span=span, statistic='mean') @pytest.mark.use_sample_data -def test_multimodel_merge_error(timeseries_cubes_month): +@pytest.mark.parametrize('span', SPAN_PARAMS) +def test_multimodel_merge_error(timeseries_cubes_month, span): """Test statistic with slightly different vertical coordinates. See https://github.com/ESMValGroup/ESMValCore/issues/956. """ - span = 'full' cubes = timeseries_cubes_month msg = ( "Multi-model statistics failed to merge input cubes into a single " @@ -304,22 +306,24 @@ def test_multimodel_merge_error(timeseries_cubes_month): @pytest.mark.use_sample_data -def test_multimodel_only_time_dimension(timeseries_cubes_month): +@pytest.mark.parametrize('span', SPAN_PARAMS) +def test_multimodel_only_time_dimension(timeseries_cubes_month, span): """Test statistic without only the time dimension using monthly data.""" - span = 'full' cubes = [cube[:, 0, 0, 0] for cube in timeseries_cubes_month] multimodel_test(cubes, span=span, statistic='mean') @pytest.mark.use_sample_data -def test_multimodel_no_time_dimension(timeseries_cubes_month): +@pytest.mark.parametrize('span', SPAN_PARAMS) +def test_multimodel_no_time_dimension(timeseries_cubes_month, span): """Test statistic without time dimension using monthly data. - Also remove air_pressure dimensions since this slightly differs across - cubes. See https://github.com/ESMValGroup/ESMValCore/issues/956. + Note: we collapse the air_pressure dimension here (by selecting only its + first value) since the original coordinate differs slightly across cubes + and leads to merge errors. See also + https://github.com/ESMValGroup/ESMValCore/issues/956. """ - span = 'full' cubes = [cube[0, 0] for cube in timeseries_cubes_month] result = multimodel_test(cubes, span=span, statistic='mean')['mean'] @@ -327,9 +331,9 @@ def test_multimodel_no_time_dimension(timeseries_cubes_month): @pytest.mark.use_sample_data -def test_multimodel_scalar_cubes(timeseries_cubes_month): +@pytest.mark.parametrize('span', SPAN_PARAMS) +def test_multimodel_scalar_cubes(timeseries_cubes_month, span): """Test statistic with scalar cubes.""" - span = 'full' cubes = [cube[0, 0, 0, 0] for cube in timeseries_cubes_month] result = multimodel_test(cubes, span=span, statistic='mean')['mean'] @@ -338,14 +342,16 @@ def test_multimodel_scalar_cubes(timeseries_cubes_month): @pytest.mark.use_sample_data -def test_multimodel_0d_and_1d_time_dimensions(timeseries_cubes_month): +@pytest.mark.parametrize('span', SPAN_PARAMS) +def test_multimodel_0d_1d_time_no_ignore_scalars(timeseries_cubes_month, span): """Test statistic fail on 0D and 1D time dimension using monthly data. - Also remove air_pressure dimensions since this slightly differs across - cubes. See https://github.com/ESMValGroup/ESMValCore/issues/956. + Note: we collapse the air_pressure dimension here (by selecting only its + first value) since the original coordinate differs slightly across cubes + and leads to merge errors. See also + https://github.com/ESMValGroup/ESMValCore/issues/956. """ - span = 'full' cubes = [cube[:, 0] for cube in timeseries_cubes_month] # remove Z-dim cubes[1] = cubes[1][0] # use 0D time dim for one cube @@ -355,14 +361,41 @@ def test_multimodel_0d_and_1d_time_dimensions(timeseries_cubes_month): @pytest.mark.use_sample_data -def test_multimodel_only_some_time_dimensions(timeseries_cubes_month): +@pytest.mark.parametrize('span', SPAN_PARAMS) +def test_multimodel_0d_1d_time_ignore_scalars(timeseries_cubes_month, span): + """Test statistic fail on 0D and 1D time dimension using monthly data. + + Note: we collapse the air_pressure dimension here (by selecting only its + first value) since the original coordinate differs slightly across cubes + and leads to merge errors. See also + https://github.com/ESMValGroup/ESMValCore/issues/956. + + """ + cubes = [cube[:, 0] for cube in timeseries_cubes_month] # remove Z-dim + cubes[1] = cubes[1][0] # use 0D time dim for one cube + + msg = ( + "Multi-model statistics failed to merge input cubes into a single " + "array: some cubes have a 'time' dimension, some do not have a 'time' " + "dimension." + ) + with pytest.raises(ValueError, match=msg): + multimodel_test( + cubes, span=span, statistic='mean', ignore_scalar_coords=True + ) + + +@pytest.mark.use_sample_data +@pytest.mark.parametrize('span', SPAN_PARAMS) +def test_multimodel_only_some_time_dimensions(timeseries_cubes_month, span): """Test statistic fail if only some cubes have time dimension. - Also remove air_pressure dimensions since this slightly differs across - cubes. See https://github.com/ESMValGroup/ESMValCore/issues/956. + Note: we collapse the air_pressure dimension here (by selecting only its + first value) since the original coordinate differs slightly across cubes + and leads to merge errors. See also + https://github.com/ESMValGroup/ESMValCore/issues/956. """ - span = 'full' cubes = [cube[:, 0] for cube in timeseries_cubes_month] # remove Z-dim # Remove time dimension for one cube @@ -379,14 +412,16 @@ def test_multimodel_only_some_time_dimensions(timeseries_cubes_month): @pytest.mark.use_sample_data -def test_multimodel_0d_different_time_dimensions(timeseries_cubes_month): +@pytest.mark.parametrize('span', SPAN_PARAMS) +def test_multimodel_diff_scalar_time_fail(timeseries_cubes_month, span): """Test statistic fail on different scalar time dimensions. - Also remove air_pressure dimensions since this slightly differs across - cubes. See https://github.com/ESMValGroup/ESMValCore/issues/956. + Note: we collapse the air_pressure dimension here (by selecting only its + first value) since the original coordinate differs slightly across cubes + and leads to merge errors. See also + https://github.com/ESMValGroup/ESMValCore/issues/956. """ - span = 'full' cubes = [cube[0, 0] for cube in timeseries_cubes_month] # Use different scalar time point and bounds for one cube @@ -396,3 +431,76 @@ def test_multimodel_0d_different_time_dimensions(timeseries_cubes_month): msg = "Tried to align cubes in multi-model statistics, but failed for cube" with pytest.raises(ValueError, match=msg): multimodel_test(cubes, span=span, statistic='mean') + + +@pytest.mark.use_sample_data +@pytest.mark.parametrize('span', SPAN_PARAMS) +def test_multimodel_diff_scalar_time_ignore(timeseries_cubes_month, span): + """Ignore different scalar time dimensions. + + Note: we collapse the air_pressure dimension here (by selecting only its + first value) since the original coordinate differs slightly across cubes + and leads to merge errors. See also + https://github.com/ESMValGroup/ESMValCore/issues/956. + + """ + cubes = [cube[0, 0] for cube in timeseries_cubes_month] + + # Use different scalar time point and bounds for one cube + cubes[1].coord('time').points = 20.0 + cubes[1].coord('time').bounds = [0.0, 40.0] + + result = multimodel_test( + cubes, span=span, statistic='mean', ignore_scalar_coords=True + )['mean'] + assert result.shape == (3, 2) + + +@pytest.mark.use_sample_data +@pytest.mark.parametrize('span', SPAN_PARAMS) +def test_multimodel_ignore_scalar_coords(timeseries_cubes_month, span): + """Test statistic does not fail on different scalar coords when ignored. + + Note: we collapse the air_pressure dimension here (by selecting only its + first value) since the original coordinate differs slightly across cubes + and leads to merge errors. See also + https://github.com/ESMValGroup/ESMValCore/issues/956. + + """ + cubes = [cube[0, 0] for cube in timeseries_cubes_month] + for (idx, cube) in enumerate(cubes): + aux_coord = AuxCoord(0.0, var_name=f'name_{idx}') + cube.add_aux_coord(aux_coord, ()) + + result = multimodel_test( + cubes, span=span, statistic='mean', ignore_scalar_coords=True + )['mean'] + assert result.shape == (3, 2) + + # Make sure that the input cubes still contain the scalar coords + for (idx, cube) in enumerate(cubes): + assert cube.coord(var_name=f'name_{idx}', dimensions=()) + + +@pytest.mark.use_sample_data +@pytest.mark.parametrize('span', SPAN_PARAMS) +def test_multimodel_do_not_ignore_scalar_coords(timeseries_cubes_month, span): + """Test statistic fail on different scalar coords. + + Note: we collapse the air_pressure dimension here (by selecting only its + first value) since the original coordinate differs slightly across cubes + and leads to merge errors. See also + https://github.com/ESMValGroup/ESMValCore/issues/956. + + """ + cubes = [cube[0, 0] for cube in timeseries_cubes_month] + for (idx, cube) in enumerate(cubes): + aux_coord = AuxCoord(0.0, var_name=f'name_{idx}') + cube.add_aux_coord(aux_coord, ()) + + msg = ( + "Multi-model statistics failed to merge input cubes into a single " + "array" + ) + with pytest.raises(ValueError, match=msg): + multimodel_test(cubes, span=span, statistic='mean') diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py index 52f72554e4..55326ab642 100644 --- a/tests/unit/preprocessor/_multimodel/test_multimodel.py +++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py @@ -524,21 +524,8 @@ def test_combine_inconsistent_var_names_fail(): mm._combine(cubes) -def test_combine_with_scalar_coords_to_remove(): - """Test _combine with scalar coordinates that should be removed.""" - cubes = CubeList(generate_cube_from_dates('monthly') for _ in range(3)) - scalar_coord_0 = AuxCoord(0.0, standard_name='height', units='m') - scalar_coord_1 = AuxCoord(1.0, long_name='Test scalar coordinate') - cubes[0].add_aux_coord(scalar_coord_0, ()) - cubes[1].add_aux_coord(scalar_coord_1, ()) - - merged_cube = mm._combine(cubes, ignore_scalar_coords=True) - assert merged_cube.shape == (3, 3) - assert not merged_cube.coords(dimensions=()) - - -def test_combine_with_scalar_coords_to_remove_fail(): - """Test _combine with scalar coordinates that should not be removed.""" +def test_combine_differing_scalar_coords_fail(): + """Test _combine with differing scalar coordinates.""" cubes = CubeList(generate_cube_from_dates('monthly') for _ in range(2)) scalar_coord_0 = AuxCoord(0.0, standard_name='height', units='m') cubes[0].add_aux_coord(scalar_coord_0, ())