diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index 938e4b6f2e..8123a9b071 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -11,6 +11,7 @@ import numpy as np import yaml +from ._other import fix_cubes_endianness from .._task import write_ncl_settings from ..cmor._fixes.shared import AtmosphereSigmaFactory from ._time import extract_time @@ -179,6 +180,7 @@ def concatenate(cubes): return cubes[0] _fix_cube_attributes(cubes) + fix_cubes_endianness(cubes) if len(cubes) > 1: # order cubes by first time point diff --git a/esmvalcore/preprocessor/_other.py b/esmvalcore/preprocessor/_other.py index 697e8b3347..373acb1964 100644 --- a/esmvalcore/preprocessor/_other.py +++ b/esmvalcore/preprocessor/_other.py @@ -5,6 +5,7 @@ import logging import dask.array as da +import numpy as np logger = logging.getLogger(__name__) @@ -38,3 +39,51 @@ def clip(cube, minimum=None, maximum=None): raise ValueError("Maximum should be equal or larger than minimum.") cube.data = da.clip(cube.core_data(), minimum, maximum) return cube + + +def fix_cubes_endianness(cubes): + """Transform cubes in big endian to little.""" + for cube in cubes: + if cube.dtype.byteorder == ">": + cube.data = _byteswap_array(cube.core_data()) + # Swap the coords also if neccesary + for coord in cube.coords(): + if coord.dtype.byteorder == ">": + coord.points = _byteswap_array(coord.core_points()) + if (coord.bounds is not None) and (coord.bounds.dtype.byteorder == ">"): + coord.bounds = _byteswap_array(coord.core_bounds()) + return cubes + + +def _byteswap_array(arr): + """ + Swaps the bytes of a numpy or dask array + """ + if isinstance(arr, da.Array): + return _byteswap_dask_array(arr) + elif isinstance(arr, np.ndarray): + return _byteswap_numpy_array(arr) + else: + raise NotImplementedError("Data type not supported") + + +def _byteswap_dask_array(arr): + """ + Swaps the bytes of a dask array + + byteswap and newbyteorder are not ufuncs and are not supported + neither by dask or iris. The workaround is to use map_blocks + to call the appropiate numpy functions over the dask array chunks + returned by core_data() See + https://github.com/dask/dask/issues/5689 + """ + swapped_da = arr.map_blocks(np.ndarray.byteswap).map_blocks( + np.ndarray.newbyteorder) + return swapped_da + + +def _byteswap_numpy_array(arr): + """ + Swaps the bytes of a numpy array + """ + return arr.byteswap().newbyteorder() diff --git a/tests/unit/preprocessor/_other/test_other.py b/tests/unit/preprocessor/_other/test_other.py index 08a1ee26c0..30a2f84701 100644 --- a/tests/unit/preprocessor/_other/test_other.py +++ b/tests/unit/preprocessor/_other/test_other.py @@ -2,14 +2,17 @@ import unittest -import iris.coord_categorisation -import iris.coords +import dask.array as da import numpy as np +import iris.coord_categorisation +import pytest +from iris.coords import DimCoord + from cf_units import Unit from iris.cube import Cube from numpy.testing import assert_array_equal -from esmvalcore.preprocessor._other import clip +from esmvalcore.preprocessor._other import clip, fix_cubes_endianness class TestOther(unittest.TestCase): @@ -42,6 +45,41 @@ def test_clip(self): with self.assertRaises(ValueError): clip(cube, 10, 8) + @pytest.mark.parametrize("lazy", (True, False)) + def test_fix_cubes_endianness(self, lazy=True): + + def make_cube(data, big_endian=False): + dtype = ">f8" if big_endian else "