Fix: Flip big endian arrays before concatenation #1068

Open · wants to merge 5 commits into base: main
2 changes: 2 additions & 0 deletions esmvalcore/preprocessor/_io.py
@@ -11,6 +11,7 @@
import numpy as np
import yaml

from ._other import fix_cubes_endianness
from .._task import write_ncl_settings
from ..cmor._fixes.shared import AtmosphereSigmaFactory
from ._time import extract_time
@@ -179,6 +180,7 @@ def concatenate(cubes):
        return cubes[0]

    _fix_cube_attributes(cubes)
    fix_cubes_endianness(cubes)

    if len(cubes) > 1:
        # order cubes by first time point
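For context, the new call relies on numpy's dtype.byteorder flag to decide whether a cube needs fixing; only dtypes explicitly marked big endian report ">". A minimal sketch of that behaviour (plain numpy, assuming a little-endian host, which is the common case; not part of the diff):

import numpy as np

# An explicitly big-endian dtype reports ">" and would be byteswapped.
big = np.ones(3, dtype=">f8")
assert big.dtype.byteorder == ">"

# Native-order data reports "=" (or "<"), never ">", so it is left alone.
native = np.ones(3, dtype="float64")
assert native.dtype.byteorder != ">"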
49 changes: 49 additions & 0 deletions esmvalcore/preprocessor/_other.py
@@ -5,6 +5,7 @@
import logging

import dask.array as da
import numpy as np

logger = logging.getLogger(__name__)

@@ -38,3 +39,51 @@ def clip(cube, minimum=None, maximum=None):
        raise ValueError("Maximum should be equal or larger than minimum.")
    cube.data = da.clip(cube.core_data(), minimum, maximum)
    return cube


def fix_cubes_endianness(cubes):
    """Convert big endian data and coordinates in cubes to little endian."""
    for cube in cubes:
        if cube.dtype.byteorder == ">":
            cube.data = _byteswap_array(cube.core_data())
        # Also swap the coordinate points and bounds if necessary
        for coord in cube.coords():
            if coord.dtype.byteorder == ">":
                coord.points = _byteswap_array(coord.core_points())
            if (coord.bounds is not None
                    and coord.bounds.dtype.byteorder == ">"):
                coord.bounds = _byteswap_array(coord.core_bounds())
    return cubes


def _byteswap_array(arr):
    """Swap the bytes of a numpy or dask array."""
    if isinstance(arr, da.Array):
        return _byteswap_dask_array(arr)
    elif isinstance(arr, np.ndarray):
        return _byteswap_numpy_array(arr)
    else:
        raise NotImplementedError("Data type not supported")


def _byteswap_dask_array(arr):
    """Swap the bytes of a dask array.

    byteswap and newbyteorder are not ufuncs and are supported by
    neither dask nor iris. The workaround is to use map_blocks to call
    the appropriate numpy methods on the chunks of the dask array
    returned by core_data(). See
    https://github.com/dask/dask/issues/5689
    """
    swapped_da = arr.map_blocks(np.ndarray.byteswap).map_blocks(
        np.ndarray.newbyteorder)
    return swapped_da


def _byteswap_numpy_array(arr):
    """Swap the bytes of a numpy array."""
    return arr.byteswap().newbyteorder()
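As a standalone illustration of the workaround described in the _byteswap_dask_array docstring, the same pair of map_blocks calls can be applied to a plain dask array (a sketch assuming a little-endian host and NumPy 1.x, where np.ndarray.newbyteorder still exists; not part of the diff):

import dask.array as da
import numpy as np

# Big-endian values wrapped in a chunked dask array.
big_endian = da.from_array(np.array([1., 2., 3.], dtype=">f8"), chunks=2)

# byteswap/newbyteorder are not ufuncs, so they are applied chunk by chunk:
# byteswap() flips the raw bytes, newbyteorder() flips the dtype's declared
# order, so the values themselves are unchanged.
swapped = big_endian.map_blocks(np.ndarray.byteswap).map_blocks(
    np.ndarray.newbyteorder)

result = swapped.compute()
print(result)                  # [1. 2. 3.]
print(result.dtype.byteorder)  # no longer ">"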
44 changes: 41 additions & 3 deletions tests/unit/preprocessor/_other/test_other.py
@@ -2,14 +2,17 @@

import unittest

import iris.coord_categorisation
import iris.coords
import dask.array as da
import numpy as np
import iris.coord_categorisation
import pytest
from iris.coords import DimCoord

from cf_units import Unit
from iris.cube import Cube
from numpy.testing import assert_array_equal

from esmvalcore.preprocessor._other import clip
from esmvalcore.preprocessor._other import clip, fix_cubes_endianness


class TestOther(unittest.TestCase):
@@ -42,6 +45,41 @@ def test_clip(self):
        with self.assertRaises(ValueError):
            clip(cube, 10, 8)

# test_fix_cubes_endianness is a plain pytest function rather than a
# TestOther method because pytest.mark.parametrize is not supported on
# unittest.TestCase methods.
@pytest.mark.parametrize("lazy", (True, False))
def test_fix_cubes_endianness(lazy):

    def make_cube(data, big_endian=False):
        dtype = ">f8" if big_endian else "<f8"
        data = np.array(data, dtype=dtype)
        if lazy:
            data = da.from_array(data)
        # We reuse the same values for the time coordinate to keep it simple
        coords = data.copy()
        ocube = Cube(
            data,
            var_name='sample',
            dim_coords_and_dims=(
                (
                    DimCoord(
                        coords,
                        var_name='time',
                        standard_name='time',
                        units='days since 1950-01-01'
                    ),
                    0
                ),
            )
        )
        return ocube

    big_endian_cube = make_cube([7., 8.], big_endian=True)
    little_endian_cube = make_cube([7., 8.], big_endian=False)
    test_cubes = [make_cube(vals) for vals in [(1, 2), (3, 4), (5, 6)]]
    test_cubes += [big_endian_cube]
    expected_cubes = ([c.copy() for c in test_cubes[:-1]]
                      + [little_endian_cube])
    actual_cubes = fix_cubes_endianness(test_cubes)
    assert actual_cubes == expected_cubes


if __name__ == '__main__':
    unittest.main()