From dbefff44df0075666bfde3d6d08117120c7f5692 Mon Sep 17 00:00:00 2001 From: Alex Merose Date: Mon, 5 Aug 2024 19:51:08 +0100 Subject: [PATCH] All cubed tests pass! --- cubed/array_api/creation_functions.py | 22 ++++++++++++- cubed/array_api/statistical_functions.py | 35 ++------------------- cubed/array_api/utility_functions.py | 30 +++++++++++++++++- cubed/backend_array_api.py | 4 +++ cubed/core/ops.py | 5 ++- cubed/nan_functions.py | 20 ++---------- cubed/tests/test_core.py | 39 +++++++++++++++++------- cubed/tests/test_gufunc.py | 14 ++++++--- cubed/tests/test_nan_functions.py | 4 +-- 9 files changed, 103 insertions(+), 70 deletions(-) diff --git a/cubed/array_api/creation_functions.py b/cubed/array_api/creation_functions.py index a5317a42..86f41858 100644 --- a/cubed/array_api/creation_functions.py +++ b/cubed/array_api/creation_functions.py @@ -1,6 +1,8 @@ import math from typing import TYPE_CHECKING, Iterable, List +import numpy as np + from cubed.backend_array_api import namespace as nxp, to_default_precision from cubed.backend_array_api import default_dtypes from cubed.core import Plan, gensym @@ -18,6 +20,23 @@ from .array_object import Array +def _iterable_to_default_dtype(it, device=None): + """Determines the default precision dtype of a collection (of collections) of scalars""" + w = it + while isinstance(w, Iterable): + w = next(iter(w)) + + defaults = default_dtypes(device=device) + if nxp.issubdtype(type(w), np.integer): + return defaults["integral"] + elif nxp.isreal(w): + return defaults["real floating"] + elif nxp.iscomplex(w): + return defaults["complex floating"] + else: + raise ValueError(f"there are no default data types supported for {it}.") + + def arange( start, /, stop=None, step=1, *, dtype=None, device=None, chunks="auto", spec=None ) -> "Array": @@ -67,8 +86,9 @@ def asarray( ): # pragma: no cover return asarray(a.data) elif not isinstance(getattr(a, "shape", None), Iterable): - # ensure blocks are arrays + dtype = _iterable_to_default_dtype(a, device=device) a = nxp.asarray(a, dtype=dtype) + if dtype is None: dtype = to_default_precision(a.dtype, device=device) a = a.astype(dtype) diff --git a/cubed/array_api/statistical_functions.py b/cubed/array_api/statistical_functions.py index 7ee6525e..eb33e2a8 100644 --- a/cubed/array_api/statistical_functions.py +++ b/cubed/array_api/statistical_functions.py @@ -5,16 +5,9 @@ _numeric_dtypes, _real_floating_dtypes, _real_numeric_dtypes, - _signed_integer_dtypes, - _unsigned_integer_dtypes, - complex64, - complex128, - float32, - float64, - int64, - uint64, ) from cubed.backend_array_api import namespace as nxp +from cubed.array_api.utility_functions import operator_default_dtype from cubed.core import reduction @@ -129,18 +122,7 @@ def prod( if x.dtype not in _numeric_dtypes and x.dtype not in _boolean_dtypes: raise TypeError("Only numeric or boolean dtypes are allowed in prod") if dtype is None: - if x.dtype in _boolean_dtypes: - dtype = int64 - elif x.dtype in _signed_integer_dtypes: - dtype = int64 - elif x.dtype in _unsigned_integer_dtypes: - dtype = uint64 - elif x.dtype == float32: - dtype = float64 - elif x.dtype == complex64: - dtype = complex128 - else: - dtype = x.dtype + dtype = operator_default_dtype(x) extra_func_kwargs = dict(dtype=dtype) return reduction( x, @@ -161,18 +143,7 @@ def sum( if x.dtype not in _numeric_dtypes and x.dtype not in _boolean_dtypes: raise TypeError("Only numeric or boolean dtypes are allowed in sum") if dtype is None: - if x.dtype in _boolean_dtypes: - dtype = int64 - elif x.dtype in _signed_integer_dtypes: - dtype = int64 - elif x.dtype in _unsigned_integer_dtypes: - dtype = uint64 - elif x.dtype == float32: - dtype = float64 - elif x.dtype == complex64: - dtype = complex128 - else: - dtype = x.dtype + dtype = operator_default_dtype(x) extra_func_kwargs = dict(dtype=dtype) return reduction( x, diff --git a/cubed/array_api/utility_functions.py b/cubed/array_api/utility_functions.py index 9825dd9b..16ecb803 100644 --- a/cubed/array_api/utility_functions.py +++ b/cubed/array_api/utility_functions.py @@ -1,5 +1,17 @@ from cubed.array_api.creation_functions import asarray -from cubed.backend_array_api import namespace as nxp +from cubed.array_api.dtypes import ( + _signed_integer_dtypes, + _unsigned_integer_dtypes, + int32, + uint32, + int64, + uint64, + float32, + float64, + complex64, + complex128, +) +from cubed.backend_array_api import namespace as nxp, namespace, PRECISION from cubed.core import reduction @@ -29,3 +41,19 @@ def any(x, /, *, axis=None, keepdims=False, use_new_impl=True, split_every=None) use_new_impl=use_new_impl, split_every=split_every, ) + + +def operator_default_dtype(x: namespace.ndarray) -> namespace.dtype: + """Derive the correct default data type for operators.""" + if x.dtype in _signed_integer_dtypes: + dtype = int64 if PRECISION == 64 else int32 + elif x.dtype in _unsigned_integer_dtypes: + dtype = uint64 if PRECISION == 64 else uint32 + elif x.dtype == float32 and PRECISION == 64: + dtype = float64 + elif x.dtype == complex64 and PRECISION == 64: + dtype = complex128 + else: + dtype = x.dtype + + return dtype diff --git a/cubed/backend_array_api.py b/cubed/backend_array_api.py index b6743ba8..da01d11b 100644 --- a/cubed/backend_array_api.py +++ b/cubed/backend_array_api.py @@ -38,6 +38,7 @@ "complex floating": namespace.complex128, "integral": namespace.int64, } +PRECISION=64 if "CUBED_DEFAULT_PRECISION_X32" in os.environ: if os.environ['CUBED_DEFAULT_PRECISION_X32']: _DEFAULT_DTYPES = { @@ -45,6 +46,7 @@ "complex floating": namespace.complex64, "integral": namespace.int32, } + PRECISION=32 def default_dtypes(*, device=None) -> dict: @@ -71,3 +73,5 @@ def to_default_precision(dtype, *, device=None): for k, dtype_ in default_dtypes(device=device).items(): if namespace.isdtype(dtype, k): return dtype_ + + diff --git a/cubed/core/ops.py b/cubed/core/ops.py index 5c8302b1..b5612bc5 100644 --- a/cubed/core/ops.py +++ b/cubed/core/ops.py @@ -51,7 +51,10 @@ def from_array(x, chunks="auto", asarray=None, spec=None, device=None) -> "Array dtype = to_default_precision(x.dtype) if x.dtype != dtype: - x = x.astype(dtype) + if hasattr(x, 'astype'): + x = x.astype(dtype) + elif hasattr(x, '__array__'): + x = x.__array__(dtype) previous_chunks = getattr(x, "chunks", None) outchunks = normalize_chunks( diff --git a/cubed/nan_functions.py b/cubed/nan_functions.py index 2acd308b..402726a6 100644 --- a/cubed/nan_functions.py +++ b/cubed/nan_functions.py @@ -2,16 +2,9 @@ from cubed.array_api.dtypes import ( _numeric_dtypes, - _signed_integer_dtypes, - _unsigned_integer_dtypes, - complex64, - complex128, - float32, - float64, - int64, - uint64, ) from cubed.backend_array_api import namespace as nxp +from cubed.array_api.utility_functions import operator_default_dtype from cubed.core import reduction # TODO: refactor once nan functions are standardized: @@ -68,16 +61,7 @@ def nansum( if x.dtype not in _numeric_dtypes: raise TypeError("Only numeric dtypes are allowed in nansum") if dtype is None: - if x.dtype in _signed_integer_dtypes: - dtype = int64 - elif x.dtype in _unsigned_integer_dtypes: - dtype = uint64 - elif x.dtype == float32: - dtype = float64 - elif x.dtype == complex64: - dtype = complex128 - else: - dtype = x.dtype + dtype = operator_default_dtype(x) return reduction( x, nxp.nansum, diff --git a/cubed/tests/test_core.py b/cubed/tests/test_core.py index 1e415092..d415eaa9 100644 --- a/cubed/tests/test_core.py +++ b/cubed/tests/test_core.py @@ -1,3 +1,4 @@ +import os import platform import random from functools import partial @@ -59,9 +60,13 @@ def modal_executor(request): def test_as_array_fails(spec): a = np.ones((1000, 1000)) + expected_size = "8" + if os.environ.get('CUBED_DEFAULT_PRECISION_X32', False): + expected_size = "4" + with pytest.raises( ValueError, - match="Size of in memory array is 8.0 MB which exceeds maximum of 1.0 MB.", + match=f"Size of in memory array is {expected_size}.0 MB which exceeds maximum of 1.0 MB.", ): xp.asarray(a, chunks=(100, 100), spec=spec) @@ -183,55 +188,67 @@ def test_map_blocks_with_kwargs(spec, executor): def test_map_blocks_with_block_id(spec, executor): + dtype = "int64" + if os.environ.get('CUBED_DEFAULT_PRECISION_X32', False): + dtype = "int32" + # based on dask test def func(block, block_id=None, c=0): return nxp.ones_like(block) * int(sum(block_id)) + c - a = xp.arange(10, dtype="int64", chunks=(2,)) - b = cubed.map_blocks(func, a, dtype="int64") + a = xp.arange(10, dtype=dtype, chunks=(2,)) + b = cubed.map_blocks(func, a, dtype=dtype) assert_array_equal( b.compute(executor=executor), - np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4], dtype="int64"), + np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4], dtype=dtype), ) a = xp.asarray([[1, 2, 3], [4, 5, 6], [7, 8, 9]], chunks=(2, 2), spec=spec) - b = cubed.map_blocks(func, a, dtype="int64") + b = cubed.map_blocks(func, a, dtype=dtype) assert_array_equal( b.compute(executor=executor), - np.array([[0, 0, 1], [0, 0, 1], [1, 1, 2]], dtype="int64"), + np.array([[0, 0, 1], [0, 0, 1], [1, 1, 2]], dtype=dtype), ) - c = cubed.map_blocks(func, a, dtype="int64", c=1) + c = cubed.map_blocks(func, a, dtype=dtype, c=1) assert_array_equal( c.compute(executor=executor), - np.array([[0, 0, 1], [0, 0, 1], [1, 1, 2]], dtype="int64") + 1, + np.array([[0, 0, 1], [0, 0, 1], [1, 1, 2]], dtype=dtype) + 1, ) def test_map_blocks_no_array_args(spec, executor): + dtype = "int64" + if os.environ.get('CUBED_DEFAULT_PRECISION_X32', False): + dtype = "int32" + def func(block, block_id=None): return nxp.ones_like(block) * int(sum(block_id)) - a = cubed.map_blocks(func, dtype="int64", chunks=((5, 3),), spec=spec) + a = cubed.map_blocks(func, dtype=dtype, chunks=((5, 3),), spec=spec) assert a.chunks == ((5, 3),) assert_array_equal( a.compute(executor=executor), - np.array([0, 0, 0, 0, 0, 1, 1, 1], dtype="int64"), + np.array([0, 0, 0, 0, 0, 1, 1, 1], dtype=dtype), ) def test_map_blocks_with_different_block_shapes(spec): + dtype = "int64" + if os.environ.get('CUBED_DEFAULT_PRECISION_X32', False): + dtype = "int32" + def func(x, y): return x a = xp.asarray([[[12, 13]]], spec=spec) b = xp.asarray([14, 15], spec=spec) c = cubed.map_blocks( - func, a, b, dtype="int64", chunks=(1, 1, 2), drop_axis=2, new_axis=2 + func, a, b, dtype=dtype, chunks=(1, 1, 2), drop_axis=2, new_axis=2 ) assert_array_equal(c.compute(), np.array([[[12, 13]]])) diff --git a/cubed/tests/test_gufunc.py b/cubed/tests/test_gufunc.py index deb7d583..e99aacbc 100644 --- a/cubed/tests/test_gufunc.py +++ b/cubed/tests/test_gufunc.py @@ -1,3 +1,5 @@ +import os + import numpy as np import pytest from numpy.testing import assert_allclose, assert_equal @@ -71,13 +73,17 @@ def foo(x): def test_gufunc_two_inputs(spec): + dtype = int + if os.environ.get('CUBED_DEFAULT_PRECISION_X32', False): + dtype = nxp.int32 + def foo(x, y): return np.einsum("...ij,...jk->ik", x, y) - a = xp.ones((2, 3), chunks=100, dtype=int, spec=spec) - b = xp.ones((3, 4), chunks=100, dtype=int, spec=spec) - x = apply_gufunc(foo, "(i,j),(j,k)->(i,k)", a, b, output_dtypes=int) - assert_equal(x, 3 * np.ones((2, 4), dtype=int)) + a = xp.ones((2, 3), chunks=100, dtype=dtype, spec=spec) + b = xp.ones((3, 4), chunks=100, dtype=dtype, spec=spec) + x = apply_gufunc(foo, "(i,j),(j,k)->(i,k)", a, b, output_dtypes=dtype) + assert_equal(x, 3 * np.ones((2, 4), dtype=dtype)) def test_apply_gufunc_axes_two_kept_coredims(spec): diff --git a/cubed/tests/test_nan_functions.py b/cubed/tests/test_nan_functions.py index 53264e79..754ac2d4 100644 --- a/cubed/tests/test_nan_functions.py +++ b/cubed/tests/test_nan_functions.py @@ -27,10 +27,10 @@ def test_nanmean_allnan(spec): def test_nansum(spec): - a = xp.asarray([[1, 2, 3], [4, 5, 6], [7, 8, xp.nan]], chunks=(2, 2), spec=spec) + a = xp.asarray([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, xp.nan]], chunks=(2, 2), spec=spec) b = cubed.nansum(a) assert_array_equal( - b.compute(), np.nansum(np.array([[1, 2, 3], [4, 5, 6], [7, 8, np.nan]])) + b.compute(), np.nansum(np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, np.nan]])) )