From dbefff44df0075666bfde3d6d08117120c7f5692 Mon Sep 17 00:00:00 2001
From: Alex Merose <al@merose.com>
Date: Mon, 5 Aug 2024 19:51:08 +0100
Subject: [PATCH] Honour the configurable default precision; all cubed tests pass

---
 cubed/array_api/creation_functions.py    | 22 ++++++++++++-
 cubed/array_api/statistical_functions.py | 35 ++-------------------
 cubed/array_api/utility_functions.py     | 30 +++++++++++++++++-
 cubed/backend_array_api.py               |  4 +++
 cubed/core/ops.py                        |  5 ++-
 cubed/nan_functions.py                   | 20 ++----------
 cubed/tests/test_core.py                 | 39 +++++++++++++++++-------
 cubed/tests/test_gufunc.py               | 14 ++++++---
 cubed/tests/test_nan_functions.py        |  4 +--
 9 files changed, 103 insertions(+), 70 deletions(-)

diff --git a/cubed/array_api/creation_functions.py b/cubed/array_api/creation_functions.py
index a5317a42..86f41858 100644
--- a/cubed/array_api/creation_functions.py
+++ b/cubed/array_api/creation_functions.py
@@ -1,6 +1,8 @@
 import math
 from typing import TYPE_CHECKING, Iterable, List
 
+import numpy as np
+
 from cubed.backend_array_api import namespace as nxp, to_default_precision
 from cubed.backend_array_api import default_dtypes
 from cubed.core import Plan, gensym
@@ -18,6 +20,46 @@
     from .array_object import Array
 
 
+def _iterable_to_default_dtype(it, device=None):
+    """Return the default-precision dtype for a (nested) collection of scalars.
+
+    Every leaf scalar is inspected and the widest kind found wins
+    (bool < integer < real floating < complex floating), so a mixed input
+    such as ``[1, 2.5]`` maps to the real floating default rather than being
+    truncated to integers. Booleans stay boolean, and an input with no
+    scalars at all (e.g. ``[]``) falls back to the real floating default.
+
+    Raises ``ValueError`` when a leaf is not a bool, int, float or complex.
+    """
+    defaults = default_dtypes(device=device)
+    # Ordered narrowest to widest. bool comes first because Python's bool is
+    # a subclass of int and would otherwise be classified as an integer.
+    kinds = (
+        ((bool, np.bool_), np.bool_),
+        ((int, np.integer), defaults["integral"]),
+        ((float, np.floating), defaults["real floating"]),
+        ((complex, np.complexfloating), defaults["complex floating"]),
+    )
+    widest = -1
+    pending = [it]
+    while pending:
+        w = pending.pop()
+        # A one-character str iterates to itself, so str/bytes are treated as
+        # leaves rather than containers (and rejected below).
+        if isinstance(w, Iterable) and not isinstance(w, (str, bytes)):
+            pending.extend(w)
+            continue
+        for rank, (scalar_types, _) in enumerate(kinds):
+            if isinstance(w, scalar_types):
+                widest = max(widest, rank)
+                break
+        else:
+            raise ValueError(f"there are no default data types supported for {it}.")
+
+    if widest < 0:
+        return defaults["real floating"]
+    return kinds[widest][1]
+
+
 def arange(
     start, /, stop=None, step=1, *, dtype=None, device=None, chunks="auto", spec=None
 ) -> "Array":
@@ -67,8 +109,12 @@ def asarray(
     ):  # pragma: no cover
         return asarray(a.data)
     elif not isinstance(getattr(a, "shape", None), Iterable):
-        # ensure blocks are arrays
+        if dtype is None:
+            # Only infer a dtype when the caller did not request one, so an
+            # explicit ``dtype`` argument is never overridden.
+            dtype = _iterable_to_default_dtype(a, device=device)
         a = nxp.asarray(a, dtype=dtype)
+
     if dtype is None:
         dtype = to_default_precision(a.dtype, device=device)
         a = a.astype(dtype)
diff --git a/cubed/array_api/statistical_functions.py b/cubed/array_api/statistical_functions.py
index 7ee6525e..eb33e2a8 100644
--- a/cubed/array_api/statistical_functions.py
+++ b/cubed/array_api/statistical_functions.py
@@ -5,16 +5,9 @@
     _numeric_dtypes,
     _real_floating_dtypes,
     _real_numeric_dtypes,
-    _signed_integer_dtypes,
-    _unsigned_integer_dtypes,
-    complex64,
-    complex128,
-    float32,
-    float64,
-    int64,
-    uint64,
 )
 from cubed.backend_array_api import namespace as nxp
+from cubed.array_api.utility_functions import operator_default_dtype
 from cubed.core import reduction
 
 
@@ -129,18 +122,7 @@ def prod(
     if x.dtype not in _numeric_dtypes and x.dtype not in _boolean_dtypes:
         raise TypeError("Only numeric or boolean dtypes are allowed in prod")
     if dtype is None:
-        if x.dtype in _boolean_dtypes:
-            dtype = int64
-        elif x.dtype in _signed_integer_dtypes:
-            dtype = int64
-        elif x.dtype in _unsigned_integer_dtypes:
-            dtype = uint64
-        elif x.dtype == float32:
-            dtype = float64
-        elif x.dtype == complex64:
-            dtype = complex128
-        else:
-            dtype = x.dtype
+        dtype = operator_default_dtype(x)
     extra_func_kwargs = dict(dtype=dtype)
     return reduction(
         x,
@@ -161,18 +143,7 @@ def sum(
     if x.dtype not in _numeric_dtypes and x.dtype not in _boolean_dtypes:
         raise TypeError("Only numeric or boolean dtypes are allowed in sum")
     if dtype is None:
-        if x.dtype in _boolean_dtypes:
-            dtype = int64
-        elif x.dtype in _signed_integer_dtypes:
-            dtype = int64
-        elif x.dtype in _unsigned_integer_dtypes:
-            dtype = uint64
-        elif x.dtype == float32:
-            dtype = float64
-        elif x.dtype == complex64:
-            dtype = complex128
-        else:
-            dtype = x.dtype
+        dtype = operator_default_dtype(x)
     extra_func_kwargs = dict(dtype=dtype)
     return reduction(
         x,
diff --git a/cubed/array_api/utility_functions.py b/cubed/array_api/utility_functions.py
index 9825dd9b..16ecb803 100644
--- a/cubed/array_api/utility_functions.py
+++ b/cubed/array_api/utility_functions.py
@@ -1,5 +1,18 @@
 from cubed.array_api.creation_functions import asarray
-from cubed.backend_array_api import namespace as nxp
+from cubed.array_api.dtypes import (
+    _boolean_dtypes,
+    _signed_integer_dtypes,
+    _unsigned_integer_dtypes,
+    int32,
+    uint32,
+    int64,
+    uint64,
+    float32,
+    float64,
+    complex64,
+    complex128,
+)
+from cubed.backend_array_api import namespace as nxp, namespace, PRECISION
 from cubed.core import reduction
 
 
@@ -29,3 +42,28 @@ def any(x, /, *, axis=None, keepdims=False, use_new_impl=True, split_every=None)
         use_new_impl=use_new_impl,
         split_every=split_every,
     )
+
+
+def operator_default_dtype(x: namespace.ndarray) -> namespace.dtype:
+    """Return the default result dtype for reductions such as ``sum``.
+
+    Boolean and signed integer inputs map to the signed integer of the
+    configured ``PRECISION``, unsigned integers to the unsigned integer of
+    that width, and ``float32``/``complex64`` are widened to their 64-bit
+    counterparts only when ``PRECISION`` is 64. Any other dtype is returned
+    unchanged.
+    """
+    if x.dtype in _boolean_dtypes or x.dtype in _signed_integer_dtypes:
+        # Booleans are accumulated as integers, matching the previous inline
+        # logic in sum and prod.
+        dtype = int64 if PRECISION == 64 else int32
+    elif x.dtype in _unsigned_integer_dtypes:
+        dtype = uint64 if PRECISION == 64 else uint32
+    elif x.dtype == float32 and PRECISION == 64:
+        dtype = float64
+    elif x.dtype == complex64 and PRECISION == 64:
+        dtype = complex128
+    else:
+        dtype = x.dtype
+
+    return dtype
diff --git a/cubed/backend_array_api.py b/cubed/backend_array_api.py
index b6743ba8..da01d11b 100644
--- a/cubed/backend_array_api.py
+++ b/cubed/backend_array_api.py
@@ -38,6 +38,7 @@
     "complex floating": namespace.complex128,
     "integral": namespace.int64,
 }
+PRECISION = 64  # default bit width; set to 32 below when CUBED_DEFAULT_PRECISION_X32 is non-empty
 if "CUBED_DEFAULT_PRECISION_X32" in os.environ:
     if os.environ['CUBED_DEFAULT_PRECISION_X32']:
         _DEFAULT_DTYPES = {
@@ -45,6 +46,7 @@
             "complex floating": namespace.complex64,
             "integral": namespace.int32,
         }
+        PRECISION = 32
 
 
 def default_dtypes(*, device=None) -> dict:
@@ -71,3 +73,5 @@ def to_default_precision(dtype, *, device=None):
     for k, dtype_ in default_dtypes(device=device).items():
         if namespace.isdtype(dtype, k):
             return dtype_
+
+
diff --git a/cubed/core/ops.py b/cubed/core/ops.py
index 5c8302b1..b5612bc5 100644
--- a/cubed/core/ops.py
+++ b/cubed/core/ops.py
@@ -51,7 +51,10 @@ def from_array(x, chunks="auto", asarray=None, spec=None, device=None) -> "Array
 
     dtype = to_default_precision(x.dtype)
     if x.dtype != dtype:
-        x = x.astype(dtype)
+        if hasattr(x, 'astype'):
+            x = x.astype(dtype)
+        elif hasattr(x, '__array__'):
+            x = x.__array__(dtype)
 
     previous_chunks = getattr(x, "chunks", None)
     outchunks = normalize_chunks(
diff --git a/cubed/nan_functions.py b/cubed/nan_functions.py
index 2acd308b..402726a6 100644
--- a/cubed/nan_functions.py
+++ b/cubed/nan_functions.py
@@ -2,16 +2,9 @@
 
 from cubed.array_api.dtypes import (
     _numeric_dtypes,
-    _signed_integer_dtypes,
-    _unsigned_integer_dtypes,
-    complex64,
-    complex128,
-    float32,
-    float64,
-    int64,
-    uint64,
 )
 from cubed.backend_array_api import namespace as nxp
+from cubed.array_api.utility_functions import operator_default_dtype
 from cubed.core import reduction
 
 # TODO: refactor once nan functions are standardized:
@@ -68,16 +61,7 @@ def nansum(
     if x.dtype not in _numeric_dtypes:
         raise TypeError("Only numeric dtypes are allowed in nansum")
     if dtype is None:
-        if x.dtype in _signed_integer_dtypes:
-            dtype = int64
-        elif x.dtype in _unsigned_integer_dtypes:
-            dtype = uint64
-        elif x.dtype == float32:
-            dtype = float64
-        elif x.dtype == complex64:
-            dtype = complex128
-        else:
-            dtype = x.dtype
+        dtype = operator_default_dtype(x)
     return reduction(
         x,
         nxp.nansum,
diff --git a/cubed/tests/test_core.py b/cubed/tests/test_core.py
index 1e415092..d415eaa9 100644
--- a/cubed/tests/test_core.py
+++ b/cubed/tests/test_core.py
@@ -1,3 +1,4 @@
+import os
 import platform
 import random
 from functools import partial
@@ -59,9 +60,13 @@ def modal_executor(request):
 
 def test_as_array_fails(spec):
     a = np.ones((1000, 1000))
+    expected_size = "8"
+    if os.environ.get('CUBED_DEFAULT_PRECISION_X32', False):
+        expected_size = "4"
+
     with pytest.raises(
         ValueError,
-        match="Size of in memory array is 8.0 MB which exceeds maximum of 1.0 MB.",
+        match=f"Size of in memory array is {expected_size}.0 MB which exceeds maximum of 1.0 MB.",
     ):
         xp.asarray(a, chunks=(100, 100), spec=spec)
 
@@ -183,55 +188,67 @@ def test_map_blocks_with_kwargs(spec, executor):
 
 
 def test_map_blocks_with_block_id(spec, executor):
+    dtype = "int64"
+    if os.environ.get('CUBED_DEFAULT_PRECISION_X32', False):
+        dtype = "int32"
+
     # based on dask test
     def func(block, block_id=None, c=0):
         return nxp.ones_like(block) * int(sum(block_id)) + c
 
-    a = xp.arange(10, dtype="int64", chunks=(2,))
-    b = cubed.map_blocks(func, a, dtype="int64")
+    a = xp.arange(10, dtype=dtype, chunks=(2,))
+    b = cubed.map_blocks(func, a, dtype=dtype)
 
     assert_array_equal(
         b.compute(executor=executor),
-        np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4], dtype="int64"),
+        np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4], dtype=dtype),
     )
 
     a = xp.asarray([[1, 2, 3], [4, 5, 6], [7, 8, 9]], chunks=(2, 2), spec=spec)
-    b = cubed.map_blocks(func, a, dtype="int64")
+    b = cubed.map_blocks(func, a, dtype=dtype)
 
     assert_array_equal(
         b.compute(executor=executor),
-        np.array([[0, 0, 1], [0, 0, 1], [1, 1, 2]], dtype="int64"),
+        np.array([[0, 0, 1], [0, 0, 1], [1, 1, 2]], dtype=dtype),
     )
 
-    c = cubed.map_blocks(func, a, dtype="int64", c=1)
+    c = cubed.map_blocks(func, a, dtype=dtype, c=1)
 
     assert_array_equal(
         c.compute(executor=executor),
-        np.array([[0, 0, 1], [0, 0, 1], [1, 1, 2]], dtype="int64") + 1,
+        np.array([[0, 0, 1], [0, 0, 1], [1, 1, 2]], dtype=dtype) + 1,
     )
 
 
 def test_map_blocks_no_array_args(spec, executor):
+    dtype = "int64"
+    if os.environ.get('CUBED_DEFAULT_PRECISION_X32', False):
+        dtype = "int32"
+
     def func(block, block_id=None):
         return nxp.ones_like(block) * int(sum(block_id))
 
-    a = cubed.map_blocks(func, dtype="int64", chunks=((5, 3),), spec=spec)
+    a = cubed.map_blocks(func, dtype=dtype, chunks=((5, 3),), spec=spec)
     assert a.chunks == ((5, 3),)
 
     assert_array_equal(
         a.compute(executor=executor),
-        np.array([0, 0, 0, 0, 0, 1, 1, 1], dtype="int64"),
+        np.array([0, 0, 0, 0, 0, 1, 1, 1], dtype=dtype),
     )
 
 
 def test_map_blocks_with_different_block_shapes(spec):
+    dtype = "int64"
+    if os.environ.get('CUBED_DEFAULT_PRECISION_X32', False):
+        dtype = "int32"
+
     def func(x, y):
         return x
 
     a = xp.asarray([[[12, 13]]], spec=spec)
     b = xp.asarray([14, 15], spec=spec)
     c = cubed.map_blocks(
-        func, a, b, dtype="int64", chunks=(1, 1, 2), drop_axis=2, new_axis=2
+        func, a, b, dtype=dtype, chunks=(1, 1, 2), drop_axis=2, new_axis=2
     )
     assert_array_equal(c.compute(), np.array([[[12, 13]]]))
 
diff --git a/cubed/tests/test_gufunc.py b/cubed/tests/test_gufunc.py
index deb7d583..e99aacbc 100644
--- a/cubed/tests/test_gufunc.py
+++ b/cubed/tests/test_gufunc.py
@@ -1,3 +1,5 @@
+import os
+
 import numpy as np
 import pytest
 from numpy.testing import assert_allclose, assert_equal
@@ -71,13 +73,17 @@ def foo(x):
 
 
 def test_gufunc_two_inputs(spec):
+    dtype = int
+    if os.environ.get('CUBED_DEFAULT_PRECISION_X32', False):
+        dtype = nxp.int32
+
     def foo(x, y):
         return np.einsum("...ij,...jk->ik", x, y)
 
-    a = xp.ones((2, 3), chunks=100, dtype=int, spec=spec)
-    b = xp.ones((3, 4), chunks=100, dtype=int, spec=spec)
-    x = apply_gufunc(foo, "(i,j),(j,k)->(i,k)", a, b, output_dtypes=int)
-    assert_equal(x, 3 * np.ones((2, 4), dtype=int))
+    a = xp.ones((2, 3), chunks=100, dtype=dtype, spec=spec)
+    b = xp.ones((3, 4), chunks=100, dtype=dtype, spec=spec)
+    x = apply_gufunc(foo, "(i,j),(j,k)->(i,k)", a, b, output_dtypes=dtype)
+    assert_equal(x, 3 * np.ones((2, 4), dtype=dtype))
 
 
 def test_apply_gufunc_axes_two_kept_coredims(spec):
diff --git a/cubed/tests/test_nan_functions.py b/cubed/tests/test_nan_functions.py
index 53264e79..754ac2d4 100644
--- a/cubed/tests/test_nan_functions.py
+++ b/cubed/tests/test_nan_functions.py
@@ -27,10 +27,10 @@ def test_nanmean_allnan(spec):
 
 
 def test_nansum(spec):
-    a = xp.asarray([[1, 2, 3], [4, 5, 6], [7, 8, xp.nan]], chunks=(2, 2), spec=spec)
+    a = xp.asarray([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, xp.nan]], chunks=(2, 2), spec=spec)
     b = cubed.nansum(a)
     assert_array_equal(
-        b.compute(), np.nansum(np.array([[1, 2, 3], [4, 5, 6], [7, 8, np.nan]]))
+        b.compute(), np.nansum(np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, np.nan]]))
     )