diff --git a/dimod/sampleset.py b/dimod/sampleset.py index 5e001fac8..f1b296eb3 100644 --- a/dimod/sampleset.py +++ b/dimod/sampleset.py @@ -1767,7 +1767,7 @@ def to_serializable(self, use_bytes=False, bytes_type=bytes, :meth:`~.SampleSet.from_serializable` """ - schema_version = "3.1.0" + schema_version = "3.2.0" # developer note: numpy's record array stores the samples, energies, # num_occ. etc as a struct array. If we dumped that array directly to diff --git a/dimod/serialization/utils.py b/dimod/serialization/utils.py index 26c31d0ad..f7328d884 100644 --- a/dimod/serialization/utils.py +++ b/dimod/serialization/utils.py @@ -13,11 +13,47 @@ # limitations under the License. import collections.abc as abc -from numbers import Integral, Number + +from numbers import Integral, Number, Real +from typing import List, Union import numpy as np +__all__ = ["serialize_ndarray", "deserialize_ndarray", + "serialize_ndarrays", "deserialize_ndarrays", + "pack_samples", "unpack_samples", + ] + + +def _replace_float_with_int(arr: Union[List[float], List[List]]): + """Replace floats representing integers with ints in a list representing an array. + + Take a list of floats, as produced by :meth:`numpy.ndarray.tolist` from an array + of floating types, and convert any ``float`` representing an integer value into + ``int``. + + This function assumes some uniformity of the list structure. For instance, giving it + a list like ``[0.0, 0]`` or ``[0.0, [0.0]]`` will cause it to fail. + + Acts on the list(s) in-place. 
+ """ + if not len(arr): + # nothing to do when the list is empty + pass + + elif isinstance(arr[0], List): + for subarr in arr: + _replace_float_with_int(subarr) + + elif hasattr(arr[0], "is_integer"): + arr[:] = (int(a) if a.is_integer() else a for a in arr) + + else: + raise ValueError("expected a (possibly nested) list of floats, " + f"recieved a (possible nested) list of {type(arr[0])}") + + def serialize_ndarray(arr, use_bytes=False, bytes_type=bytes): """Serialize a NumPy array. @@ -43,6 +79,10 @@ def serialize_ndarray(arr, use_bytes=False, bytes_type=bytes): data = bytes_type(arr.tobytes(order='C')) else: data = arr.tolist() + + if np.issubdtype(arr.dtype, np.floating): + _replace_float_with_int(data) + return dict(type='array', data=data, data_type=arr.dtype.name, diff --git a/releasenotes/notes/sampleset-serialization-replace-float-with-int-5a3133c66480b8f4.yaml b/releasenotes/notes/sampleset-serialization-replace-float-with-int-5a3133c66480b8f4.yaml new file mode 100644 index 000000000..c7255d391 --- /dev/null +++ b/releasenotes/notes/sampleset-serialization-replace-float-with-int-5a3133c66480b8f4.yaml @@ -0,0 +1,12 @@ +--- +features: + - | + Implement ``SampleSet`` serialization schema version 3.2.0. + + Version 3.2.0 replaces ``float`` values that represent integers with ``int`` + in the ``"data"`` field of any arrays returned by ``SampleSet.to_serializable()``. + In some pathological cases this can result in a much smaller representation + when the data dictionaries are JSON-serialized, because it avoids the redundant + ``.0`` appended to every value. + + This is a backwards-compatible change. diff --git a/tests/test_serialization_utils.py b/tests/test_serialization_utils.py index 191f63327..913e8196e 100644 --- a/tests/test_serialization_utils.py +++ b/tests/test_serialization_utils.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import numbers import unittest import numpy as np @@ -47,3 +48,93 @@ def test_functional_3x3triu(self): new = deserialize_ndarray(obj) np.testing.assert_array_equal(arr, new) self.assertEqual(arr.dtype, new.dtype) + + def test_replacing_floats_with_ints(self): + for dtype in [np.float16, np.float32, np.float64, np.longdouble]: + with self.subTest(f"{dtype}, all integer"): + arr = np.ones(3, dtype=dtype) + arr[0] = 2 + arr[1] = -0.0 + + obj = serialize_ndarray(arr) + + # test the round trip + new = deserialize_ndarray(obj) + np.testing.assert_array_equal(arr, new) + self.assertEqual(arr.dtype, new.dtype) # original dtype is restored + + # every entry is integral (including -0.0), so all are mapped to int + self.assertIsInstance(obj["data"][0], int) + self.assertIsInstance(obj["data"][1], int) + self.assertIsInstance(obj["data"][2], int) + + with self.subTest(f"{dtype}, all float"): + arr = np.empty(3, dtype=dtype) + arr[0] = 1.5 + arr[1] = float("inf") + arr[2] = float("nan") + + obj = serialize_ndarray(arr) + + # test the round trip + new = deserialize_ndarray(obj) + np.testing.assert_array_equal(arr, new) + self.assertEqual(arr.dtype, new.dtype) # original dtype is restored + + # non-integral values (1.5, inf, nan) remain real-valued numbers + self.assertIsInstance(obj["data"][0], numbers.Real) + self.assertIsInstance(obj["data"][1], numbers.Real) + self.assertIsInstance(obj["data"][2], numbers.Real) + + with self.subTest(f"{dtype}, mixed"): + arr = np.ones(3, dtype=dtype) + arr[0] = 1.5 + arr[1] = -0.0 + + obj = serialize_ndarray(arr) + + # test the round trip + new = deserialize_ndarray(obj) + np.testing.assert_array_equal(arr, new) + self.assertEqual(arr.dtype, new.dtype) # original dtype is restored + + # only the integral entries are mapped to int + self.assertIsInstance(obj["data"][0], numbers.Real) + self.assertIsInstance(obj["data"][1], int) + self.assertIsInstance(obj["data"][2], int) + + with self.subTest("complex, mixed"): + arr = np.ones(3, dtype=complex) + arr[0] = 1.5 + arr[1] = -0.0 
+ + obj = serialize_ndarray(arr) + + # test the round trip + new = deserialize_ndarray(obj) + np.testing.assert_array_equal(arr, new) + self.assertEqual(arr.dtype, new.dtype) + + # in this case everything is kept as a complex number + self.assertIsInstance(obj["data"][0], complex) + self.assertIsInstance(obj["data"][1], complex) + self.assertIsInstance(obj["data"][2], complex) + + for dtype in [np.int8, np.int16, np.int32, np.int64]: + with self.subTest(dtype): + arr = np.empty(3, dtype=dtype) + arr[0] = 2 + arr[1] = 0 + arr[2] = -1 + + obj = serialize_ndarray(arr) + + # test the round trip + new = deserialize_ndarray(obj) + np.testing.assert_array_equal(arr, new) + self.assertEqual(arr.dtype, new.dtype) # original dtype is restored + + # integer dtypes are serialized as int + self.assertIsInstance(obj["data"][0], int) + self.assertIsInstance(obj["data"][1], int) + self.assertIsInstance(obj["data"][2], int)