Skip to content

Commit

Permalink
Merge pull request #1354 from arcondello/feature/sampleset-integer-se…
Browse files Browse the repository at this point in the history
…rialization

Convert floats representing integers to int in sample set serialization
  • Loading branch information
arcondello authored Aug 17, 2023
2 parents 06f6ab6 + 3055f08 commit 2d35745
Show file tree
Hide file tree
Showing 4 changed files with 153 additions and 1 deletion.
2 changes: 1 addition & 1 deletion dimod/sampleset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1782,7 +1782,7 @@ def to_serializable(self, use_bytes=False, bytes_type=bytes,
:meth:`~.SampleSet.from_serializable`
"""
schema_version = "3.1.0"
schema_version = "3.2.0"

# developer note: numpy's record array stores the samples, energies,
# num_occ. etc as a struct array. If we dumped that array directly to
Expand Down
40 changes: 40 additions & 0 deletions dimod/serialization/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,47 @@
# limitations under the License.

import collections.abc as abc

from numbers import Integral, Number
from typing import List, Union

import numpy as np


__all__ = ["serialize_ndarray", "deserialize_ndarray",
"serialize_ndarrays", "deserialize_ndarrays",
"pack_samples", "unpack_samples",
]


def _replace_float_with_int(arr: Union[List[float], List[List]]):
"""Replace floats representing integers with ints in a list representing an array.
Take a list of floats, as produced by :meth:`numpy.ndarray.tolist` from an array
of floating types, and convert any ``float`` representing an integer value into
``int``.
This function assumes some uniformity of the list structure. For instance giving it
a list like ``[0.0, 0]`` or ``[0.0, [0.0]]`` will cause it to fail.
Acts on the list(s) in-place.
"""
if not len(arr):
# nothing to do when the list is empty
pass

elif isinstance(arr[0], List):
for subarr in arr:
_replace_float_with_int(subarr)

elif hasattr(arr[0], "is_integer"):
arr[:] = (int(a) if a.is_integer() else a for a in arr)

else:
raise ValueError("expected a (possibly nested) list of floats, "
f"received a (possibly nested) list of {type(arr[0])}")


def serialize_ndarray(arr, use_bytes=False, bytes_type=bytes):
"""Serialize a NumPy array.
Expand All @@ -43,6 +79,10 @@ def serialize_ndarray(arr, use_bytes=False, bytes_type=bytes):
data = bytes_type(arr.tobytes(order='C'))
else:
data = arr.tolist()

if np.issubdtype(arr.dtype, np.floating):
_replace_float_with_int(data)

return dict(type='array',
data=data,
data_type=arr.dtype.name,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
features:
- |
Implement ``SampleSet`` serialization schema version 3.2.0.
Version 3.2.0 replaces ``float`` values that represent integers with ``int``
in the ``"data"`` field of any arrays returned by ``SampleSet.to_serializable()``.
In some pathological cases this can result in a much smaller representation
when the data dictionaries are JSON-serialized, because it avoids the redundant
``.0`` that would otherwise be appended to every integer-valued float.
This is a backwards-compatible change.
100 changes: 100 additions & 0 deletions tests/test_serialization_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import numbers
import unittest

import numpy as np
Expand Down Expand Up @@ -47,3 +48,102 @@ def test_functional_3x3triu(self):
new = deserialize_ndarray(obj)
np.testing.assert_array_equal(arr, new)
self.assertEqual(arr.dtype, new.dtype)

def test_replacing_floats_with_ints(self):
    # Check that serialize_ndarray() emits Python ints for the integer-valued
    # entries of floating arrays, leaves other values and dtypes alone, and
    # that deserialize_ndarray() gives back an equal array with the same dtype.

    floating_dtypes = [np.float16, np.float32, np.float64]

    # Compare (major, minor) rather than the minor component on its own, so
    # that NumPy 2.x (whose minor version restarts at 0) is not mistaken for
    # a release older than 1.22.
    numpy_version = tuple(int(part) for part in np.__version__.split(".")[:2])
    if numpy_version >= (1, 22):
        # Numpy<1.22.0 didn't support `is_integer()` on floating types
        # so float128 etc don't work out-of-the-box because `tolist()`
        # doesn't convert those to Python float.
        floating_dtypes.append(np.longdouble)

    for dtype in floating_dtypes:
        with self.subTest(f"{dtype}, all integer"):
            arr = np.ones(3, dtype=dtype)
            arr[0] = 2
            arr[1] = -0.0

            obj = serialize_ndarray(arr)

            # test the round trip
            new = deserialize_ndarray(obj)
            np.testing.assert_array_equal(arr, new)
            self.assertEqual(arr.dtype, new.dtype)  # original dtype is restored

            # every value is integer-valued, so all are mapped to int
            self.assertIsInstance(obj["data"][0], int)
            self.assertIsInstance(obj["data"][1], int)
            self.assertIsInstance(obj["data"][2], int)

        with self.subTest(f"{dtype}, all float"):
            arr = np.empty(3, dtype=dtype)
            arr[0] = 1.5
            arr[1] = float("inf")
            arr[2] = float("nan")

            obj = serialize_ndarray(arr)

            # test the round trip
            new = deserialize_ndarray(obj)
            np.testing.assert_array_equal(arr, new)
            self.assertEqual(arr.dtype, new.dtype)  # original dtype is restored

            # fractional and non-finite values are still real numbers
            self.assertIsInstance(obj["data"][0], numbers.Real)
            self.assertIsInstance(obj["data"][1], numbers.Real)
            self.assertIsInstance(obj["data"][2], numbers.Real)

        with self.subTest(f"{dtype}, mixed"):
            arr = np.ones(3, dtype=dtype)
            arr[0] = 1.5
            arr[1] = -0.0

            obj = serialize_ndarray(arr)

            # test the round trip
            new = deserialize_ndarray(obj)
            np.testing.assert_array_equal(arr, new)
            self.assertEqual(arr.dtype, new.dtype)  # original dtype is restored

            # only the integer-valued entries are mapped to int
            self.assertIsInstance(obj["data"][0], numbers.Real)
            self.assertIsInstance(obj["data"][1], int)
            self.assertIsInstance(obj["data"][2], int)

    with self.subTest("complex, mixed"):
        arr = np.ones(3, dtype=complex)
        arr[0] = 1.5
        arr[1] = -0.0

        obj = serialize_ndarray(arr)

        # test the round trip
        new = deserialize_ndarray(obj)
        np.testing.assert_array_equal(arr, new)
        self.assertEqual(arr.dtype, new.dtype)

        # in this case everything is kept as a complex number
        self.assertIsInstance(obj["data"][0], complex)
        self.assertIsInstance(obj["data"][1], complex)
        self.assertIsInstance(obj["data"][2], complex)

    for dtype in [np.int8, np.int16, np.int32, np.int64]:
        with self.subTest(dtype):
            arr = np.empty(3, dtype=dtype)
            arr[0] = 2
            arr[1] = 0
            arr[2] = -1

            obj = serialize_ndarray(arr)

            # test the round trip
            new = deserialize_ndarray(obj)
            np.testing.assert_array_equal(arr, new)
            self.assertEqual(arr.dtype, new.dtype)  # original dtype is restored

            # integer arrays already serialize to int
            self.assertIsInstance(obj["data"][0], int)
            self.assertIsInstance(obj["data"][1], int)
            self.assertIsInstance(obj["data"][2], int)

0 comments on commit 2d35745

Please sign in to comment.