Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert floats representing integers to int in sample set serialization #1354

Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dimod/sampleset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1767,7 +1767,7 @@ def to_serializable(self, use_bytes=False, bytes_type=bytes,
:meth:`~.SampleSet.from_serializable`

"""
schema_version = "3.1.0"
schema_version = "3.2.0"

# developer note: numpy's record array stores the samples, energies,
# num_occ. etc as a struct array. If we dumped that array directly to
Expand Down
40 changes: 40 additions & 0 deletions dimod/serialization/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,47 @@
# limitations under the License.

import collections.abc as abc

from numbers import Integral, Number
from typing import List, Union

import numpy as np


__all__ = ["serialize_ndarray", "deserialize_ndarray",
"serialize_ndarrays", "deserialize_ndarrays",
"pack_samples", "unpack_samples",
]


def _replace_float_with_int(arr: Union[List[float], List[List]]):
"""Replace floats representing integers with ints in a list representing an array.

Take a list of floats, as produced by :meth:`numpy.ndarray.tolist` from an array
of floating types, and convert any ``float`` representing an integer value into
``int``.

This function assumes some uniformity of the list structure. For instance giving it
a list like ``[0.0, 0]`` or ``[0.0, [0.0]`` will cause it to fail.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

typo: missing ]


Acts on the list(s) in-place.
"""
if not len(arr):
# nothing to do when the list is empty
pass

elif isinstance(arr[0], List):
for subarr in arr:
_replace_float_with_int(subarr)

elif hasattr(arr[0], "is_integer"):
arr[:] = (int(a) if a.is_integer() else a for a in arr)

else:
raise ValueError("expected a (possibly nested) list of floats, "
f"recieved a (possible nested) list of {type(arr[0])}")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

typos: received, possibly



def serialize_ndarray(arr, use_bytes=False, bytes_type=bytes):
"""Serialize a NumPy array.

Expand All @@ -43,6 +79,10 @@ def serialize_ndarray(arr, use_bytes=False, bytes_type=bytes):
data = bytes_type(arr.tobytes(order='C'))
else:
data = arr.tolist()

if np.issubdtype(arr.dtype, np.floating):
_replace_float_with_int(data)

return dict(type='array',
data=data,
data_type=arr.dtype.name,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
features:
- |
Implement ``SampleSet`` serialization schema version 3.2.0.

Version 3.2.0 replaces ``float`` values that represent integers with ``int``
in the ``"data"`` field of any arrays returned by ``SampleSet.to_serializable()``.
In some pathological cases this can result in a much smaller representation
when the data dictionaries are json-serialized by avoiding the redundant
``.0`` appended to every value.

This is a backwards-compatible change.
100 changes: 100 additions & 0 deletions tests/test_serialization_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import numbers
import unittest

import numpy as np
Expand Down Expand Up @@ -47,3 +48,102 @@ def test_functional_3x3triu(self):
new = deserialize_ndarray(obj)
np.testing.assert_array_equal(arr, new)
self.assertEqual(arr.dtype, new.dtype)

def test_replacing_floats_with_ints(self):
    """Check that integer-valued floats are serialized as ``int``.

    For each floating dtype the array must survive a serialize/deserialize
    round trip with its dtype intact, while integer-valued entries in the
    serialized ``"data"`` list become Python ``int``. Complex and integer
    dtypes are checked to make sure they are unaffected.
    """

    def roundtrip(arr):
        # serialize, confirm the array and dtype are restored, and hand back
        # the serialized object so the caller can inspect its "data" field
        obj = serialize_ndarray(arr)
        new = deserialize_ndarray(obj)
        np.testing.assert_array_equal(arr, new)
        self.assertEqual(arr.dtype, new.dtype)  # original dtype is restored
        return obj

    floating_dtypes = [np.float16, np.float32, np.float64]

    # Compare (major, minor) rather than the minor component alone, otherwise
    # NumPy 2.x (minor 0, 1, ...) would be treated as older than 1.22.
    major, minor = (int(part) for part in np.__version__.split(".")[:2])
    if (major, minor) >= (1, 22):
        # Numpy<1.22.0 didn't support `is_integer()` on floating types
        # so float128 etc don't work out-of-the-box because `tolist()`
        # doesn't convert those to Python float.
        floating_dtypes.append(np.longdouble)

    for dtype in floating_dtypes:
        with self.subTest(f"{dtype}, all integer"):
            arr = np.ones(3, dtype=dtype)
            arr[0] = 2
            arr[1] = -0.0

            obj = roundtrip(arr)

            # every value is integer-valued, so all are mapped to int
            self.assertIsInstance(obj["data"][0], int)
            self.assertIsInstance(obj["data"][1], int)
            self.assertIsInstance(obj["data"][2], int)

        with self.subTest(f"{dtype}, all float"):
            arr = np.empty(3, dtype=dtype)
            arr[0] = 1.5
            arr[1] = float("inf")
            arr[2] = float("nan")

            obj = roundtrip(arr)

            # none of these represent an integer, so none are mapped to int
            for value in obj["data"]:
                self.assertIsInstance(value, numbers.Real)
                self.assertNotIsInstance(value, int)

        with self.subTest(f"{dtype}, mixed"):
            arr = np.ones(3, dtype=dtype)
            arr[0] = 1.5
            arr[1] = -0.0

            obj = roundtrip(arr)

            # only the integer-valued entries are mapped to int
            self.assertIsInstance(obj["data"][0], numbers.Real)
            self.assertNotIsInstance(obj["data"][0], int)
            self.assertIsInstance(obj["data"][1], int)
            self.assertIsInstance(obj["data"][2], int)

    with self.subTest("complex, mixed"):
        arr = np.ones(3, dtype=complex)
        arr[0] = 1.5
        arr[1] = -0.0

        obj = roundtrip(arr)

        # in this case everything is kept as a complex number
        self.assertIsInstance(obj["data"][0], complex)
        self.assertIsInstance(obj["data"][1], complex)
        self.assertIsInstance(obj["data"][2], complex)

    for dtype in [np.int8, np.int16, np.int32, np.int64]:
        with self.subTest(dtype):
            arr = np.empty(3, dtype=dtype)
            arr[0] = 2
            arr[1] = 0
            arr[2] = -1

            obj = roundtrip(arr)

            # integer arrays already serialize to int and stay that way
            self.assertIsInstance(obj["data"][0], int)
            self.assertIsInstance(obj["data"][1], int)
            self.assertIsInstance(obj["data"][2], int)