From 32b8357751197a5ce2d9de8855242d1542c75c75 Mon Sep 17 00:00:00 2001 From: Talley Lambert Date: Fri, 29 Sep 2023 12:05:02 -0400 Subject: [PATCH] refactor: make binarylayer a dataclass (#178) * refactor: make binarylayer a dataclass * fix slots * update docs * docs --- src/nd2/_binary.py | 35 ++++++++++++++++++++++++----------- src/nd2/nd2file.py | 23 +++++++++++++++-------- tests/test_binary.py | 15 ++++++++++++++- 3 files changed, 53 insertions(+), 20 deletions(-) diff --git a/src/nd2/_binary.py b/src/nd2/_binary.py index 7a5431f..50cdf9e 100644 --- a/src/nd2/_binary.py +++ b/src/nd2/_binary.py @@ -3,9 +3,11 @@ import io import struct +import sys import warnings import zlib -from typing import TYPE_CHECKING, Iterator, NamedTuple, Sequence, cast, overload +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Iterator, Sequence, cast, overload import numpy as np @@ -17,12 +19,21 @@ I9 = struct.Struct("<" + "I" * 9) I2 = struct.Struct("<" + "I" * 2) +SLOTS = {} +if sys.version_info >= (3, 10): + SLOTS["slots"] = True -class BinaryLayer(NamedTuple): + +@dataclass(frozen=True, **SLOTS) +class BinaryLayer: """Wrapper for data from a single binary layer in an [`nd2.ND2File`][]. - `data` will have length of num_sequences, with `None` for any frames - that lack binary data. + A "layer" is a set of binary data that can be associated with a + specific component in an ND2 file, such as a single channel. + + This object behaves like a `list[numpy.ndarray | None]`. + It will have a length matching the number of frames in the file, with `None` for + any frames that lack binary data. Attributes ---------- @@ -54,7 +65,7 @@ class BinaryLayer(NamedTuple): to reshape the data into a 3D array in `asarray`. """ - data: list[np.ndarray | None] + data: list[np.ndarray | None] = field(repr=False) name: str file_tag: str comp_name: str | None @@ -65,6 +76,14 @@ class BinaryLayer(NamedTuple): layer_id: int | None coordinate_shape: tuple[int, ...] 
+ def __len__(self) -> int: + """Return the number of frames in the data.""" + return len(self.data) + + def __getitem__(self, key: int) -> np.ndarray | None: + """Return the data for a single frame.""" + return self.data[key] + @property def frame_shape(self) -> tuple[int, ...]: """Shape (Y, X) of each mask in `data`.""" @@ -94,12 +113,6 @@ def asarray(self) -> np.ndarray | None: "np.ndarray", np.stack(d).reshape(self.coordinate_shape + frame_shape) ) - def __repr__(self) -> str: - """Return a nicely formatted string.""" - field_names = (f for f in self._fields if f != "data") - repr_fmt = "(" + ", ".join(f"{name}=%r" for name in field_names) + ")" - return self.__class__.__name__ + repr_fmt % self[1:] - class BinaryLayers(Sequence[BinaryLayer]): """Sequence of Binary Layers found in an ND2 file. diff --git a/src/nd2/nd2file.py b/src/nd2/nd2file.py index 1ce40e1..1c4afc0 100644 --- a/src/nd2/nd2file.py +++ b/src/nd2/nd2file.py @@ -1155,11 +1155,15 @@ def binary_data(self) -> BinaryLayers | None: """Return binary layers embedded in the file. The returned `BinaryLayers` object is an immutable sequence of `BinaryLayer` - objects, one for each binary layer in the file. Each `BinaryLayer` object in - the sequence has a `name` attribute, and a `data` attribute which is list of - numpy arrays (or `None` if there was no binary mask for that frame). The length - of the list will be the same as the number of sequence frames in this file - (i.e. `self.attributes.sequenceCount`). + objects, one for each binary layer in the file (there will usually be a binary + layer associated with each channel in the dataset). + + Each `BinaryLayer` object in the sequence has a `name` attribute, and a `data` + attribute which is a list of numpy arrays (or `None` if there was no binary mask + for that frame). The length of the list will be the same as the number of + sequence frames in this file (i.e. `self.attributes.sequenceCount`). 
+ Each `BinaryLayer` can be indexed directly with an integer corresponding to the + *frame* index. Both the `BinaryLayers` and individual `BinaryLayer` objects can be cast to a numpy array with `np.asarray()`, or by using the `.asarray()` method @@ -1175,12 +1179,15 @@ def binary_data(self) -> BinaryLayers | None: >>> f = ND2File("path/to/file.nd2") >>> f.binary_data - >>> f.binary_data[0] # the first binary layer + >>> first_layer = f.binary_data[0] # the first binary layer + >>> first_layer BinaryLayer(name='attached Widefield green (green color)', comp_name='Widefield Green', comp_order=2, color=65280, color_mode=0, state=524288, file_tag='RleZipBinarySequence_1_v1', layer_id=2) - >>> f.binary_data[0].data # list of arrays - >>> np.asarray(f.binary_data[0]) # just the first binary mask + >>> first_layer.data # list of arrays + # you can also index into the BinaryLayer object itself + >>> first_layer[0] # get binary data for first frame (or None if missing) + >>> np.asarray(first_layer) # cast to array matching shape of full sequence >>> np.asarray(f.binary_data).shape # cast all layers to array (4, 3, 4, 5, 32, 32) """ diff --git a/tests/test_binary.py b/tests/test_binary.py index 4eda9d7..3f64267 100644 --- a/tests/test_binary.py +++ b/tests/test_binary.py @@ -2,17 +2,30 @@ import nd2 import numpy as np +import numpy.testing as npt DATA = Path(__file__).parent / "data" +# fmt: off +ROW0 = [0,0,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,2,2,2,0,0,0,3,0,0,0,0,0,0,0] +# fmt: on + def test_binary(): with nd2.ND2File(DATA / "with_binary_and_rois.nd2") as f: binlayers = f.binary_data + repr(binlayers) + repr(binlayers[0]) assert binlayers is not None assert len(binlayers) == 4 assert binlayers[0].name == "attached Widefield green (green color)" - assert len(binlayers[0].data) == f.attributes.sequenceCount + assert len(binlayers[0]) == f.attributes.sequenceCount + # you can index into the data + npt.assert_array_equal(binlayers[0].data[2][0], ROW0) + # you can also index a 
BinaryLayer directly + assert isinstance(binlayers[0][2], np.ndarray) + assert binlayers[0][3] is None + npt.assert_array_equal(binlayers[0][2][0], ROW0) ary = np.asarray(binlayers) assert ary.shape == (4, 3, 4, 5, 32, 32) assert ary.sum() == 172947