diff --git a/README.md b/README.md
index 8d25a2b..8cf6e9c 100644
--- a/README.md
+++ b/README.md
@@ -70,14 +70,16 @@ f.rois # Dict[int, nd2.structures.ROI]
f.voxel_size() # VoxelSize(x=0.65, y=0.65, z=1.0)
f.text_info # dict of misc info
+f.binary_data # any binary masks stored in the file. See below.
+f.custom_data # bits of unstructured metadata that start with CustomData
+f.recorded_data # returns a dict of lists (passable to pandas.DataFrame) that
+                # mirrors the tabular "Recorded Data" view in NIS Elements/Viewer,
+                # with info for each frame in the experiment.
+
# allll the metadata we can find...
# no attempt made to standardize or parse it
# look in here if you're searching for metdata that isn't exposed in the above
f.unstructured_metadata()
-f.custom_data # bits of unstructured metadata that start with CustomData
-f.recorded_data # returns a dict of lists (passable to pandas.DataFrame) that
- # the tabular "Recorded Data" view from in NIS Elements/Viewer
- # with info for each frame in the experiment.
f.close() # don't forget to close when done!
f.closed # boolean, whether the file is closed
@@ -575,7 +577,7 @@ No attempt is made to parse this data. It will vary from file to file, but you
recorded_data
-This method returns a `dict` of equal-length sequences.
+This property returns a `dict` of equal-length sequences.
It matches the tabular data reported in the `Image Properties > Recorded Data` tab of the NIS Viewer.
(There will be a column for each tag in the `CustomDataV2_0` section of `custom_data` above.)
@@ -627,7 +629,49 @@ Out[13]:
14 12.665469 2.0 100.0 0 0 31452.2 -1801.6 556.68 556.68
```
+
+
+
+
+
+binary_data
+
+This property returns an `nd2.BinaryLayers` object representing all of the
+binary masks in the nd2 file.
+
+An `nd2.BinaryLayers` object is a sequence of individual `nd2.BinaryLayer`
+objects (one for each binary layer found in the file). Each `BinaryLayer` in
+the sequence is a named tuple that has, among other things, a `name` attribute
+and a `data` attribute. `data` is a list with one entry per frame in the
+experiment: a numpy array, or `None` if the binary layer had no data in that frame.
+
+The most common use case will be to cast either the entire `BinaryLayers` object
+or an individual `BinaryLayer` to a `numpy.ndarray`:
+
+```python
+>>> import numpy as np
+>>> import nd2
+>>> nd2file = nd2.ND2File('path/to/file.nd2')
+>>> binary_layers = nd2file.binary_data
+
+# The output array will have shape
+# (n_binary_layers, *coord_shape, *frame_shape).
+>>> np.asarray(binary_layers)
+```
+
+For example, if the data in the nd2 file has shape `(nT, nZ, nC, nY, nX)`, and
+there are 4 binary layers, then the output of `np.asarray(nd2file.binary_data)` will
+have shape `(4, nT, nZ, nY, nX)`. (Note that the `nC` dimension is not present
+in the output array, and the binary layers are always in the first axis).
+
+You can also cast an individual `BinaryLayer` to a numpy array:
+
+```python
+>>> binary_layer = binary_layers[0]
+>>> np.asarray(binary_layer)
+```
+
+
## alternatives
- [pims_nd2](https://github.com/soft-matter/pims_nd2) - *pims-based reader. ctypes wrapper around the v9.00 (2015) SDK*
diff --git a/scripts/download_samples.py b/scripts/download_samples.py
index 5da2d0d..0b5d757 100644
--- a/scripts/download_samples.py
+++ b/scripts/download_samples.py
@@ -1,3 +1,4 @@
+import shutil
import sys
from io import BytesIO
from pathlib import Path
@@ -5,8 +6,8 @@
import requests
-TEST_DATA = str(Path(__file__).parent.parent / "tests" / "data")
-URL = "https://www.dropbox.com/s/q57orjfzzagzull/nd2_test_data.zip?dl=1"
+TEST_DATA = Path(__file__).parent.parent / "tests" / "data"
+URL = "https://www.dropbox.com/s/heo9ss4tcsi15x5/nd2_test_data.zip?dl=1"
def main():
@@ -26,13 +27,8 @@ def main():
sys.stdout.write(f'\r[{"=" * done}{" " * (50 - done)}]')
sys.stdout.flush()
with ZipFile(f) as zf:
- zf.extractall(TEST_DATA)
-
-
-# def main(dest: str = TEST_DATA):
-# with request.urlopen(URL) as resp:
-# with ZipFile(BytesIO(resp.read())) as zf:
-# zf.extractall(dest)
+ zf.extractall(str(TEST_DATA))
+ shutil.rmtree(TEST_DATA / "__MACOSX")
if __name__ == "__main__":
diff --git a/src/nd2/__init__.py b/src/nd2/__init__.py
index bf75bac..e6e0e1e 100644
--- a/src/nd2/__init__.py
+++ b/src/nd2/__init__.py
@@ -5,17 +5,20 @@
__author__ = "Talley Lambert"
__email__ = "talley.lambert@gmail.com"
__all__ = [
- "ND2File",
- "imread",
- "structures",
"AXIS",
+ "BinaryLayer",
+ "BinaryLayers",
+ "imread",
"is_supported_file",
+ "ND2File",
"read_chunkmap",
"rescue_nd2",
+ "structures",
]
from . import structures
+from ._binary import BinaryLayer, BinaryLayers
from ._chunkmap import read_chunkmap, rescue_nd2
from ._util import AXIS, is_supported_file
from .nd2file import ND2File, imread
diff --git a/src/nd2/_binary.py b/src/nd2/_binary.py
new file mode 100644
index 0000000..79ae26f
--- /dev/null
+++ b/src/nd2/_binary.py
@@ -0,0 +1,248 @@
+"""Utilities for binary layers in ND2 files."""
+from __future__ import annotations
+
+import io
+import struct
+import warnings
+from typing import (
+ TYPE_CHECKING,
+ Iterator,
+ List,
+ NamedTuple,
+ Sequence,
+ Tuple,
+ cast,
+ overload,
+)
+
+import numpy as np
+
+if TYPE_CHECKING:
+ from ._sdk.latest import ND2Reader as LatestSDKReader
+ from .nd2file import ND2File
+
+# Precompiled little-endian ("<") layouts made only of unsigned 32-bit ints ("I").
+# `_decode_binary_mask` uses them to read records from the decompressed stream.
+I7 = struct.Struct("<" + "I" * 7)  # 7-field header at the start of the stream
+I9 = struct.Struct("<" + "I" * 9)  # 9-field header read once per mask
+I2 = struct.Struct("<" + "I" * 2)  # 2-field pairs: (row, nruns) and (col, n)
+
+
+class BinaryLayer(NamedTuple):
+    """Wrapper for data from a single binary layer in an ND2 file.
+
+    `data` has one entry per sequence frame in the file, with `None` for any
+    frame that lacks binary data.
+
+    Parameters
+    ----------
+    data : list of numpy.ndarray or None
+        The data for each frame. If a frame has no binary data, the value
+        will be None. Data will have the same length as the number of sequences
+        in the file.
+    name: str
+        The name of the binary layer.
+    comp_name: str
+        The name of the associated component, if any.
+    comp_order: int
+        The order of the associated component, if any.
+    color: int
+        The color of the binary layer.
+    color_mode: int
+        The color mode of the binary layer. I believe this is related to how colors
+        are chosen in NIS-Elements software. Where "0" is direct color (i.e. use
+        the color value), "8" is color by 3D ... and I'm not sure about the rest :)
+    state: int
+        The state of the binary layer. (meaning still unclear)
+    file_tag: str
+        The key for the binary layer in the CustomData metadata,
+        e.g. `RleZipBinarySequence_1_v1`
+    layer_id: int
+        The ID of the binary layer.
+    coordinate_shape: tuple of int
+        The shape of the coordinates for the associated nd2 file. This is used
+        in `asarray` to reshape the stacked frames to
+        `coordinate_shape + frame_shape`.
+    """
+
+    data: List[np.ndarray | None]
+    name: str
+    comp_name: str
+    comp_order: int
+    color: int
+    color_mode: int
+    state: int
+    file_tag: str
+    layer_id: int
+    coordinate_shape: Tuple[int, ...]
+
+    @property
+    def frame_shape(self) -> Tuple[int, ...]:
+        """Shape of the first non-None mask in `data`, or (0, 0) if there is none."""
+        return next((s.shape for s in self.data if s is not None), (0, 0))
+
+    def __array__(self, dtype=None, copy=None) -> np.ndarray:
+        """Return the data as a numpy array (support for `np.asarray(layer)`).
+
+        Parameters
+        ----------
+        dtype : data-type, optional
+            If given, the result is converted to this dtype.
+        copy : bool, optional
+            Accepted because NumPy may pass it; the array built here is always
+            freshly allocated, so no extra copy is needed.
+
+        Returns
+        -------
+        numpy.ndarray
+            The stacked frames (see `asarray`), or an empty uint16 array of
+            shape (0,) when no frame in this layer has data.
+        """
+        ary = self.asarray()
+        if ary is None:
+            # `np.ndarray([])` would build an *uninitialized 0-d* array (the first
+            # argument is a shape); return a genuinely empty array instead.
+            ary = np.empty(0, dtype="uint16")
+        return ary if dtype is None else ary.astype(dtype, copy=False)
+
+    def asarray(self) -> np.ndarray | None:
+        """Stack all the frames into a single array.
+
+        Frames without data are filled with zeros. The result has shape
+        `coordinate_shape + frame_shape`.
+
+        If no frame has data, returns None.
+        """
+        frame_shape = self.frame_shape
+        if frame_shape == (0, 0):
+            return None
+
+        # TODO: this is a bit of a hack (takes up memory), but it works for now
+        # could do something with dask
+        d = [
+            i if i is not None else np.zeros(frame_shape, dtype="uint16")
+            for i in self.data
+        ]
+        return np.stack(d).reshape(self.coordinate_shape + frame_shape)
+
+    def __repr__(self) -> str:
+        """Return a nicely formatted string listing every field except `data`."""
+        field_names = (f for f in self._fields if f != "data")
+        repr_fmt = "(" + ", ".join(f"{name}=%r" for name in field_names) + ")"
+        # `data` is the first field, so self[1:] holds exactly the remaining values
+        return self.__class__.__name__ + repr_fmt % self[1:]
+
+
+class BinaryLayers(Sequence[BinaryLayer]):
+    """Sequence of Binary Layers found in an ND2 file.
+
+    This object is a sequence of `BinaryLayer` objects, one for each binary layer in the
+    file. Each layer has a `name` attribute, and a `data` attribute that is a list with
+    one entry per frame in the experiment: a numpy array, or None if the layer was not
+    present in that frame.
+
+    The wrapper can be cast to a numpy array (with `BinaryLayers.asarray()` or
+    np.asarray(BinaryLayers)) to stack all the layers into a single array. The output
+    array will have shape (n_layers, *coord_shape, *frame_shape).
+    """
+
+    def __init__(self, data: list[BinaryLayer]) -> None:
+        self._data = data
+
+    @overload
+    def __getitem__(self, key: int) -> BinaryLayer:
+        ...
+
+    @overload
+    def __getitem__(self, key: slice) -> List[BinaryLayer]:
+        ...
+
+    def __getitem__(self, key: int | slice) -> BinaryLayer | List[BinaryLayer]:
+        return self._data[key]
+
+    def __iter__(self) -> Iterator[BinaryLayer]:
+        return iter(self._data)
+
+    def __len__(self) -> int:
+        return len(self._data)
+
+    def __repr__(self) -> str:
+        return f"<{type(self).__name__} with {len(self)} layers>"
+
+    def __array__(self, dtype=None, copy=None) -> np.ndarray:
+        """Compatibility with np.asarray(BinaryLayers).
+
+        `dtype` and `copy` are accepted because NumPy may pass them; the stacked
+        array is always newly allocated, so `copy` needs no special handling.
+        """
+        ary = self.asarray()
+        return ary if dtype is None else ary.astype(dtype, copy=False)
+
+    def asarray(self) -> np.ndarray:
+        """Stack all the layers/frames into a single array.
+
+        The output array will have shape (n_layers, *coord_shape, *frame_shape).
+        Layers for which `BinaryLayer.asarray()` returns None (no frame has data)
+        are left out. If no layer has data, an empty uint16 array of shape (0,)
+        is returned (np.stack would raise on an empty list).
+        """
+        stacked = [
+            ary for ary in (layer.asarray() for layer in self._data) if ary is not None
+        ]
+        if not stacked:
+            return np.empty(0, dtype="uint16")
+        return np.stack(stacked)
+
+    @staticmethod
+    def _chunk_sort_key(chunk_name: str) -> Tuple[str, int, str]:
+        """Sort key that orders chunk names by their numeric frame suffix.
+
+        Chunk names look like `CustomDataSeq|RleZipBinarySequence_1_v1|0`, where the
+        part after the last `|` is the frame index. Plain string sorting would put
+        `...|10` before `...|2`; comparing the suffix as an integer keeps frames in
+        sequence order. Names without a numeric suffix fall back to string order.
+        """
+        prefix, _, suffix = chunk_name.rpartition("|")
+        if suffix.isdecimal():
+            return (prefix, int(suffix), chunk_name)
+        return (chunk_name, -1, chunk_name)
+
+    @classmethod
+    def from_nd2file(cls, nd2file: ND2File) -> BinaryLayers | None:
+        """Extract binary layers from an ND2 file.
+
+        Returns None (after a warning where applicable) if the file is a legacy
+        file, has no `BinaryMetadata_v1` entry in its custom data, or that entry
+        lacks the expected `BinaryItem` tag.
+        """
+        if nd2file.is_legacy:
+            warnings.warn(
+                "`binary_data` is not supported for legacy ND2 files", UserWarning
+            )
+            return None
+        rdr = cast("LatestSDKReader", nd2file._rdr)
+
+        binary_meta = nd2file.custom_data.get("BinaryMetadata_v1")
+        if binary_meta is None:
+            return None
+        try:
+            items: List[dict] = binary_meta["BinaryMetadata_v1"]["BinaryItem"]
+        except KeyError:
+            warnings.warn(
+                "Could not find 'BinaryMetadata_v1->BinaryItem' tag, please open an "
+                "issue with this file at https://github.com/tlambert03/nd2/issues/new",
+            )
+            return None
+        # a single item is stored as a bare dict rather than a list of dicts
+        if isinstance(items, dict):
+            items = [items]
+
+        # order chunks by integer frame index so `data[i]` corresponds to frame i
+        binseqs = sorted(
+            (x for x in rdr._meta_map if "RleZipBinarySequence" in x),
+            key=cls._chunk_sort_key,
+        )
+        mask_items = []
+        for item in items:
+            key = item["FileTag"]
+            _masks: List[np.ndarray | None] = []
+            for bs in binseqs:
+                if key in bs:
+                    # the first 4 bytes of the chunk are not part of the zlib payload
+                    data = rdr._get_meta_chunk(bs)[4:]
+                    _masks.append(_decode_binary_mask(data) if data else None)
+            mask_items.append(
+                BinaryLayer(
+                    data=_masks,
+                    name=item["Name"],
+                    comp_name=item["CompName"],
+                    comp_order=item["CompOrder"],
+                    color_mode=item["ColorMode"],
+                    state=item["State"],
+                    color=item["Color"],
+                    file_tag=key,
+                    layer_id=item["BinLayerID"],
+                    coordinate_shape=nd2file._coord_shape,
+                )
+            )
+
+        return cls(mask_items)
+
+
+def _unpack(stream: io.BufferedIOBase, strct: struct.Struct):
+    """Read `strct.size` bytes from `stream` and unpack them with `strct`."""
+    raw = stream.read(strct.size)
+    return strct.unpack(raw)
+
+
+def _decode_binary_mask(data: bytes, dtype="uint16") -> np.ndarray:
+    """Decode one zlib-compressed, run-length-encoded mask chunk into a 2D array.
+
+    `data` is the payload of a `CustomDataSeq|RleZipBinarySequence...` metadata
+    section with its first 4 bytes already removed, e.g.
+    `f._rdr._get_meta_chunk('CustomDataSeq|RleZipBinarySequence_1_v1|0')[4:]`.
+
+    NOTE: it is up to the caller to strip those 4 bytes, and to not call this
+    function at all when there is no data (i.e. if the chunk is just '\x00').
+
+    Parameters
+    ----------
+    data : bytes
+        zlib-compressed mask stream.
+    dtype : str
+        dtype of the returned array.
+
+    Returns
+    -------
+    numpy.ndarray
+        Array of shape (nrows, ncols); every pixel covered by a run is set to
+        the id of the mask that run belongs to, all other pixels are 0.
+    """
+    import zlib
+
+    stream = io.BytesIO(zlib.decompress(data))
+
+    # stream header: 7 uint32 values. Only the first four are used here; the
+    # fifth should be the byte length of the rest of the stream, and the
+    # meaning of the last two is still unknown.
+    header = _unpack(stream, I7)
+    version, ncols, nrows, nmasks = header[:4]
+    if version != 3:
+        warnings.warn(
+            f"Expected first byte to be 3 but got {version}. "
+            "Please submit this file :) https://github.com/tlambert03/nd2/issues/."
+        )
+
+    mask = np.zeros((nrows, ncols), dtype=dtype)
+    for _ in range(nmasks):
+        # per-mask header: 9 uint32 values, e.g. (1, 1, 0, 15, 11, 412, 12, 396, 0)
+        # field 0 is the mask id, field 6 the number of encoded rows; fields 1-4
+        # are presumably the bounding box (c0, r0, c1, r1) -- TODO confirm
+        roi_header = _unpack(stream, I9)
+        roi_id = roi_header[0]
+        n_encoded_rows = roi_header[6]
+        for _ in range(n_encoded_rows):
+            row, nruns = _unpack(stream, I2)
+            for _ in range(nruns):
+                # each run is (first column, run length) within `row`
+                start, length = _unpack(stream, I2)
+                mask[row, start : start + length] = roi_id  # noqa: E203
+
+    return mask
diff --git a/src/nd2/nd2file.py b/src/nd2/nd2file.py
index 05a812d..0b75809 100644
--- a/src/nd2/nd2file.py
+++ b/src/nd2/nd2file.py
@@ -47,6 +47,7 @@
import xarray as xr
from typing_extensions import Literal
+ from ._binary import BinaryLayers
from ._sdk.latest import ND2Reader as LatestSDKReader
from .structures import Position
@@ -729,7 +730,7 @@ def recorded_data(self) -> Dict[str, Union[np.ndarray, Sequence]]:
if "CustomDataV2_0" not in cd:
return {}
try:
- tags: dict = self.custom_data["CustomDataV2_0"]["CustomTagDescription_v1.0"]
+ tags: dict = cd["CustomDataV2_0"]["CustomTagDescription_v1.0"]
except KeyError:
warnings.warn(
"Could not find 'CustomTagDescription_v1' tag, please open an issue "
@@ -784,6 +785,44 @@ def recorded_data(self) -> Dict[str, Union[np.ndarray, Sequence]]:
return data
+    @cached_property
+    def binary_data(self) -> BinaryLayers | None:
+        """Return binary layers embedded in the file.
+
+        The returned `BinaryLayers` object is an immutable sequence of `BinaryLayer`
+        objects, one for each binary layer in the file. Each `BinaryLayer` object in
+        the sequence has a `name` attribute, and a `data` attribute which is a list
+        with one entry per sequence frame in this file
+        (i.e. `self.attributes.sequenceCount` entries): a numpy array, or `None` if
+        there was no binary mask for that frame.
+
+        Both the `BinaryLayers` and individual `BinaryLayer` objects can be cast to a
+        numpy array with `np.asarray()`, or by using the `.asarray()` method.
+
+        Returns
+        -------
+        BinaryLayers | None
+            The binary layers embedded in the file, or None if there are no binary
+            layers.
+
+        Examples
+        --------
+        >>> f = ND2File("path/to/file.nd2")
+        >>> f.binary_data
+        <BinaryLayers with 4 layers>
+        >>> f.binary_data[0]  # the first binary layer
+        BinaryLayer(name='attached Widefield green (green color)',
+        comp_name='Widefield Green', comp_order=2, color=65280, color_mode=0,
+        state=524288, file_tag='RleZipBinarySequence_1_v1', layer_id=2,
+        coordinate_shape=(3, 4, 5))
+        >>> f.binary_data[0].data  # list of arrays
+        >>> np.asarray(f.binary_data[0])  # just the first binary mask
+        >>> np.asarray(f.binary_data).shape  # cast all layers to array
+        (4, 3, 4, 5, 32, 32)
+        """
+        # imported here rather than at module level (kept as in the original)
+        from ._binary import BinaryLayers as _BinaryLayers
+
+        layers = _BinaryLayers.from_nd2file(self)
+        return layers
+
@overload
def imread(
diff --git a/src/nd2/structures.py b/src/nd2/structures.py
index ff5d518..5213741 100644
--- a/src/nd2/structures.py
+++ b/src/nd2/structures.py
@@ -3,10 +3,13 @@
import builtins
from dataclasses import dataclass, field
from enum import Enum, IntEnum
-from typing import List, NamedTuple, Optional, Tuple, Union
+from typing import TYPE_CHECKING, List, NamedTuple, Optional, Tuple, Union
from typing_extensions import Literal
+if TYPE_CHECKING:
+ pass
+
# enums
diff --git a/tests/test_binary.py b/tests/test_binary.py
new file mode 100644
index 0000000..db54470
--- /dev/null
+++ b/tests/test_binary.py
@@ -0,0 +1,19 @@
+from pathlib import Path
+
+import numpy as np
+
+import nd2
+
+DATA = Path(__file__).parent / "data"
+
+
+def test_binary():
+    """The sample file exposes 4 binary layers with the expected name, size and sum."""
+    sample = DATA / "with_binary_and_rois.nd2"
+    with nd2.ND2File(sample) as f:
+        layers = f.binary_data
+        assert layers is not None
+        assert len(layers) == 4
+
+        first = layers[0]
+        assert first.name == "attached Widefield green (green color)"
+        # one entry per sequence frame, whether or not the frame has a mask
+        assert len(first.data) == f.attributes.sequenceCount
+
+        stacked = np.asarray(layers)
+        assert stacked.shape == (4, 3, 4, 5, 32, 32)
+        assert stacked.sum() == 172947