diff --git a/README.md b/README.md index 8d25a2b..8cf6e9c 100644 --- a/README.md +++ b/README.md @@ -70,14 +70,16 @@ f.rois # Dict[int, nd2.structures.ROI] f.voxel_size() # VoxelSize(x=0.65, y=0.65, z=1.0) f.text_info # dict of misc info +f.binary_data # any binary masks stored in the file. See below. +f.custom_data # bits of unstructured metadata that start with CustomData +f.recorded_data # returns a dict of lists (passable to pandas.DataFrame) that + # the tabular "Recorded Data" view from in NIS Elements/Viewer + # with info for each frame in the experiment. + # allll the metadata we can find... # no attempt made to standardize or parse it # look in here if you're searching for metdata that isn't exposed in the above f.unstructured_metadata() -f.custom_data # bits of unstructured metadata that start with CustomData -f.recorded_data # returns a dict of lists (passable to pandas.DataFrame) that - # the tabular "Recorded Data" view from in NIS Elements/Viewer - # with info for each frame in the experiment. f.close() # don't forget to close when done! f.closed # boolean, whether the file is closed @@ -575,7 +577,7 @@ No attempt is made to parse this data. It will vary from file to file, but you recorded_data -This method returns a `dict` of equal-length sequences. +This property returns a `dict` of equal-length sequences. It matches the tabular data reported in the `Image Properties > Recorded Data` tab of the NIS Viewer. (There will be a column for each tag in the `CustomDataV2_0` section of `custom_data` above.) @@ -627,7 +629,49 @@ Out[13]: 14 12.665469 2.0 100.0 0 0 31452.2 -1801.6 556.68 556.68 ``` + + + +
+ +binary_data + +This property returns an `nd2.BinaryLayers` object representing all of the +binary masks in the nd2 file. + +An `nd2.BinaryLayers` object is a sequence of individual `nd2.BinaryLayer` +objects (one for each binary layer found in the file). Each `BinaryLayer` in +the sequence is a named tuple that has, among other things, a `name` attribute, +and a `data` attribute that is a list of numpy arrays (one for each frame in the +experiment), with `None` for any frame in which the binary layer had no data. + +The most common use case will be to cast either the entire `BinaryLayers` object +or an individual `BinaryLayer` to a `numpy.ndarray`: + +```python +>>> import nd2 +>>> nd2file = nd2.ND2File('path/to/file.nd2') +>>> binary_layers = nd2file.binary_data + +# The output array will have shape +# (n_binary_layers, *coord_shape, *frame_shape). +>>> np.asarray(binary_layers) +``` + +For example, if the data in the nd2 file has shape `(nT, nZ, nC, nY, nX)`, and +there are 4 binary layers, then the output of `np.asarray(nd2file.binary_data)` will +have shape `(4, nT, nZ, nY, nX)`. (Note that the `nC` dimension is not present +in the output array, and the binary layers are always on the first axis.) + +You can also cast an individual `BinaryLayer` to a numpy array: + +```python +>>> binary_layer = binary_layers[0] +>>> np.asarray(binary_layer) +``` +
+ ## alternatives - [pims_nd2](https://github.com/soft-matter/pims_nd2) - *pims-based reader. ctypes wrapper around the v9.00 (2015) SDK* diff --git a/scripts/download_samples.py b/scripts/download_samples.py index 5da2d0d..0b5d757 100644 --- a/scripts/download_samples.py +++ b/scripts/download_samples.py @@ -1,3 +1,4 @@ +import shutil import sys from io import BytesIO from pathlib import Path @@ -5,8 +6,8 @@ import requests -TEST_DATA = str(Path(__file__).parent.parent / "tests" / "data") -URL = "https://www.dropbox.com/s/q57orjfzzagzull/nd2_test_data.zip?dl=1" +TEST_DATA = Path(__file__).parent.parent / "tests" / "data" +URL = "https://www.dropbox.com/s/heo9ss4tcsi15x5/nd2_test_data.zip?dl=1" def main(): @@ -26,13 +27,8 @@ def main(): sys.stdout.write(f'\r[{"=" * done}{" " * (50 - done)}]') sys.stdout.flush() with ZipFile(f) as zf: - zf.extractall(TEST_DATA) - - -# def main(dest: str = TEST_DATA): -# with request.urlopen(URL) as resp: -# with ZipFile(BytesIO(resp.read())) as zf: -# zf.extractall(dest) + zf.extractall(str(TEST_DATA)) + shutil.rmtree(TEST_DATA / "__MACOSX") if __name__ == "__main__": diff --git a/src/nd2/__init__.py b/src/nd2/__init__.py index bf75bac..e6e0e1e 100644 --- a/src/nd2/__init__.py +++ b/src/nd2/__init__.py @@ -5,17 +5,20 @@ __author__ = "Talley Lambert" __email__ = "talley.lambert@gmail.com" __all__ = [ - "ND2File", - "imread", - "structures", "AXIS", + "BinaryLayer", + "BinaryLayers", + "imread", "is_supported_file", + "ND2File", "read_chunkmap", "rescue_nd2", + "structures", ] from . 
import structures +from ._binary import BinaryLayer, BinaryLayers from ._chunkmap import read_chunkmap, rescue_nd2 from ._util import AXIS, is_supported_file from .nd2file import ND2File, imread diff --git a/src/nd2/_binary.py b/src/nd2/_binary.py new file mode 100644 index 0000000..79ae26f --- /dev/null +++ b/src/nd2/_binary.py @@ -0,0 +1,248 @@ +"""Utilities for binary layers in ND2 files.""" +from __future__ import annotations + +import io +import struct +import warnings +from typing import ( + TYPE_CHECKING, + Iterator, + List, + NamedTuple, + Sequence, + Tuple, + cast, + overload, +) + +import numpy as np + +if TYPE_CHECKING: + from ._sdk.latest import ND2Reader as LatestSDKReader + from .nd2file import ND2File + +I7 = struct.Struct("<" + "I" * 7) +I9 = struct.Struct("<" + "I" * 9) +I2 = struct.Struct("<" + "I" * 2) + + +class BinaryLayer(NamedTuple): + """Wrapper for data from a single binary layer in an ND2 file. + + `data` will have length of num_sequences, with `None` for any frames + that lack binary data. + + Parameters + ---------- + data : list of numpy.ndarray or None + The data for each frame. If a frame has no binary data, the value + will be None. Data will have the same length as the number of sequences + in the file. + name: str + The name of the binary layer. + comp_name: str + The name of the associated component, if Any. + comp_order: int + The order of the associated component, if Any. + color: int + The color of the binary layer. + color_mode: int + The color mode of the binary layer. I believe this is related to how colors + are chosen in NIS-Elements software. Where "0" is direct color (i.e. use, + the color value), "8" is color by 3D ... and I'm not sure about the rest :) + state: int + The state of the binary layer. (meaning still unclear) + file_tag: str + The key for the binary layer in the CustomData metadata, + e.g. `RleZipBinarySequence_1_v1` + layer_id: int + The ID of the binary layer. 
+ coordinate_shape: tuple of int + The shape of the coordinates for the associated nd2 file. This is used + to reshape the data into a 3D array in `asarray`. + """ + + data: List[np.ndarray | None] + name: str + comp_name: str + comp_order: int + color: int + color_mode: int + state: int + file_tag: str + layer_id: int + coordinate_shape: Tuple[int, ...] + + @property + def frame_shape(self) -> Tuple[int, ...]: + """Shape (Y, X) of each mask in `data`.""" + return next((s.shape for s in self.data if s is not None), (0, 0)) + + def __array__(self) -> np.ndarray: + """Return the data as a numpy array.""" + ary = self.asarray() + return ary if ary is not None else np.ndarray([]) + + def asarray(self) -> np.ndarray | None: + """Stack all the frames into a single array. + + If there are no frames, returns None. + """ + frame_shape = self.frame_shape + if frame_shape == (0, 0): + return None + + # TODO: this is a bit of a hack (takes up memory), but it works for now + # could do something with dask + d = [ + i if i is not None else np.zeros(frame_shape, dtype="uint16") + for i in self.data + ] + return np.stack(d).reshape(self.coordinate_shape + frame_shape) + + def __repr__(self) -> str: + """Return a nicely formatted string.""" + field_names = (f for f in self._fields if f != "data") + repr_fmt = "(" + ", ".join(f"{name}=%r" for name in field_names) + ")" + return self.__class__.__name__ + repr_fmt % self[1:] + + +class BinaryLayers(Sequence[BinaryLayer]): + """Sequence of Binary Layers found in an ND2 file. + + This object is a sequence of `BinaryLayer` objects, one for each binary layer in the + file. Each layer has a `name` attribute, and a `data` attribute that is list of + numpy arrays - one for each frame in the experiment - or None if the layer was not + present in that frame. + + The wrapper can be cast to a numpy array (with `BinaryLayers.asarray()` or + np.asarray(BinaryLayers)) to stack all the layers into a single array. 
The output + array will have shape (n_layers, *coord_shape, *frame_shape). + """ + + def __init__(self, data: list[BinaryLayer]) -> None: + self._data = data + + @overload + def __getitem__(self, key: int) -> BinaryLayer: + ... + + @overload + def __getitem__(self, key: slice) -> List[BinaryLayer]: + ... + + def __getitem__(self, key: int | slice) -> BinaryLayer | List[BinaryLayer]: + return self._data[key] + + def __iter__(self) -> Iterator[BinaryLayer]: + return iter(self._data) + + def __len__(self) -> int: + return len(self._data) + + def __repr__(self) -> str: + return f"<{type(self).__name__} with {len(self)} layers>" + + def __array__(self) -> np.ndarray: + """Compatibility with np.asarray(BinaryLayers).""" + return self.asarray() + + def asarray(self) -> np.ndarray: + """Stack all the layers/frames into a single array. + + The output array will have shape (n_layers, *coord_shape, *frame_shape). + """ + out = [] + for bin_layer in self._data: + d = bin_layer.asarray() + if d is not None: + out.append(d) + return np.stack(out) + + @classmethod + def from_nd2file(cls, nd2file: ND2File) -> BinaryLayers | None: + """Extract binary layers from an ND2 file.""" + if nd2file.is_legacy: + warnings.warn( + "`binary_data` is not supported for legacy ND2 files", UserWarning + ) + return None + rdr = cast("LatestSDKReader", nd2file._rdr) + + binary_meta = nd2file.custom_data.get("BinaryMetadata_v1") + if binary_meta is None: + return None + try: + items: List[dict] = binary_meta["BinaryMetadata_v1"]["BinaryItem"] + except KeyError: + warnings.warn( + "Could not find 'BinaryMetadata_v1->BinaryItem' tag, please open an " + "issue with this file at https://github.com/tlambert03/nd2/issues/new", + ) + return None + if isinstance(items, dict): + items = [items] + + binseqs = sorted(x for x in rdr._meta_map if "RleZipBinarySequence" in x) + mask_items = [] + for item in items: + key = item["FileTag"] + _masks: List[np.ndarray | None] = [] + for bs in binseqs: + if key in bs: + 
data = rdr._get_meta_chunk(bs)[4:] + _masks.append(_decode_binary_mask(data) if data else None) + mask_items.append( + BinaryLayer( + data=_masks, + name=item["Name"], + comp_name=item["CompName"], + comp_order=item["CompOrder"], + color_mode=item["ColorMode"], + state=item["State"], + color=item["Color"], + file_tag=key, + layer_id=item["BinLayerID"], + coordinate_shape=nd2file._coord_shape, + ) + ) + + return cls(mask_items) + + +def _unpack(stream: io.BufferedIOBase, strct: struct.Struct): + return strct.unpack(stream.read(strct.size)) + + +def _decode_binary_mask(data: bytes, dtype="uint16") -> np.ndarray: + # this receives data as would be extracted from a + # `CustomDataSeq|RleZipBinarySequence...` section in the metadata + # data = f._rdr._get_meta_chunk('CustomDataSeq|RleZipBinarySequence_1_v1|0')[:4] + + # NOTE it is up to ND2File to strip the first 4 bytes... and not call this if there + # is no data (i.e. if the chunk is just '\x00') + import zlib + + decomp = zlib.decompress(data) + stream = io.BytesIO(decomp) + + # still not sure what _q is + # tot_bytes should be length of the stream remaining after this + (v, ncols, nrows, nmasks, tot_bytes, _q, _zero) = _unpack(stream, I7) + if v != 3: + warnings.warn( + f"Expected first byte to be 3 but got {v}. " + "Please submit this file :) https://github.com/tlambert03/nd2/issues/." 
+ ) + + output = np.zeros((nrows, ncols), dtype=dtype) + for _m in range(nmasks): + # (1, 1, 0, 15, 11, 412, 12, 396, 0) + (roi_id, c0, r0, c1, r1, roi_bytes, maskrows, _y, _zero) = _unpack(stream, I9) + for _r in range(maskrows): + (row, nruns) = _unpack(stream, I2) + for _s in range(nruns): + (col, n) = _unpack(stream, I2) + output[row, col : col + n] = roi_id # noqa: E203 + + return output diff --git a/src/nd2/nd2file.py b/src/nd2/nd2file.py index 05a812d..0b75809 100644 --- a/src/nd2/nd2file.py +++ b/src/nd2/nd2file.py @@ -47,6 +47,7 @@ import xarray as xr from typing_extensions import Literal + from ._binary import BinaryLayers from ._sdk.latest import ND2Reader as LatestSDKReader from .structures import Position @@ -729,7 +730,7 @@ def recorded_data(self) -> Dict[str, Union[np.ndarray, Sequence]]: if "CustomDataV2_0" not in cd: return {} try: - tags: dict = self.custom_data["CustomDataV2_0"]["CustomTagDescription_v1.0"] + tags: dict = cd["CustomDataV2_0"]["CustomTagDescription_v1.0"] except KeyError: warnings.warn( "Could not find 'CustomTagDescription_v1' tag, please open an issue " @@ -784,6 +785,44 @@ def recorded_data(self) -> Dict[str, Union[np.ndarray, Sequence]]: return data + @cached_property + def binary_data(self) -> BinaryLayers | None: + """Return binary layers embedded in the file. + + The returned `BinaryLayers` object is an immutable sequence of `BinaryLayer` + objects, one for each binary layer in the file. Each `BinaryLayer` object in + the sequence has a `name` attribute, and a `data` attribute which is list of + numpy arrays (or `None` if there was no binary mask for that frame). The length + of the list will be the same as the number of sequence frames in this file + (i.e. `self.attributes.sequenceCount`). 
+ + Both the `BinaryLayers` and individual `BinaryLayer` objects can be cast to a + numpy array with `np.asarray()`, or by using the `.asarray()` method + + Returns + ------- + BinaryLayers | None + The binary layers embedded in the file, or None if there are no binary + layers. + + Examples + -------- + >>> f = ND2File("path/to/file.nd2") + >>> f.binary_data + + >>> f.binary_data[0] # the first binary layer + BinaryLayer(name='attached Widefield green (green color)', + comp_name='Widefield Green', comp_order=2, color=65280, color_mode=0, + state=524288, file_tag='RleZipBinarySequence_1_v1', layer_id=2) + >>> f.binary_data[0].data # list of arrays + >>> np.asarray(f.binary_data[0]) # just the first binary mask + >>> np.asarray(f.binary_data).shape # cast all layers to array + (4, 3, 4, 5, 32, 32) + """ + from ._binary import BinaryLayers + + return BinaryLayers.from_nd2file(self) + @overload def imread( diff --git a/src/nd2/structures.py b/src/nd2/structures.py index ff5d518..5213741 100644 --- a/src/nd2/structures.py +++ b/src/nd2/structures.py @@ -3,10 +3,13 @@ import builtins from dataclasses import dataclass, field from enum import Enum, IntEnum -from typing import List, NamedTuple, Optional, Tuple, Union +from typing import TYPE_CHECKING, List, NamedTuple, Optional, Tuple, Union from typing_extensions import Literal +if TYPE_CHECKING: + pass + # enums diff --git a/tests/test_binary.py b/tests/test_binary.py new file mode 100644 index 0000000..db54470 --- /dev/null +++ b/tests/test_binary.py @@ -0,0 +1,19 @@ +from pathlib import Path + +import numpy as np + +import nd2 + +DATA = Path(__file__).parent / "data" + + +def test_binary(): + with nd2.ND2File(DATA / "with_binary_and_rois.nd2") as f: + binlayers = f.binary_data + assert binlayers is not None + assert len(binlayers) == 4 + assert binlayers[0].name == "attached Widefield green (green color)" + assert len(binlayers[0].data) == f.attributes.sequenceCount + ary = np.asarray(binlayers) + assert ary.shape == 
(4, 3, 4, 5, 32, 32) + assert ary.sum() == 172947