Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add ND2File.binary_data property - extract binary layers from file #108

Merged
merged 7 commits into from
Nov 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 49 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,14 +70,16 @@ f.rois # Dict[int, nd2.structures.ROI]
f.voxel_size() # VoxelSize(x=0.65, y=0.65, z=1.0)
f.text_info # dict of misc info

f.binary_data # any binary masks stored in the file. See below.
f.custom_data # bits of unstructured metadata that start with CustomData
f.recorded_data # returns a dict of lists (passable to pandas.DataFrame) that mirrors
# the tabular "Recorded Data" view in NIS Elements/Viewer,
# with info for each frame in the experiment.

# allll the metadata we can find...
# no attempt made to standardize or parse it
# look in here if you're searching for metadata that isn't exposed in the above
f.unstructured_metadata()
f.custom_data # bits of unstructured metadata that start with CustomData
f.recorded_data # returns a dict of lists (passable to pandas.DataFrame) that mirrors
# the tabular "Recorded Data" view in NIS Elements/Viewer,
# with info for each frame in the experiment.

f.close() # don't forget to close when done!
f.closed # boolean, whether the file is closed
Expand Down Expand Up @@ -575,7 +577,7 @@ No attempt is made to parse this data. It will vary from file to file, but you

<summary><code>recorded_data</code></summary>

This method returns a `dict` of equal-length sequences.
This property returns a `dict` of equal-length sequences.
It matches the tabular data reported in the `Image Properties > Recorded Data` tab of the NIS Viewer.

(There will be a column for each tag in the `CustomDataV2_0` section of `custom_data` above.)
Expand Down Expand Up @@ -627,7 +629,49 @@ Out[13]:
14 12.665469 2.0 100.0 0 0 31452.2 -1801.6 556.68 556.68

```

</details>

<details>

<summary><code>binary_data</code></summary>

This property returns an `nd2.BinaryLayers` object representing all of the
binary masks in the nd2 file.

An `nd2.BinaryLayers` object is a sequence of individual `nd2.BinaryLayer`
objects (one for each binary layer found in the file). Each `BinaryLayer` in
the sequence is a named tuple that has, among other things, a `name` attribute
and a `data` attribute. `data` is a list with one entry for each frame in the
experiment: a numpy array, or `None` if the binary layer had no data in that frame.

The most common use case will be to cast either the entire `BinaryLayers` object
or an individual `BinaryLayer` to a `numpy.ndarray`:

```python
>>> import nd2
>>> import numpy as np
>>> nd2file = nd2.ND2File('path/to/file.nd2')
>>> binary_layers = nd2file.binary_data

# The output array will have shape
# (n_binary_layers, *coord_shape, *frame_shape).
>>> np.asarray(binary_layers)
```

For example, if the data in the nd2 file has shape `(nT, nZ, nC, nY, nX)`, and
there are 4 binary layers, then the output of `np.asarray(nd2file.binary_data)` will
have shape `(4, nT, nZ, nY, nX)`. (Note that the `nC` dimension is not present
in the output array, and the binary layers are always in the first axis).

You can also cast an individual `BinaryLayer` to a numpy array:

```python
>>> binary_layer = binary_layers[0]
>>> np.asarray(binary_layer)
```

</details>

## alternatives

- [pims_nd2](https://github.com/soft-matter/pims_nd2) - *pims-based reader. ctypes wrapper around the v9.00 (2015) SDK*
Expand Down
14 changes: 5 additions & 9 deletions scripts/download_samples.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import shutil
import sys
from io import BytesIO
from pathlib import Path
from zipfile import ZipFile

import requests

TEST_DATA = str(Path(__file__).parent.parent / "tests" / "data")
URL = "https://www.dropbox.com/s/q57orjfzzagzull/nd2_test_data.zip?dl=1"
TEST_DATA = Path(__file__).parent.parent / "tests" / "data"
URL = "https://www.dropbox.com/s/heo9ss4tcsi15x5/nd2_test_data.zip?dl=1"


def main():
Expand All @@ -26,13 +27,8 @@ def main():
sys.stdout.write(f'\r[{"=" * done}{" " * (50 - done)}]')
sys.stdout.flush()
with ZipFile(f) as zf:
zf.extractall(TEST_DATA)


# def main(dest: str = TEST_DATA):
# with request.urlopen(URL) as resp:
# with ZipFile(BytesIO(resp.read())) as zf:
# zf.extractall(dest)
zf.extractall(str(TEST_DATA))
shutil.rmtree(TEST_DATA / "__MACOSX")


if __name__ == "__main__":
Expand Down
9 changes: 6 additions & 3 deletions src/nd2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,20 @@
__author__ = "Talley Lambert"
__email__ = "talley.lambert@gmail.com"
__all__ = [
"ND2File",
"imread",
"structures",
"AXIS",
"BinaryLayer",
"BinaryLayers",
"imread",
"is_supported_file",
"ND2File",
"read_chunkmap",
"rescue_nd2",
"structures",
]


from . import structures
from ._binary import BinaryLayer, BinaryLayers
from ._chunkmap import read_chunkmap, rescue_nd2
from ._util import AXIS, is_supported_file
from .nd2file import ND2File, imread
248 changes: 248 additions & 0 deletions src/nd2/_binary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,248 @@
"""Utilities for binary layers in ND2 files."""
from __future__ import annotations

import io
import struct
import warnings
from typing import (
TYPE_CHECKING,
Iterator,
List,
NamedTuple,
Sequence,
Tuple,
cast,
overload,
)

import numpy as np

if TYPE_CHECKING:
from ._sdk.latest import ND2Reader as LatestSDKReader
from .nd2file import ND2File

# Pre-compiled little-endian layouts of unsigned 32-bit integers, used by
# `_decode_binary_mask` to walk the decompressed binary-mask stream.
I7 = struct.Struct("<7I")  # stream header (7 fields)
I9 = struct.Struct("<9I")  # per-mask header (9 fields)
I2 = struct.Struct("<2I")  # (row, nruns) and (col, n) pairs


class BinaryLayer(NamedTuple):
    """Wrapper for data from a single binary layer in an ND2 file.

    `data` will have length of num_sequences, with `None` for any frames
    that lack binary data.

    Parameters
    ----------
    data : list of numpy.ndarray or None
        The data for each frame. If a frame has no binary data, the value
        will be None. Data will have the same length as the number of sequences
        in the file.
    name: str
        The name of the binary layer.
    comp_name: str
        The name of the associated component, if any.
    comp_order: int
        The order of the associated component, if any.
    color: int
        The color of the binary layer.
    color_mode: int
        The color mode of the binary layer. I believe this is related to how colors
        are chosen in NIS-Elements software. Where "0" is direct color (i.e. use,
        the color value), "8" is color by 3D ... and I'm not sure about the rest :)
    state: int
        The state of the binary layer. (meaning still unclear)
    file_tag: str
        The key for the binary layer in the CustomData metadata,
        e.g. `RleZipBinarySequence_1_v1`
    layer_id: int
        The ID of the binary layer.
    coordinate_shape: tuple of int
        The shape of the coordinates for the associated nd2 file. This is used
        in `asarray` to reshape the stacked frames to
        `coordinate_shape + frame_shape`.
    """

    data: List[np.ndarray | None]
    name: str
    comp_name: str
    comp_order: int
    color: int
    color_mode: int
    state: int
    file_tag: str
    layer_id: int
    coordinate_shape: Tuple[int, ...]

    @property
    def frame_shape(self) -> Tuple[int, ...]:
        """Shape (Y, X) of each mask in `data`.

        Taken from the first frame that is not None; `(0, 0)` is returned as a
        sentinel when every frame is None.
        """
        return next((s.shape for s in self.data if s is not None), (0, 0))

    def __array__(self, dtype=None, copy=None) -> np.ndarray:
        """Return the data as a numpy array (`np.asarray(layer)` support).

        Parameters
        ----------
        dtype : optional
            Requested dtype, as passed by NumPy's ``__array__`` protocol.
        copy : optional
            Accepted for compatibility with the NumPy 2 ``__array__`` protocol.
            The result is always a newly built array, so the value is not used.

        Returns
        -------
        numpy.ndarray
            The stacked frames (see `asarray`), or an empty array (size 0) when
            the layer has no data in any frame.
        """
        ary = self.asarray()
        if ary is None:
            # `np.ndarray([])` would allocate an *uninitialized* 0-d array;
            # return a well-defined empty array instead.
            ary = np.array([])
        return ary if dtype is None else ary.astype(dtype, copy=False)

    def asarray(self) -> np.ndarray | None:
        """Stack all the frames into a single array.

        Frames without data are filled with zeros (dtype uint16). The result has
        shape `coordinate_shape + frame_shape`.

        If there are no frames, returns None.
        """
        frame_shape = self.frame_shape
        if frame_shape == (0, 0):
            return None

        # TODO: this is a bit of a hack (takes up memory), but it works for now
        # could do something with dask
        # A single blank frame is shared by all missing frames; np.stack copies
        # it into the output, so there is no aliasing in the result.
        blank = np.zeros(frame_shape, dtype="uint16")
        frames = [blank if frame is None else frame for frame in self.data]
        return np.stack(frames).reshape(self.coordinate_shape + frame_shape)

    def __repr__(self) -> str:
        """Return a nicely formatted string (all fields except `data`)."""
        field_names = (f for f in self._fields if f != "data")
        repr_fmt = "(" + ", ".join(f"{name}=%r" for name in field_names) + ")"
        # self[1:] skips `data`, which is the first field of the tuple.
        return self.__class__.__name__ + repr_fmt % self[1:]


class BinaryLayers(Sequence[BinaryLayer]):
    """Sequence of Binary Layers found in an ND2 file.

    This object is a sequence of `BinaryLayer` objects, one for each binary layer in the
    file. Each layer has a `name` attribute, and a `data` attribute that is a list of
    numpy arrays - one for each frame in the experiment - or None if the layer was not
    present in that frame.

    The wrapper can be cast to a numpy array (with `BinaryLayers.asarray()` or
    np.asarray(BinaryLayers)) to stack all the layers into a single array. The output
    array will have shape (n_layers, *coord_shape, *frame_shape).
    """

    def __init__(self, data: list[BinaryLayer]) -> None:
        self._data = data

    @overload
    def __getitem__(self, key: int) -> BinaryLayer:
        ...

    @overload
    def __getitem__(self, key: slice) -> List[BinaryLayer]:
        ...

    def __getitem__(self, key: int | slice) -> BinaryLayer | List[BinaryLayer]:
        return self._data[key]

    def __iter__(self) -> Iterator[BinaryLayer]:
        return iter(self._data)

    def __len__(self) -> int:
        return len(self._data)

    def __repr__(self) -> str:
        return f"<{type(self).__name__} with {len(self)} layers>"

    def __array__(self, dtype=None, copy=None) -> np.ndarray:
        """Compatibility with np.asarray(BinaryLayers).

        `dtype` and `copy` are accepted as part of NumPy's ``__array__``
        protocol. The result is always a newly stacked array, so `copy` is
        not used.
        """
        ary = self.asarray()
        return ary if dtype is None else ary.astype(dtype, copy=False)

    def asarray(self) -> np.ndarray:
        """Stack all the layers/frames into a single array.

        The output array will have shape (n_layers, *coord_shape, *frame_shape).
        Layers for which `BinaryLayer.asarray()` returns None are skipped.

        Raises
        ------
        ValueError
            If no layer has any data to stack.
        """
        arrays = (layer.asarray() for layer in self._data)
        stacked = [ary for ary in arrays if ary is not None]
        if not stacked:
            # np.stack([]) would also raise ValueError, but with an opaque message.
            raise ValueError("No binary layers with data to stack.")
        return np.stack(stacked)

    @staticmethod
    def _chunk_sort_key(chunk_name: str) -> Tuple[str, int]:
        """Sort key ordering chunk names by their numeric trailing `|<index>`.

        A plain string sort would place `...|10` before `...|2`. Names without
        a numeric suffix fall back to ordering by the full name.
        """
        head, sep, tail = chunk_name.rpartition("|")
        if sep and tail.isdecimal():
            return (head, int(tail))
        return (chunk_name, -1)

    @classmethod
    def from_nd2file(cls, nd2file: ND2File) -> BinaryLayers | None:
        """Extract binary layers from an ND2 file.

        Returns None (after emitting a warning where noted) when the file is a
        legacy file (warns), when it has no `BinaryMetadata_v1` custom data, or
        when the `BinaryMetadata_v1 -> BinaryItem` tag is missing (warns).
        """
        if nd2file.is_legacy:
            warnings.warn(
                "`binary_data` is not supported for legacy ND2 files", UserWarning
            )
            return None
        rdr = cast("LatestSDKReader", nd2file._rdr)

        binary_meta = nd2file.custom_data.get("BinaryMetadata_v1")
        if binary_meta is None:
            return None
        try:
            items: List[dict] = binary_meta["BinaryMetadata_v1"]["BinaryItem"]
        except KeyError:
            warnings.warn(
                "Could not find 'BinaryMetadata_v1->BinaryItem' tag, please open an "
                "issue with this file at https://github.com/tlambert03/nd2/issues/new",
            )
            return None
        # a single item is stored as a bare dict rather than a list of dicts
        if isinstance(items, dict):
            items = [items]

        # Order chunks by frame index, not lexicographically, so that the masks
        # of each layer line up with the frame order.
        # NOTE(review): assumes chunk names end in `|<frame index>`, e.g.
        # 'CustomDataSeq|RleZipBinarySequence_1_v1|0' - confirm against the reader.
        binseqs = sorted(
            (x for x in rdr._meta_map if "RleZipBinarySequence" in x),
            key=cls._chunk_sort_key,
        )
        mask_items = []
        for item in items:
            key = item["FileTag"]
            _masks: List[np.ndarray | None] = []
            for bs in binseqs:
                if key in bs:
                    # the first 4 bytes of the chunk are not part of the
                    # compressed payload; an empty remainder means "no mask"
                    data = rdr._get_meta_chunk(bs)[4:]
                    _masks.append(_decode_binary_mask(data) if data else None)
            mask_items.append(
                BinaryLayer(
                    data=_masks,
                    name=item["Name"],
                    comp_name=item["CompName"],
                    comp_order=item["CompOrder"],
                    color_mode=item["ColorMode"],
                    state=item["State"],
                    color=item["Color"],
                    file_tag=key,
                    layer_id=item["BinLayerID"],
                    coordinate_shape=nd2file._coord_shape,
                )
            )

        return cls(mask_items)


def _unpack(stream: io.BufferedIOBase, strct: struct.Struct):
    """Read `strct.size` bytes from `stream` and unpack them with `strct`."""
    raw = stream.read(strct.size)
    return strct.unpack(raw)


def _decode_binary_mask(data: bytes, dtype="uint16") -> np.ndarray:
    """Decode one zlib-compressed binary mask chunk into a 2D label array.

    `data` is the payload of a `CustomDataSeq|RleZipBinarySequence...` section
    of the metadata, for example:

        f._rdr._get_meta_chunk('CustomDataSeq|RleZipBinarySequence_1_v1|0')[4:]

    NOTE: it is up to the caller to strip the first 4 bytes of the chunk, and
    not to call this function at all if there is no data left (i.e. if the
    chunk is just '\\x00').

    Returns an array of shape (nrows, ncols) and the given `dtype`, zero
    everywhere except for the runs listed in the stream, which are filled
    with the id of the ROI they belong to.
    """
    import zlib

    stream = io.BytesIO(zlib.decompress(data))

    # Header fields: version, ncols, nrows, nmasks, tot_bytes, <unknown>, zero.
    # tot_bytes should be the length of the stream remaining after the header;
    # the meaning of the sixth field is still unknown.
    header = _unpack(stream, I7)
    version, ncols, nrows, nmasks = header[:4]
    if version != 3:
        warnings.warn(
            f"Expected first byte to be 3 but got {version}. "
            "Please submit this file :) https://github.com/tlambert03/nd2/issues/."
        )

    mask = np.zeros((nrows, ncols), dtype=dtype)
    for _ in range(nmasks):
        # Per-mask header, e.g. (1, 1, 0, 15, 11, 412, 12, 396, 0):
        # (roi_id, c0, r0, c1, r1, roi_bytes, maskrows, <unknown>, zero).
        # Only the roi id and the number of rows that follow are needed here.
        roi_header = _unpack(stream, I9)
        roi_id = roi_header[0]
        n_mask_rows = roi_header[6]
        for _ in range(n_mask_rows):
            row, n_runs = _unpack(stream, I2)
            for _ in range(n_runs):
                start, length = _unpack(stream, I2)
                mask[row, start : start + length] = roi_id  # noqa: E203

    return mask
Loading