feat: add read_using_sdk parameter, default to True for compressed files #74

Merged
merged 2 commits on Jun 25, 2022
3 changes: 2 additions & 1 deletion src/nd2/_sdk/latest.pyi
@@ -1,5 +1,5 @@
 from pathlib import Path
-from typing import Any, Dict, List, Sequence, Tuple, Union
+from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
 
 import numpy as np
 
@@ -12,6 +12,7 @@ class ND2Reader:
         path: Union[str, Path],
         validate_frames: bool = False,
         search_window: int = 100,
+        read_using_sdk: Optional[bool] = None,
     ) -> None: ...
     def open(self) -> None: ...
     def close(self) -> None: ...
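The stub now exposes the new keyword to type checkers. A minimal usage sketch against the low-level reader, assuming an nd2 file at a hypothetical path (`example.nd2` is not part of this PR):

```python
from nd2._sdk.latest import ND2Reader

# read_using_sdk=None (the default) lets the reader decide from the file's
# compression; True forces the SDK path, False forces the memmap path.
rdr = ND2Reader("example.nd2", read_using_sdk=None)  # hypothetical file
try:
    frame = rdr._read_image(0)  # dispatches to the SDK or memmap reader
finally:
    rdr.close()
```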
89 changes: 61 additions & 28 deletions src/nd2/_sdk/latest.pyx
@@ -1,7 +1,8 @@
 import json
 import mmap
+import warnings
 from pathlib import Path
-from typing import List, Sequence, Tuple
+from typing import List, Optional, Sequence, Tuple
 
 import numpy as np
 
@@ -29,38 +30,69 @@ cdef class ND2Reader:
     cdef __attributes
     cdef __dtype
     cdef __raw_frame_shape
+    cdef public _read_image
+    cdef public _read_using_sdk
+    cdef _wants_read_using_sdk
 
     def __cinit__(
-        self, path: str | Path, validate_frames: bool = False, search_window: int = 100
+        self,
+        path: str | Path,
+        validate_frames: bool = False,
+        search_window: int = 100,
+        read_using_sdk: Optional[bool] = None,
     ):
         self._is_open = 0
         self.__raw_frame_shape = None
         self._fh = NULL
         self.path = str(path)
+        self._wants_read_using_sdk = read_using_sdk
+        self.open()
 
-        with open(path, 'rb') as pyfh:
-            self._frame_map, self._meta_map = read_new_chunkmap(
-                pyfh, validate_frames=validate_frames, search_window=search_window
-            )
-        if validate_frames:
-            self._frame_map = self._frame_map['good']
+        if read_using_sdk is None:
+            read_using_sdk = self.attributes.compressionType is not None
+        self._read_using_sdk = read_using_sdk
+
+        if self._read_using_sdk:
+            self._read_image = self._read_image_with_sdk
+            self._frame_map, self._meta_map = {}, {}
+            self._max_frame_index = 0
+        else:
+            self._read_image = self._read_image_from_memmap
+
+            with open(path, 'rb') as pyfh:
+                self._frame_map, self._meta_map = read_new_chunkmap(
+                    pyfh, validate_frames=validate_frames, search_window=search_window
+                )
+            if validate_frames:
+                self._frame_map = self._frame_map['good']
+
+            self._max_frame_index = max(self._frame_map)
 
-        self._max_frame_index = max(self._frame_map)
-        self.open()
 
     cpdef open(self):
         if not self._is_open:
             self._fh = Lim_FileOpenForReadUtf8(self.path)
             if not self._fh:
                 raise OSError("Could not open file: %s" % self.path)
-            with open(self.path, 'rb') as fh:
-                self._mmap = mmap.mmap(fh.fileno(), 0, access=mmap.ACCESS_READ)
             self._is_open = 1
 
+            if self._wants_read_using_sdk is None:
+                self._read_using_sdk = self.attributes.compressionType is not None
+            else:
+                self._read_using_sdk = self._wants_read_using_sdk
+                if self.attributes.compressionType is not None and self._wants_read_using_sdk is False:
+                    Lim_FileClose(self._fh)
+                    raise ValueError("Cannot read compressed nd2 files with `read_using_sdk=False`")
+
+            if not self._read_using_sdk:
+                with open(self.path, 'rb') as fh:
+                    self._mmap = mmap.mmap(fh.fileno(), 0, access=mmap.ACCESS_READ)
 
     cpdef close(self):
         if self._is_open:
             Lim_FileClose(self._fh)
-            self._mmap.close()
+            if not self._read_using_sdk:
+                self._mmap.close()
             self._is_open = 0
 
     def __enter__(self):
@@ -191,20 +223,6 @@ cdef class ND2Reader:
                 "Sequence %d out of range (sequence count: %d)" % (seq_index, seq_count)
             )
 
-    def _image(self, LIMUINT seq_index):
-        self._validate_seq(seq_index)
-
-        cdef LIMPICTURE pic = nullpic()
-        cdef LIMRESULT result = Lim_FileGetImageData(self._fh, seq_index, &pic)
-
-        if result != 0:
-            error = LIM_ERR_CODE[result]
-            raise RuntimeError('Error retrieving image data: %s' % error)
-
-        array_wrapper = PicWrapper()
-        array_wrapper.set_pic(pic, Lim_DestroyPicture)
-        return array_wrapper.to_ndarray()
-
     def _custom_data(self) -> dict:
         from .._xml import parse_xml_block
 
@@ -245,7 +263,21 @@ cdef class ND2Reader:
         self.__dtype = np.dtype(f"{d}{a.bitsPerComponentInMemory // 8}")
         return self.__dtype
 
-    cpdef np.ndarray _read_image(self, index: int):
+    def _read_image_with_sdk(self, LIMUINT seq_index):
+        self._validate_seq(seq_index)
+
+        cdef LIMPICTURE pic = nullpic()
+        cdef LIMRESULT result = Lim_FileGetImageData(self._fh, seq_index, &pic)
+
+        if result != 0:
+            error = LIM_ERR_CODE[result]
+            raise RuntimeError('Error retrieving image data: %s' % error)
+
+        array_wrapper = PicWrapper()
+        array_wrapper.set_pic(pic, Lim_DestroyPicture)
+        return array_wrapper.to_ndarray()
+
+    cpdef np.ndarray _read_image_from_memmap(self, index: int):
         """Read a chunk directly without using SDK"""
         if index > self._max_frame_index:
             raise IndexError(f"Frame out of range: {index}")
@@ -275,6 +307,7 @@ cdef class ND2Reader:
                 count=np.prod(self._raw_frame_shape()),
                 offset=offset
             )  # this will be reshaped in nd2file.py
+
         except ValueError:
             # If the chunkmap is wrong, and the mmap isn't long enough
             # for the requested offset & size, a ValueError is raised.
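The dispatch added above reduces to a small rule: `None` means "use the SDK only if the file is compressed", and `False` on a compressed file is an error. A pure-Python sketch of that rule (the function name is illustrative, not part of the package):

```python
from typing import Optional

def choose_read_path(read_using_sdk: Optional[bool], is_compressed: bool) -> str:
    """Mirror the selection logic in __cinit__/open() above."""
    if read_using_sdk is None:
        read_using_sdk = is_compressed
    if is_compressed and read_using_sdk is False:
        raise ValueError("Cannot read compressed nd2 files with `read_using_sdk=False`")
    return "sdk" if read_using_sdk else "memmap"

assert choose_read_path(None, is_compressed=True) == "sdk"
assert choose_read_path(None, is_compressed=False) == "memmap"
assert choose_read_path(True, is_compressed=False) == "sdk"
```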
12 changes: 9 additions & 3 deletions src/nd2/_util.py
@@ -1,6 +1,6 @@
 import re
 from datetime import datetime
-from typing import IO, TYPE_CHECKING, Any, Callable, NamedTuple, Union
+from typing import IO, TYPE_CHECKING, Any, Callable, NamedTuple, Optional, Union
 
 if TYPE_CHECKING:
     from os import PathLike
@@ -38,15 +38,21 @@ def is_supported_file(
 
 
 def get_reader(
-    path: str, validate_frames: bool = False, search_window: int = 100
+    path: str,
+    validate_frames: bool = False,
+    search_window: int = 100,
+    read_using_sdk: Optional[bool] = None,
 ) -> Union["ND2Reader", "LegacyND2Reader"]:
     with open(path, "rb") as fh:
         magic_num = fh.read(4)
     if magic_num == NEW_HEADER_MAGIC:
         from ._sdk.latest import ND2Reader
 
         return ND2Reader(
-            path, validate_frames=validate_frames, search_window=search_window
+            path,
+            validate_frames=validate_frames,
+            search_window=search_window,
+            read_using_sdk=read_using_sdk,
         )
     elif magic_num == OLD_HEADER_MAGIC:
         from ._legacy import LegacyND2Reader
63 changes: 45 additions & 18 deletions src/nd2/nd2file.py
@@ -31,7 +31,7 @@
 if TYPE_CHECKING:
     from typing import Any, Dict, List, Tuple
 
-    import dask.array as da
+    import dask.array.core
     import xarray as xr
     from typing_extensions import Literal
 
@@ -51,8 +51,10 @@ class ND2File:
     def __init__(
         self,
         path: Union[Path, str],
+        *,
         validate_frames: bool = False,
         search_window: int = 100,
+        read_using_sdk: bool = None,
     ) -> None:
         """Open an nd2 file.
 
@@ -68,10 +70,19 @@ def __init__(
         search_window : int
             When validate_frames is true, this is the search window (in KB) that will
             be used to try to find the actual chunk position. by default 100 KB
+        read_using_sdk : Optional[bool]
+            If `True`, use the SDK to read the file. If `False`, inspects the chunkmap
+            and reads from a `numpy.memmap`. If `None` (the default), uses the SDK if
+            the file is compressed, otherwise uses the memmap. Note: using
+            `read_using_sdk=False` on a compressed file will result in a ValueError.
+
         """
         self._path = str(path)
         self._rdr = get_reader(
-            self._path, validate_frames=validate_frames, search_window=search_window
+            self._path,
+            validate_frames=validate_frames,
+            search_window=search_window,
+            read_using_sdk=read_using_sdk,
         )
         self._closed = False
         self._is_legacy = "Legacy" in type(self._rdr).__name__
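For reference, a short usage sketch of the new keyword on the public class (the file name is hypothetical):

```python
import nd2

# Default (read_using_sdk=None): use the SDK only when the file is compressed.
with nd2.ND2File("some_file.nd2") as f:
    data = f.asarray()

# Force the memmap path; raises ValueError if the file is compressed.
with nd2.ND2File("some_file.nd2", read_using_sdk=False) as f:
    data = f.asarray()
```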
@@ -313,7 +324,7 @@ def __array__(self) -> np.ndarray:
         """array protocol"""
         return self.asarray()
 
-    def to_dask(self, wrapper=True, copy=True) -> da.Array:
+    def to_dask(self, wrapper=True, copy=True) -> dask.array.core.Array:
         """Create dask array (delayed reader) representing image.
 
         This generally works well, but it remains to be seen whether performance
@@ -328,21 +339,21 @@ def to_dask(self, wrapper=True, copy=True) -> da.Array:
         wrapper : bool
             If True (the default), the returned obect will be a thin subclass of
             a :class:`dask.array.Array` (an
-            `ResourceBackedDaskArray`) that manages the opening
-            and closing of this file when getting chunks via compute(). If `wrapper`
-            is `False`, then a pure `da.Array` will be returned. However, when that
-            array is computed, it will incur a file open/close on *every* chunk
-            that is read (in the `_dask_block` method). As such `wrapper`
-            will generally be much faster, however, it *may* fail (i.e. result in
-            segmentation faults) with certain dask schedulers.
+            `ResourceBackedDaskArray`) that manages the opening and closing of this file
+            when getting chunks via compute(). If `wrapper` is `False`, then a pure
+            `dask.array.core.Array` will be returned. However, when that array is
+            computed, it will incur a file open/close on *every* chunk that is read (in
+            the `_dask_block` method). As such `wrapper` will generally be much faster,
+            however, it *may* fail (i.e. result in segmentation faults) with certain
+            dask schedulers.
         copy : bool
             If `True` (the default), the dask chunk-reading function will return
             an array copy. This can avoid segfaults in certain cases, though it
             may also add overhead.
 
         Returns
         -------
-        da.Array
+        dask.array.core.Array
         """
         from dask.array import map_blocks
 
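To make the wrapper/copy trade-off concrete, a hedged sketch of both modes (the file name is hypothetical):

```python
import nd2

with nd2.ND2File("some_file.nd2") as f:
    safe = f.to_dask()                # wrapper=True, copy=True (the defaults)
    plain = f.to_dask(wrapper=False)  # pure dask array: opens/closes the file
                                      # on every chunk read

# The ResourceBackedDaskArray reopens the file as needed, so computing
# after the `with` block has closed it is expected to work.
result = safe.compute()
```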
@@ -566,38 +577,46 @@ def __repr__(self) -> str:
 @overload
 def imread(
     file: Union[Path, str],
-    dask: Literal[False] = False,
-    xarray: Literal[False] = False,
+    *,
+    dask: Literal[False],
+    xarray: Literal[False],
     validate_frames: bool = False,
+    read_using_sdk: Optional[bool] = None,
 ) -> np.ndarray:
     ...
 
 
 @overload
 def imread(
     file: Union[Path, str],
+    *,
     dask: bool = ...,
-    xarray: Literal[True] = True,
+    xarray: Literal[True],
     validate_frames: bool = False,
+    read_using_sdk: Optional[bool] = None,
 ) -> xr.DataArray:
     ...
 
 
 @overload
 def imread(
     file: Union[Path, str],
-    dask: Literal[True] = ...,
-    xarray=False,
+    *,
+    dask: Literal[True],
+    xarray: Literal[False],
     validate_frames: bool = False,
-) -> da.Array:
+    read_using_sdk: Optional[bool] = None,
+) -> dask.array.core.Array:
     ...
 
 
 def imread(
     file: Union[Path, str],
+    *,
     dask: bool = False,
     xarray: bool = False,
     validate_frames: bool = False,
+    read_using_sdk: Optional[bool] = None,
 ):
     """Open `file`, return requested array type, and close `file`.
 
@@ -620,13 +639,21 @@ def imread(
        shifted relative to the predicted offset (i.e. in a corrupted file).
        This comes at a slight performance penalty at file open, but may "rescue"
        some corrupt files. by default False.
+    read_using_sdk : Optional[bool]
+        If `True`, use the SDK to read the file. If `False`, inspects the chunkmap and
+        reads from a `numpy.memmap`. If `None` (the default), uses the SDK if the file
+        is compressed, otherwise uses the memmap.
+        Note: using `read_using_sdk=False` on a compressed file will result in a
+        ValueError.
 
     Returns
     -------
     Union[np.ndarray, dask.array.Array, xarray.DataArray]
         Array subclass, depending on arguments used.
     """
-    with ND2File(file, validate_frames=validate_frames) as nd2:
+    with ND2File(
+        file, validate_frames=validate_frames, read_using_sdk=read_using_sdk
+    ) as nd2:
         if xarray:
             return nd2.to_xarray(delayed=dask)
         elif dask:
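Putting the new keyword together with the existing flags, a usage sketch of `imread` (the file name is hypothetical):

```python
import nd2

arr = nd2.imread("some_file.nd2")                          # np.ndarray
dsk = nd2.imread("some_file.nd2", dask=True)               # delayed dask array
xar = nd2.imread("some_file.nd2", xarray=True, dask=True)  # xr.DataArray
sdk = nd2.imread("some_file.nd2", read_using_sdk=True)     # force the SDK path
```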
9 changes: 8 additions & 1 deletion tests/conftest.py
@@ -18,10 +18,17 @@
 for x in ALL:
     NEW.append(x) if is_new_format(str(x)) else OLD.append(x)
 
+SINGLE = DATA / "dims_t3c2y32x32.nd2"
+
+
 @pytest.fixture
 def single_nd2():
-    return DATA / "dims_t3c2y32x32.nd2"
+    return SINGLE
+
+
+@pytest.fixture(params=ALL[:20])
+def small_nd2s(request):
+    return request.param
 
 
 @pytest.fixture(params=ALL, ids=lambda x: x.name)
21 changes: 21 additions & 0 deletions tests/test_reader.py
@@ -266,3 +266,24 @@ def test_chunkmap(validate):
     assert isinstance(d, np.ndarray)
     assert d.shape == (512, 512)
     assert np.array_equal(d[250:255, 250:255], expected)
+
+
+def test_with_without_sdk(small_nd2s: Path):
+    with ND2File(small_nd2s, read_using_sdk=True) as withsdk:
+        ary1 = withsdk.asarray()
+        dsk1 = withsdk.to_dask()
+        np.testing.assert_array_equal(ary1, dsk1)
+        compressed = bool(withsdk.attributes.compressionType)
+
+    if not compressed:
+        with ND2File(small_nd2s, read_using_sdk=False) as nosdk:
+            ary2 = nosdk.asarray()
+            dsk2 = nosdk.to_dask()
+            np.testing.assert_array_equal(ary2, dsk2)
+            if not nosdk.attributes.compressionType:
+                np.testing.assert_array_equal(ary1, ary2)
+    else:
+        with pytest.raises(
+            ValueError, match="compressed nd2 files with `read_using_sdk=False`"
+        ):
+            imread(small_nd2s, read_using_sdk=False)