Skip to content

Commit

Permalink
Refactor fixup (#51)
Browse files Browse the repository at this point in the history
* Fix the `fixup` parameter so that large ND2 files can be opened in a reasonable amount of time.

* Renamed fixup kwarg to validate_frames in _chunkmap.read_new_chunkmap.
* Set validate_frames to default to False.
* Renamed bad/fixed/safe to bad/fixed/good in read_chunkmap.
* Add validate_frames and search_window kwargs to ND2File.__init__ to pass through to _util.get_reader -> _sdk/latest.ND2Reader.__init__ -> _chunkmap.read_new_chunkmap.

* fix tests

* fix linting and type hinting

Co-authored-by: Talley Lambert <talley.lambert@gmail.com>
  • Loading branch information
shenker and tlambert03 authored May 18, 2022
1 parent 6d846bf commit cbc3bef
Show file tree
Hide file tree
Showing 7 changed files with 77 additions and 39 deletions.
54 changes: 28 additions & 26 deletions src/nd2/_chunkmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,14 @@ class FixedImageMap(TypedDict):
fixed: Set[int] # frames that were bad but fixed
# final mapping of frame number to absolute byte offset starting the chunk
# or None, if the chunk could not be verified
safe: Dict[int, Optional[int]]
good: Dict[int, Optional[int]]


@overload
def read_chunkmap(
file: Union[str, BinaryIO],
*,
fixup: Literal[True] = True,
validate_frames: Literal[True] = True,
legacy: bool = False,
search_window: int = ...,
) -> Tuple[FixedImageMap, Dict[str, int]]:
Expand All @@ -58,7 +58,7 @@ def read_chunkmap(
def read_chunkmap(
file: Union[str, BinaryIO],
*,
fixup: Literal[False],
validate_frames: Literal[False],
legacy: bool = False,
search_window: int = ...,
) -> Tuple[Dict[int, int], Dict[str, int]]:
Expand All @@ -68,7 +68,7 @@ def read_chunkmap(
def read_chunkmap(
file: Union[str, BinaryIO],
*,
fixup=True,
validate_frames=False,
legacy: bool = False,
search_window: int = 100,
):
Expand All @@ -78,38 +78,40 @@ def read_chunkmap(
----------
file : Union[str, BinaryIO]
Filename or file handle to nd2 file.
fixup : bool, optional
validate_frames : bool, optional
Whether to verify (and attempt to fix) frames whose positions have been
shifted relative to the predicted offset (i.e. in a corrupted file),
by default True.
by default False.
legacy : bool, optional
Treat file as legacy nd2 format, by default False
search_window : int, optional
When fixup is true, this is the search window (in KB) that will be used
to try to find the actual chunk position. by default 100 KB
When validate_frames is true, this is the search window (in KB) that will
be used to try to find the actual chunk position. by default 100 KB
Returns
-------
tuple
(image chunk positions, metadata chunk positions). If `fixup` is true,
the image chunk dict will have three keys:
`bad`: estimated frame positions that could not be verified
`fixed`: estimated frame positions that were wrong, but corrected
`safe`: estimated frame positions that were found to be correct.
(image chunk positions, metadata chunk positions). If `validate_frames` is
true, the image chunk dict will have three keys:
`bad`: estimated frame positions that were invalid.
`fixed`: estimated frame positions that were invalid, but corrected.
`good`: estimated frame positions that were already valid.
"""
with ensure_handle(file) as fh:
if not legacy:
return read_new_chunkmap(fh, fixup=fixup, search_window=search_window)
return read_new_chunkmap(
fh, validate_frames=validate_frames, search_window=search_window
)
from ._legacy import legacy_nd2_chunkmap

d = legacy_nd2_chunkmap(fh)
if fixup:
f = {"bad": [], "fixed": [], "safe": dict(enumerate(d.pop(b"LUNK")))}
if validate_frames:
f = {"bad": [], "fixed": [], "good": dict(enumerate(d.pop(b"LUNK")))}
return f, d


def read_new_chunkmap(
fh: BinaryIO, fixup: bool = True, search_window: int = 100
fh: BinaryIO, validate_frames: bool = False, search_window: int = 100
) -> Tuple[Union[Dict[int, int], FixedImageMap], Dict[str, int]]:
"""read the map of the chunks at the end of the file
Expand Down Expand Up @@ -155,18 +157,18 @@ def read_new_chunkmap(
else:
meta_map[name[:-1].decode("ascii")] = position
pos = p + 16
if fixup:
return _fix_frames(fh, image_map, kbrange=search_window), meta_map
if validate_frames:
return _validate_frames(fh, image_map, kbrange=search_window), meta_map
return image_map, meta_map


def _fix_frames(
def _validate_frames(
fh: BinaryIO, images: Dict[int, int], kbrange: int = 100
) -> FixedImageMap:
"""Look for corrupt frames, and try to find their actual positions."""
"""Look for invalid frames, and try to find their actual positions."""
bad: Set[int] = set()
fixed: Set[int] = set()
safe: Dict[int, Optional[int]] = {}
good: Dict[int, Optional[int]] = {}
_lengths = set()
for fnum, _p in images.items():
fh.seek(_p)
Expand All @@ -178,13 +180,13 @@ def _fix_frames(
)
if correct_pos is not None:
fixed.add(fnum)
safe[fnum] = correct_pos + 24 + int(shift)
good[fnum] = correct_pos + 24 + int(shift)
images[fnum] = correct_pos
else:
safe[fnum] = None
good[fnum] = None
else:
safe[fnum] = _p + 24 + int(shift)
return {"bad": bad, "fixed": fixed, "safe": safe}
good[fnum] = _p + 24 + int(shift)
return {"bad": bad, "fixed": fixed, "good": good}


def _search(fh: BinaryIO, string: bytes, guess: int, kbrange: int = 100):
Expand Down
7 changes: 6 additions & 1 deletion src/nd2/_sdk/latest.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,12 @@ from .. import structures

class ND2Reader:
path: str
def __init__(self, path: Union[str, Path]) -> None: ...
def __init__(
self,
path: Union[str, Path],
validate_frames: bool = False,
search_window: int = 100,
) -> None: ...
def open(self) -> None: ...
def close(self) -> None: ...
def __enter__(self) -> ND2Reader: ...
Expand Down
18 changes: 12 additions & 6 deletions src/nd2/_sdk/latest.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -26,23 +26,29 @@ cdef class ND2Reader:
cdef bint _is_open
cdef public dict _frame_map
cdef public dict _meta_map
cdef int _max_safe
cdef int _max_frame_index
cdef _mmap
cdef __strides
cdef __attributes
cdef __dtype
cdef __raw_frame_shape

def __cinit__(self, path: str | Path):
def __cinit__(
self, path: str | Path, validate_frames: bool = False, search_window: int = 100
):
self._is_open = 0
self.__raw_frame_shape = None
self._fh = NULL
self.path = str(path)

with open(path, 'rb') as pyfh:
self._frame_map, self._meta_map = read_new_chunkmap(pyfh)
self._frame_map, self._meta_map = read_new_chunkmap(
pyfh, validate_frames=validate_frames, search_window=search_window
)
if validate_frames:
self._frame_map = self._frame_map['good']

self._max_safe = max(self._frame_map["safe"])
self._max_frame_index = max(self._frame_map)
self.open()

cpdef open(self):
Expand Down Expand Up @@ -244,11 +250,11 @@ cdef class ND2Reader:

cpdef np.ndarray _read_image(self, index: int):
"""Read a chunk directly without using SDK"""
if index > self._max_safe:
if index > self._max_frame_index:
raise IndexError(f"Frame out of range: {index}")
if not self._is_open:
raise ValueError("Attempt to read from closed nd2 file")
offset = self._frame_map["safe"].get(index, None)
offset = self._frame_map.get(index, None)
if offset is None:
return self._missing_frame(index)

Expand Down
8 changes: 6 additions & 2 deletions src/nd2/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,17 @@ def is_supported_file(
return fh.read(4) in (NEW_HEADER_MAGIC, OLD_HEADER_MAGIC)


def get_reader(path: str) -> Union["ND2Reader", "LegacyND2Reader"]:
def get_reader(
path: str, validate_frames: bool = False, search_window: int = 100
) -> Union["ND2Reader", "LegacyND2Reader"]:
with open(path, "rb") as fh:
magic_num = fh.read(4)
if magic_num == NEW_HEADER_MAGIC:
from ._sdk.latest import ND2Reader

return ND2Reader(path)
return ND2Reader(
path, validate_frames=validate_frames, search_window=search_window
)
elif magic_num == OLD_HEADER_MAGIC:
from ._legacy import LegacyND2Reader

Expand Down
25 changes: 23 additions & 2 deletions src/nd2/nd2file.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,30 @@ class ND2File:
_memmap: mmap.mmap
_is_legacy: bool

def __init__(self, path: Union[Path, str]) -> None:
def __init__(
self,
path: Union[Path, str],
validate_frames: bool = False,
search_window: int = 100,
) -> None:
"""Open an nd2 file.
Parameters
----------
path : Union[Path, str]
Filename of an nd2 file.
validate_frames : bool, optional
Whether to verify (and attempt to fix) frames whose positions have been
shifted relative to the predicted offset (i.e. in a corrupted file),
by default False.
search_window : int, optional
When validate_frames is true, this is the search window (in KB) that will
be used to try to find the actual chunk position. by default 100 KB
"""
self._path = str(path)
self._rdr = get_reader(self._path)
self._rdr = get_reader(
self._path, validate_frames=validate_frames, search_window=search_window
)
self._closed = False
self._is_legacy = "Legacy" in type(self._rdr).__name__
self._lock = threading.RLock()
Expand Down
2 changes: 1 addition & 1 deletion tests/test_rescue.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ def test_rescue(single_nd2):
# TODO: we could potentially put more of this logic into convenience functions
# we can't do too much magic about guessing shape and dtype since some files
# may not have that information intact
with nd2.ND2File(single_nd2) as rdr:
with nd2.ND2File(single_nd2, validate_frames=True) as rdr:
real_read = rdr.asarray()
raw_frames = [
f.transpose((2, 0, 1, 3)).squeeze()
Expand Down
2 changes: 1 addition & 1 deletion tests/test_sdk.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def test_new_sdk(new_nd2: Path):
assert isinstance(csize, int)

# sometimes _seq_count is lower than attrs.sequenceCount
# if it is, _seq_count provides the highest "safe" frame you can retrieve.
# if it is, _seq_count provides the highest "good" frame you can retrieve.
if scount != a.get("sequenceCount"):
nd._image(scount - 1)
with pytest.raises(IndexError):
Expand Down

0 comments on commit cbc3bef

Please sign in to comment.