Refactor fixup #51

Merged
merged 4 commits on May 18, 2022
54 changes: 28 additions & 26 deletions src/nd2/_chunkmap.py
@@ -40,14 +40,14 @@ class FixedImageMap(TypedDict):
fixed: Set[int] # frames that were bad but fixed
# final mapping of frame number to absolute byte offset starting the chunk
# or None, if the chunk could not be verified
safe: Dict[int, Optional[int]]
good: Dict[int, Optional[int]]


@overload
def read_chunkmap(
file: Union[str, BinaryIO],
*,
fixup: Literal[True] = True,
validate_frames: Literal[True] = True,
legacy: bool = False,
search_window: int = ...,
) -> Tuple[FixedImageMap, Dict[str, int]]:
@@ -58,7 +58,7 @@ def read_chunkmap(
def read_chunkmap(
file: Union[str, BinaryIO],
*,
fixup: Literal[False],
validate_frames: Literal[False],
legacy: bool = False,
search_window: int = ...,
) -> Tuple[Dict[int, int], Dict[str, int]]:
@@ -68,7 +68,7 @@
def read_chunkmap(
file: Union[str, BinaryIO],
*,
fixup=True,
validate_frames=False,
legacy: bool = False,
search_window: int = 100,
):
@@ -78,38 +78,40 @@
----------
file : Union[str, BinaryIO]
Filename or file handle to nd2 file.
fixup : bool, optional
validate_frames : bool, optional
Whether to verify (and attempt to fix) frames whose positions have been
shifted relative to the predicted offset (i.e. in a corrupted file),
by default True.
by default False.
legacy : bool, optional
Treat file as legacy nd2 format, by default False
search_window : int, optional
When fixup is true, this is the search window (in KB) that will be used
to try to find the actual chunk position. by default 100 KB
When validate_frames is true, this is the search window (in KB) that will
be used to try to find the actual chunk position. by default 100 KB

Returns
-------
tuple
(image chunk positions, metadata chunk positions). If `fixup` is true,
the image chunk dict will have three keys:
`bad`: estimated frame positions that could not be verified
`fixed`: estimated frame positions that were wrong, but corrected
`safe`: estimated frame positions that were found to be correct.
(image chunk positions, metadata chunk positions). If `validate_frames` is
true, the image chunk dict will have three keys:
`bad`: estimated frame positions that were invalid.
`fixed`: estimated frame positions that were invalid, but corrected.
`good`: estimated frame positions that were already valid.
"""
with ensure_handle(file) as fh:
if not legacy:
return read_new_chunkmap(fh, fixup=fixup, search_window=search_window)
return read_new_chunkmap(
fh, validate_frames=validate_frames, search_window=search_window
)
from ._legacy import legacy_nd2_chunkmap

d = legacy_nd2_chunkmap(fh)
if fixup:
f = {"bad": [], "fixed": [], "safe": dict(enumerate(d.pop(b"LUNK")))}
if validate_frames:
f = {"bad": [], "fixed": [], "good": dict(enumerate(d.pop(b"LUNK")))}
return f, d


def read_new_chunkmap(
fh: BinaryIO, fixup: bool = True, search_window: int = 100
fh: BinaryIO, validate_frames: bool = False, search_window: int = 100
) -> Tuple[Union[Dict[int, int], FixedImageMap], Dict[str, int]]:
"""read the map of the chunks at the end of the file

@@ -155,18 +157,18 @@ def read_new_chunkmap(
else:
meta_map[name[:-1].decode("ascii")] = position
pos = p + 16
if fixup:
return _fix_frames(fh, image_map, kbrange=search_window), meta_map
if validate_frames:
return _validate_frames(fh, image_map, kbrange=search_window), meta_map
return image_map, meta_map


def _fix_frames(
def _validate_frames(
fh: BinaryIO, images: Dict[int, int], kbrange: int = 100
) -> FixedImageMap:
"""Look for corrupt frames, and try to find their actual positions."""
"""Look for invalid frames, and try to find their actual positions."""
bad: Set[int] = set()
fixed: Set[int] = set()
safe: Dict[int, Optional[int]] = {}
good: Dict[int, Optional[int]] = {}
_lengths = set()
for fnum, _p in images.items():
fh.seek(_p)
@@ -178,13 +180,13 @@ def _fix_frames(
)
if correct_pos is not None:
fixed.add(fnum)
safe[fnum] = correct_pos + 24 + int(shift)
good[fnum] = correct_pos + 24 + int(shift)
images[fnum] = correct_pos
else:
safe[fnum] = None
good[fnum] = None
else:
safe[fnum] = _p + 24 + int(shift)
return {"bad": bad, "fixed": fixed, "safe": safe}
good[fnum] = _p + 24 + int(shift)
return {"bad": bad, "fixed": fixed, "good": good}


def _search(fh: BinaryIO, string: bytes, guess: int, kbrange: int = 100):
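For reference, a minimal usage sketch of the renamed `read_chunkmap` API from the diff above. The file name and window size are placeholders, and `nd2._chunkmap` is a private module, so treat this as an illustration rather than documented API:

```python
from nd2._chunkmap import read_chunkmap

# With validate_frames=True the image map comes back as a FixedImageMap
# (keys "bad", "fixed", "good") instead of a plain {frame: offset} dict.
frames, meta = read_chunkmap(
    "corrupted.nd2",    # placeholder path
    validate_frames=True,
    search_window=100,  # KB searched around each predicted offset
)

print(frames["bad"])    # frame indices that could not be located
print(frames["fixed"])  # frame indices whose offsets were wrong but recovered
print(frames["good"])   # frame -> byte offset (or None) after validation
print(meta)             # metadata chunk name -> byte offset
```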
7 changes: 6 additions & 1 deletion src/nd2/_sdk/latest.pyi
@@ -7,7 +7,12 @@ from .. import structures

class ND2Reader:
path: str
def __init__(self, path: Union[str, Path]) -> None: ...
def __init__(
self,
path: Union[str, Path],
validate_frames: bool = False,
search_window: int = 100,
) -> None: ...
def open(self) -> None: ...
def close(self) -> None: ...
def __enter__(self) -> ND2Reader: ...
18 changes: 12 additions & 6 deletions src/nd2/_sdk/latest.pyx
@@ -26,23 +26,29 @@ cdef class ND2Reader:
cdef bint _is_open
cdef public dict _frame_map
cdef public dict _meta_map
cdef int _max_safe
cdef int _max_frame_index
cdef _mmap
cdef __strides
cdef __attributes
cdef __dtype
cdef __raw_frame_shape

def __cinit__(self, path: str | Path):
def __cinit__(
self, path: str | Path, validate_frames: bool = False, search_window: int = 100
):
self._is_open = 0
self.__raw_frame_shape = None
self._fh = NULL
self.path = str(path)

with open(path, 'rb') as pyfh:
self._frame_map, self._meta_map = read_new_chunkmap(pyfh)
self._frame_map, self._meta_map = read_new_chunkmap(
pyfh, validate_frames=validate_frames, search_window=search_window
)
if validate_frames:
self._frame_map = self._frame_map['good']

self._max_safe = max(self._frame_map["safe"])
self._max_frame_index = max(self._frame_map)
self.open()

cpdef open(self):
@@ -244,11 +250,11 @@ cdef class ND2Reader:

cpdef np.ndarray _read_image(self, index: int):
"""Read a chunk directly without using SDK"""
if index > self._max_safe:
if index > self._max_frame_index:
raise IndexError(f"Frame out of range: {index}")
if not self._is_open:
raise ValueError("Attempt to read from closed nd2 file")
offset = self._frame_map["safe"].get(index, None)
offset = self._frame_map.get(index, None)
if offset is None:
return self._missing_frame(index)

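As a hedged illustration of how the new keywords flow into the Cython reader above (placeholder path and window size; `_read_image` is an internal method shown only because it appears in this diff):

```python
from nd2._sdk.latest import ND2Reader

rdr = ND2Reader("shifted.nd2", validate_frames=True, search_window=200)
try:
    # With validate_frames=True, _frame_map is the "good" mapping, so a frame
    # whose offset could not be recovered falls back to _missing_frame().
    frame0 = rdr._read_image(0)
finally:
    rdr.close()
```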
8 changes: 6 additions & 2 deletions src/nd2/_util.py
@@ -37,13 +37,17 @@ def is_supported_file(
return fh.read(4) in (NEW_HEADER_MAGIC, OLD_HEADER_MAGIC)


def get_reader(path: str) -> Union["ND2Reader", "LegacyND2Reader"]:
def get_reader(
path: str, validate_frames: bool = False, search_window: int = 100
) -> Union["ND2Reader", "LegacyND2Reader"]:
with open(path, "rb") as fh:
magic_num = fh.read(4)
if magic_num == NEW_HEADER_MAGIC:
from ._sdk.latest import ND2Reader

return ND2Reader(path)
return ND2Reader(
path, validate_frames=validate_frames, search_window=search_window
)
elif magic_num == OLD_HEADER_MAGIC:
from ._legacy import LegacyND2Reader

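A sketch of the updated dispatch helper (placeholder path): the reader class is picked from the file's magic bytes, and the diff above shows the new keywords being forwarded to ND2Reader for new-format files:

```python
from nd2._util import get_reader

rdr = get_reader("example.nd2", validate_frames=True, search_window=100)
print(type(rdr).__name__)  # ND2Reader for new-format files, LegacyND2Reader otherwise
```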
25 changes: 23 additions & 2 deletions src/nd2/nd2file.py
@@ -49,9 +49,30 @@ class ND2File:
_memmap: mmap.mmap
_is_legacy: bool

def __init__(self, path: Union[Path, str]) -> None:
def __init__(
self,
path: Union[Path, str],
validate_frames: bool = False,
search_window: int = 100,
) -> None:
"""Open an nd2 file.

Parameters
----------
path : Union[Path, str]
Filename of an nd2 file.
validate_frames : bool, optional
Whether to verify (and attempt to fix) frames whose positions have been
shifted relative to the predicted offset (i.e. in a corrupted file),
by default False.
search_window : int, optional
When validate_frames is true, this is the search window (in KB) that will
be used to try to find the actual chunk position. by default 100 KB
"""
self._path = str(path)
self._rdr = get_reader(self._path)
self._rdr = get_reader(
self._path, validate_frames=validate_frames, search_window=search_window
)
self._closed = False
self._is_legacy = "Legacy" in type(self._rdr).__name__
self._lock = threading.RLock()
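A minimal sketch of the public entry point with the new options (placeholder file name); tests/test_rescue.py below exercises the same path against a real file:

```python
import nd2

# validate_frames asks the chunkmap reader to verify each frame offset and,
# within the search_window (KB), recover offsets that have shifted.
with nd2.ND2File("possibly_corrupted.nd2", validate_frames=True, search_window=100) as f:
    data = f.asarray()
```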
2 changes: 1 addition & 1 deletion tests/test_rescue.py
@@ -6,7 +6,7 @@ def test_rescue(single_nd2):
# TODO: we could potentially put more of this logic into convenience functions
# we can't do too much magic about guessing shape and dtype since some files
# may not have that information intact
with nd2.ND2File(single_nd2) as rdr:
with nd2.ND2File(single_nd2, validate_frames=True) as rdr:
real_read = rdr.asarray()
raw_frames = [
f.transpose((2, 0, 1, 3)).squeeze()
2 changes: 1 addition & 1 deletion tests/test_sdk.py
@@ -19,7 +19,7 @@ def test_new_sdk(new_nd2: Path):
assert isinstance(csize, int)

# sometimes _seq_count is lower than attrs.sequenceCount
# if it is, _seq_count provides the highest "safe" frame you can retrieve.
# if it is, _seq_count provides the highest "good" frame you can retrieve.
if scount != a.get("sequenceCount"):
nd._image(scount - 1)
with pytest.raises(IndexError):