feat: add read_using_sdk parameter, default to True for compressed files #74

Merged
merged 2 commits on Jun 25, 2022
3 changes: 2 additions & 1 deletion src/nd2/_sdk/latest.pyi
@@ -1,5 +1,5 @@
 from pathlib import Path
-from typing import Any, Dict, List, Sequence, Tuple, Union
+from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
 
 import numpy as np
 
@@ -12,6 +12,7 @@ class ND2Reader:
         path: Union[str, Path],
         validate_frames: bool = False,
         search_window: int = 100,
+        read_using_sdk: Optional[bool] = None,
     ) -> None: ...
     def open(self) -> None: ...
     def close(self) -> None: ...
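The stub now exposes the new keyword to type checkers. A minimal usage sketch against the low-level reader, assuming an nd2 file at a hypothetical path (`example.nd2` is not part of this PR):

```python
from nd2._sdk.latest import ND2Reader

# read_using_sdk=None (the default) lets the reader decide from the file's
# compression; True forces the SDK path, False forces the memmap path.
rdr = ND2Reader("example.nd2", read_using_sdk=None)  # hypothetical file
try:
    frame = rdr._read_image(0)  # dispatches to the SDK or memmap reader
finally:
    rdr.close()
```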
89 changes: 61 additions & 28 deletions src/nd2/_sdk/latest.pyx
@@ -1,7 +1,8 @@
 import json
 import mmap
+import warnings
 from pathlib import Path
-from typing import List, Sequence, Tuple
+from typing import List, Optional, Sequence, Tuple
 
 import numpy as np
 
@@ -29,38 +30,69 @@ cdef class ND2Reader:
     cdef __attributes
     cdef __dtype
     cdef __raw_frame_shape
+    cdef public _read_image
+    cdef public _read_using_sdk
+    cdef _wants_read_using_sdk
 
     def __cinit__(
-        self, path: str | Path, validate_frames: bool = False, search_window: int = 100
+        self,
+        path: str | Path,
+        validate_frames: bool = False,
+        search_window: int = 100,
+        read_using_sdk: Optional[bool] = None,
     ):
         self._is_open = 0
         self.__raw_frame_shape = None
         self._fh = NULL
         self.path = str(path)
+        self._wants_read_using_sdk = read_using_sdk
+        self.open()
 
-        with open(path, 'rb') as pyfh:
-            self._frame_map, self._meta_map = read_new_chunkmap(
-                pyfh, validate_frames=validate_frames, search_window=search_window
-            )
-        if validate_frames:
-            self._frame_map = self._frame_map['good']
+        if read_using_sdk is None:
+            read_using_sdk = self.attributes.compressionType is not None
+        self._read_using_sdk = read_using_sdk
+
+        if self._read_using_sdk:
+            self._read_image = self._read_image_with_sdk
+            self._frame_map, self._meta_map = {}, {}
+            self._max_frame_index = 0
+        else:
+            self._read_image = self._read_image_from_memmap
+
+            with open(path, 'rb') as pyfh:
+                self._frame_map, self._meta_map = read_new_chunkmap(
+                    pyfh, validate_frames=validate_frames, search_window=search_window
+                )
+            if validate_frames:
+                self._frame_map = self._frame_map['good']
+
+            self._max_frame_index = max(self._frame_map)
 
-        self._max_frame_index = max(self._frame_map)
-        self.open()
 
     cpdef open(self):
         if not self._is_open:
             self._fh = Lim_FileOpenForReadUtf8(self.path)
             if not self._fh:
                 raise OSError("Could not open file: %s" % self.path)
-            with open(self.path, 'rb') as fh:
-                self._mmap = mmap.mmap(fh.fileno(), 0, access=mmap.ACCESS_READ)
             self._is_open = 1
 
+            if self._wants_read_using_sdk is None:
+                self._read_using_sdk = self.attributes.compressionType is not None
+            else:
+                self._read_using_sdk = self._wants_read_using_sdk
+                if self.attributes.compressionType is not None and self._wants_read_using_sdk is False:
+                    Lim_FileClose(self._fh)
+                    raise ValueError("Cannot read compressed nd2 files with `read_using_sdk=False`")
+
+            if not self._read_using_sdk:
+                with open(self.path, 'rb') as fh:
+                    self._mmap = mmap.mmap(fh.fileno(), 0, access=mmap.ACCESS_READ)
 
     cpdef close(self):
         if self._is_open:
             Lim_FileClose(self._fh)
-            self._mmap.close()
+            if not self._read_using_sdk:
+                self._mmap.close()
             self._is_open = 0
 
     def __enter__(self):
@@ -191,20 +223,6 @@ cdef class ND2Reader:
                 "Sequence %d out of range (sequence count: %d)" % (seq_index, seq_count)
             )
 
-    def _image(self, LIMUINT seq_index):
-        self._validate_seq(seq_index)
-
-        cdef LIMPICTURE pic = nullpic()
-        cdef LIMRESULT result = Lim_FileGetImageData(self._fh, seq_index, &pic)
-
-        if result != 0:
-            error = LIM_ERR_CODE[result]
-            raise RuntimeError('Error retrieving image data: %s' % error)
-
-        array_wrapper = PicWrapper()
-        array_wrapper.set_pic(pic, Lim_DestroyPicture)
-        return array_wrapper.to_ndarray()
-
     def _custom_data(self) -> dict:
         from .._xml import parse_xml_block
 
@@ -245,7 +263,21 @@ cdef class ND2Reader:
         self.__dtype = np.dtype(f"{d}{a.bitsPerComponentInMemory // 8}")
         return self.__dtype
 
-    cpdef np.ndarray _read_image(self, index: int):
+    def _read_image_with_sdk(self, LIMUINT seq_index):
+        self._validate_seq(seq_index)
+
+        cdef LIMPICTURE pic = nullpic()
+        cdef LIMRESULT result = Lim_FileGetImageData(self._fh, seq_index, &pic)
+
+        if result != 0:
+            error = LIM_ERR_CODE[result]
+            raise RuntimeError('Error retrieving image data: %s' % error)
+
+        array_wrapper = PicWrapper()
+        array_wrapper.set_pic(pic, Lim_DestroyPicture)
+        return array_wrapper.to_ndarray()
+
+    cpdef np.ndarray _read_image_from_memmap(self, index: int):
         """Read a chunk directly without using SDK"""
         if index > self._max_frame_index:
             raise IndexError(f"Frame out of range: {index}")
@@ -275,6 +307,7 @@ cdef class ND2Reader:
                 count=np.prod(self._raw_frame_shape()),
                 offset=offset
             )  # this will be reshaped in nd2file.py
+
         except ValueError:
             # If the chunkmap is wrong, and the mmap isn't long enough
             # for the requested offset & size, a ValueError is raised.
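The dispatch added above reduces to a small rule: `None` means "use the SDK only if the file is compressed", and `False` on a compressed file is an error. A pure-Python sketch of that rule (the function name is illustrative, not part of the package):

```python
from typing import Optional

def choose_read_path(read_using_sdk: Optional[bool], is_compressed: bool) -> str:
    """Mirror the selection logic in __cinit__/open() above."""
    if read_using_sdk is None:
        read_using_sdk = is_compressed
    if is_compressed and read_using_sdk is False:
        raise ValueError("Cannot read compressed nd2 files with `read_using_sdk=False`")
    return "sdk" if read_using_sdk else "memmap"

assert choose_read_path(None, is_compressed=True) == "sdk"
assert choose_read_path(None, is_compressed=False) == "memmap"
assert choose_read_path(True, is_compressed=False) == "sdk"
```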
12 changes: 9 additions & 3 deletions src/nd2/_util.py
@@ -1,6 +1,6 @@
 import re
 from datetime import datetime
-from typing import IO, TYPE_CHECKING, Any, Callable, NamedTuple, Union
+from typing import IO, TYPE_CHECKING, Any, Callable, NamedTuple, Optional, Union
 
 if TYPE_CHECKING:
     from os import PathLike
@@ -38,15 +38,21 @@ def is_supported_file(
 
 
 def get_reader(
-    path: str, validate_frames: bool = False, search_window: int = 100
+    path: str,
+    validate_frames: bool = False,
+    search_window: int = 100,
+    read_using_sdk: Optional[bool] = None,
 ) -> Union["ND2Reader", "LegacyND2Reader"]:
     with open(path, "rb") as fh:
         magic_num = fh.read(4)
     if magic_num == NEW_HEADER_MAGIC:
         from ._sdk.latest import ND2Reader
 
         return ND2Reader(
-            path, validate_frames=validate_frames, search_window=search_window
+            path,
+            validate_frames=validate_frames,
+            search_window=search_window,
+            read_using_sdk=read_using_sdk,
         )
     elif magic_num == OLD_HEADER_MAGIC:
         from ._legacy import LegacyND2Reader
63 changes: 45 additions & 18 deletions src/nd2/nd2file.py
@@ -31,7 +31,7 @@
 if TYPE_CHECKING:
     from typing import Any, Dict, List, Tuple
 
-    import dask.array as da
+    import dask.array.core
     import xarray as xr
     from typing_extensions import Literal
 
@@ -51,8 +51,10 @@ class ND2File:
     def __init__(
         self,
         path: Union[Path, str],
+        *,
         validate_frames: bool = False,
         search_window: int = 100,
+        read_using_sdk: bool = None,
     ) -> None:
         """Open an nd2 file.
 
@@ -68,10 +70,19 @@ def __init__(
         search_window : int
             When validate_frames is true, this is the search window (in KB) that will
             be used to try to find the actual chunk position. by default 100 KB
+        read_using_sdk : Optional[bool]
+            If `True`, use the SDK to read the file. If `False`, inspects the chunkmap
+            and reads from a `numpy.memmap`. If `None` (the default), uses the SDK if
+            the file is compressed, otherwise uses the memmap. Note: using
+            `read_using_sdk=False` on a compressed file will result in a ValueError.
+
         """
         self._path = str(path)
         self._rdr = get_reader(
-            self._path, validate_frames=validate_frames, search_window=search_window
+            self._path,
+            validate_frames=validate_frames,
+            search_window=search_window,
+            read_using_sdk=read_using_sdk,
         )
         self._closed = False
         self._is_legacy = "Legacy" in type(self._rdr).__name__
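For reference, a short usage sketch of the new keyword on the public class (the file name is hypothetical):

```python
import nd2

# Default (read_using_sdk=None): use the SDK only when the file is compressed.
with nd2.ND2File("some_file.nd2") as f:
    data = f.asarray()

# Force the memmap path; raises ValueError if the file is compressed.
with nd2.ND2File("some_file.nd2", read_using_sdk=False) as f:
    data = f.asarray()
```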
@@ -313,7 +324,7 @@ def __array__(self) -> np.ndarray:
         """array protocol"""
         return self.asarray()
 
-    def to_dask(self, wrapper=True, copy=True) -> da.Array:
+    def to_dask(self, wrapper=True, copy=True) -> dask.array.core.Array:
         """Create dask array (delayed reader) representing image.
 
         This generally works well, but it remains to be seen whether performance
@@ -328,21 +339,21 @@ def to_dask(self, wrapper=True, copy=True) -> da.Array:
         wrapper : bool
             If True (the default), the returned obect will be a thin subclass of
             a :class:`dask.array.Array` (an
-            `ResourceBackedDaskArray`) that manages the opening
-            and closing of this file when getting chunks via compute(). If `wrapper`
-            is `False`, then a pure `da.Array` will be returned. However, when that
-            array is computed, it will incur a file open/close on *every* chunk
-            that is read (in the `_dask_block` method). As such `wrapper`
-            will generally be much faster, however, it *may* fail (i.e. result in
-            segmentation faults) with certain dask schedulers.
+            `ResourceBackedDaskArray`) that manages the opening and closing of this file
+            when getting chunks via compute(). If `wrapper` is `False`, then a pure
+            `dask.array.core.Array` will be returned. However, when that array is
+            computed, it will incur a file open/close on *every* chunk that is read (in
+            the `_dask_block` method). As such `wrapper` will generally be much faster,
+            however, it *may* fail (i.e. result in segmentation faults) with certain
+            dask schedulers.
         copy : bool
             If `True` (the default), the dask chunk-reading function will return
             an array copy. This can avoid segfaults in certain cases, though it
             may also add overhead.
 
         Returns
         -------
-        da.Array
+        dask.array.core.Array
         """
         from dask.array import map_blocks
 
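To make the wrapper/copy trade-off concrete, a hedged sketch of both modes (the file name is hypothetical):

```python
import nd2

with nd2.ND2File("some_file.nd2") as f:
    safe = f.to_dask()                # wrapper=True, copy=True (the defaults)
    plain = f.to_dask(wrapper=False)  # pure dask array: opens/closes the file
                                      # on every chunk read

# The ResourceBackedDaskArray reopens the file as needed, so computing
# after the `with` block has closed it is expected to work.
result = safe.compute()
```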
@@ -566,38 +577,46 @@ def __repr__(self) -> str:
 @overload
 def imread(
     file: Union[Path, str],
-    dask: Literal[False] = False,
-    xarray: Literal[False] = False,
+    *,
+    dask: Literal[False],
+    xarray: Literal[False],
     validate_frames: bool = False,
+    read_using_sdk: Optional[bool] = None,
 ) -> np.ndarray:
     ...
 
 
 @overload
 def imread(
     file: Union[Path, str],
+    *,
     dask: bool = ...,
-    xarray: Literal[True] = True,
+    xarray: Literal[True],
     validate_frames: bool = False,
+    read_using_sdk: Optional[bool] = None,
 ) -> xr.DataArray:
     ...
 
 
 @overload
 def imread(
     file: Union[Path, str],
-    dask: Literal[True] = ...,
-    xarray=False,
+    *,
+    dask: Literal[True],
+    xarray: Literal[False],
     validate_frames: bool = False,
-) -> da.Array:
+    read_using_sdk: Optional[bool] = None,
+) -> dask.array.core.Array:
     ...
 
 
 def imread(
     file: Union[Path, str],
+    *,
     dask: bool = False,
     xarray: bool = False,
     validate_frames: bool = False,
+    read_using_sdk: Optional[bool] = None,
 ):
     """Open `file`, return requested array type, and close `file`.
 
@@ -620,13 +639,21 @@ def imread(
        shifted relative to the predicted offset (i.e. in a corrupted file).
        This comes at a slight performance penalty at file open, but may "rescue"
        some corrupt files. by default False.
+    read_using_sdk : Optional[bool]
+        If `True`, use the SDK to read the file. If `False`, inspects the chunkmap and
+        reads from a `numpy.memmap`. If `None` (the default), uses the SDK if the file
+        is compressed, otherwise uses the memmap.
+        Note: using `read_using_sdk=False` on a compressed file will result in a
+        ValueError.
 
     Returns
     -------
     Union[np.ndarray, dask.array.Array, xarray.DataArray]
         Array subclass, depending on arguments used.
     """
-    with ND2File(file, validate_frames=validate_frames) as nd2:
+    with ND2File(
+        file, validate_frames=validate_frames, read_using_sdk=read_using_sdk
+    ) as nd2:
         if xarray:
             return nd2.to_xarray(delayed=dask)
         elif dask:
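Putting the new keyword together with the existing flags, a usage sketch of `imread` (the file name is hypothetical):

```python
import nd2

arr = nd2.imread("some_file.nd2")                          # np.ndarray
dsk = nd2.imread("some_file.nd2", dask=True)               # delayed dask array
xar = nd2.imread("some_file.nd2", xarray=True, dask=True)  # xr.DataArray
sdk = nd2.imread("some_file.nd2", read_using_sdk=True)     # force the SDK path
```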
9 changes: 8 additions & 1 deletion tests/conftest.py
@@ -18,10 +18,17 @@
 for x in ALL:
     NEW.append(x) if is_new_format(str(x)) else OLD.append(x)
 
+SINGLE = DATA / "dims_t3c2y32x32.nd2"
+
+
 @pytest.fixture
 def single_nd2():
-    return DATA / "dims_t3c2y32x32.nd2"
+    return SINGLE
+
+
+@pytest.fixture(params=ALL[:20])
+def small_nd2s(request):
+    return request.param
 
 
 @pytest.fixture(params=ALL, ids=lambda x: x.name)
21 changes: 21 additions & 0 deletions tests/test_reader.py
@@ -266,3 +266,24 @@ def test_chunkmap(validate):
     assert isinstance(d, np.ndarray)
     assert d.shape == (512, 512)
     assert np.array_equal(d[250:255, 250:255], expected)
+
+
+def test_with_without_sdk(small_nd2s: Path):
+    with ND2File(small_nd2s, read_using_sdk=True) as withsdk:
+        ary1 = withsdk.asarray()
+        dsk1 = withsdk.to_dask()
+        np.testing.assert_array_equal(ary1, dsk1)
+        compressed = bool(withsdk.attributes.compressionType)
+
+    if not compressed:
+        with ND2File(small_nd2s, read_using_sdk=False) as nosdk:
+            ary2 = nosdk.asarray()
+            dsk2 = nosdk.to_dask()
+            np.testing.assert_array_equal(ary2, dsk2)
+            if not nosdk.attributes.compressionType:
+                np.testing.assert_array_equal(ary1, ary2)
+    else:
+        with pytest.raises(
+            ValueError, match="compressed nd2 files with `read_using_sdk=False`"
+        ):
+            imread(small_nd2s, read_using_sdk=False)