Skip to content

Commit

Permalink
mrcz format
Browse files Browse the repository at this point in the history
  • Loading branch information
jlaehne committed Dec 8, 2022
1 parent 1ddd742 commit 7bbb000
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 39 deletions.
41 changes: 3 additions & 38 deletions docs/supported_formats/mrcz.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,43 +22,8 @@ Preprint: https://www.biorxiv.org/content/10.1101/116533v1
Support for this format is not enabled by default. In order to enable it
install the `mrcz` and optionally the `blosc` Python packages.

Extra saving arguments
^^^^^^^^^^^^^^^^^^^^^^

- ``do_async``: currently supported within RosettaSciIO for writing only, this will
save the file in a background thread and return immediately. Defaults
to `False`.

.. Warning::

There is no method currently implemented within RosettaSciIO to tell if an
asynchronous write has finished.


- ``compressor``: The compression codec, one of [`None`, `'zlib'`, `'zstd'`, `'lz4'`].
Defaults to `None`.
- ``clevel``: The compression level, an `int` from 1 to 9. Defaults to 1.
- ``n_threads``: The number of threads to use for 'blosc' compression. Defaults to
the maximum number of virtual cores (including Intel Hyperthreading)
on your system, which is recommended for best performance. If \
``do_async = True`` you may wish to leave one thread free for the
Python GIL.

The recommended compression codec is 'zstd' (zStandard) with `clevel=1` for
general use. If speed is critical, use 'lz4' (LZ4) with `clevel=9`. Integer data
compresses more readily than floating-point data, and in general the histogram
of values in the data reflects how compressible it is.

To save files that are compatible with other programs that can use MRC such as
GMS, IMOD, Relion, MotionCorr, etc. save with `compressor=None`, extension `.mrc`.
JSON metadata will not be recognized by other MRC-supporting software but should
not cause crashes.

Example Usage
API functions
^^^^^^^^^^^^^

.. code-block:: python
>>> s.save('file.mrcz', do_async=True, compressor='zstd', clevel=1)
>>> new_signal = hs.load('file.mrcz')
.. automodule:: rsciio.mrcz
:members:
1 change: 1 addition & 0 deletions rsciio/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
"JEOL",
"JobinYvon",
"MRC",
"MRCZ",
"netCDF",
"NeXus",
"Phenom",
Expand Down
14 changes: 14 additions & 0 deletions rsciio/mrcz/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from ._api import (
file_reader,
file_writer,
)


__all__ = [
"file_reader",
"file_writer",
]


def __dir__():
    """Return the module's public names, sorted alphabetically.

    Module-level ``__dir__`` hook: makes ``dir()`` on this module report only
    the names listed in ``__all__``.

    Returns
    -------
    list of str
        The entries of ``__all__`` in sorted order.
    """
    return sorted(__all__)
78 changes: 77 additions & 1 deletion rsciio/mrcz/api.py → rsciio/mrcz/_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@
from packaging.version import Version
import mrcz as _mrcz
import logging

from rsciio.docstrings import (
FILENAME_DOC,
LAZY_DOC,
ENDIANESS_DOC,
RETURNS_DOC,
SIGNAL_DOC,
)
from rsciio.utils.tools import DTBox


Expand Down Expand Up @@ -67,7 +75,24 @@ def _parse_metadata(metadata):
}


def file_reader(filename, endianess="<", lazy=False, mmap_mode="c", **kwds):
def file_reader(filename, lazy=False, endianess="<", mmap_mode="c", **kwds):
"""File reader for the MRCZ format for tomographic data.
Parameters
----------
%s
%s
%s
mmap_mode : str, Default="c"
The MRCZ reader currently only supports C-ordering memory-maps.
%s
Examples
--------
>>> from rsciio.mrcz import file_reader
>>> new_signal = file_reader('file.mrcz')
"""
_logger.debug("Reading MRCZ file: %s" % filename)

if mmap_mode != "c":
Expand Down Expand Up @@ -115,9 +140,53 @@ def file_reader(filename, endianess="<", lazy=False, mmap_mode="c", **kwds):
]


# Fill the ``%s`` placeholders of the docstring template with the shared
# parameter descriptions. ``__doc__`` is ``None`` when Python runs with
# ``-OO`` (docstrings stripped); skip the substitution in that case instead
# of raising ``TypeError`` at import time.
if file_reader.__doc__ is not None:
    file_reader.__doc__ %= (FILENAME_DOC, LAZY_DOC, ENDIANESS_DOC, RETURNS_DOC)


def file_writer(
filename, signal, do_async=False, compressor=None, clevel=1, n_threads=None, **kwds
):
"""
Write signal to MRCZ format.
Parameters
----------
%s
%s
%s
do_async : bool, Default=False
Currently supported within RosettaSciIO for writing only, this will
save the file in a background thread and return immediately.
Warning: there is no method currently implemented within RosettaSciIO
to tell if an asynchronous write has finished.
compressor : {None, "zlib", "zstd", "lz4"}, Default=None
The compression codec.
clevel : int, Default=1
The compression level, an ``int`` from 1 to 9.
n_threads : int
The number of threads to use for ``blosc`` compression. Defaults to
the maximum number of virtual cores (including Intel Hyperthreading)
on your system, which is recommended for best performance. If
``do_async = True`` you may wish to leave one thread free for the
Python GIL.
Notes
-----
The recommended compression codec is ``zstd`` (zStandard) with ``clevel=1`` for
general use. If speed is critical, use ``lz4`` (LZ4) with ``clevel=9``. Integer data
compresses more readily than floating-point data, and in general the histogram
of values in the data reflects how compressible it is.
To save files that are compatible with other programs that can use MRC such as
GMS, IMOD, Relion, MotionCorr, etc. save with ``compressor=None``, extension ``.mrc``.
JSON metadata will not be recognized by other MRC-supporting software but should
not cause crashes.
Examples
--------
>>> from rsciio.mrcz import file_writer
>>> file_writer('file.mrcz', signal, do_async=True, compressor='zstd', clevel=1)
"""
endianess = kwds.pop("endianess", "<")
mrcz_endian = "le" if endianess == "<" else "be"

Expand Down Expand Up @@ -162,3 +231,10 @@ def file_writer(
clevel=clevel,
n_threads=n_threads,
)


# Fill the ``%s`` placeholders of the docstring template with the shared
# parameter descriptions (the filename text is reworded for writing).
# ``__doc__`` is ``None`` when Python runs with ``-OO`` (docstrings
# stripped); skip the substitution in that case instead of raising
# ``TypeError`` at import time.
if file_writer.__doc__ is not None:
    file_writer.__doc__ %= (
        FILENAME_DOC.replace("read", "write to"),
        SIGNAL_DOC,
        ENDIANESS_DOC,
    )
6 changes: 6 additions & 0 deletions rsciio/tests/test_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,12 @@ def test_dir_plugins():

assert dir(mrc) == ["file_reader"]

# mrcz is an optional dependency
pytest.importorskip("mrcz")
from rsciio import mrcz

assert dir(mrcz) == ["file_reader", "file_writer"]

from rsciio import netcdf

assert dir(netcdf) == ["file_reader"]
Expand Down

0 comments on commit 7bbb000

Please sign in to comment.