Skip to content

Commit

Permalink
Add zarr_rechunk command line tool
Browse files Browse the repository at this point in the history
  • Loading branch information
blowekamp committed Mar 29, 2023
1 parent bb0dbb2 commit 9deb718
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 0 deletions.
67 changes: 67 additions & 0 deletions pytools/zarr_rechunk.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import zarr
import click
import logging
from pathlib import Path
from pytools import __version__

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


def _chunk_logic_dim(drequest: int, dshape: int) -> int:
    """Pick the chunk length to use along a single array dimension.

    :param drequest: Requested chunk length; zero or a negative value means "no request".
    :param dshape: Length of the array along this dimension.
    :return: ``drequest`` when it is positive and strictly smaller than ``dshape``,
        otherwise ``dshape`` (a single chunk spanning the whole dimension).
    """
    request_fits = 0 < drequest < dshape
    return drequest if request_fits else dshape


def rechunk_group(group: zarr.Group, chunk_size: int):
    """Recursively rechunk the OME-NGFF multiscale arrays under a Zarr group, in place.

    Child groups are processed first (a child named "OME" is skipped). Then every
    dataset listed in this group's "multiscales" attribute is replaced by a copy whose
    spatial axes use chunks of ``chunk_size`` (clamped to the array extent) and whose
    other axes are each stored as one chunk spanning the whole dimension. Arrays whose
    chunks already match are left untouched.

    :param group: Zarr group to process; rechunked arrays are stored back into it.
    :param chunk_size: Requested chunk length for axes whose type is "space".
    """
    logger.info('Processing group: "%s"...', group.name)
    logger.debug(group)

    for group_name, child_group in group.groups():
        if group_name != "OME":
            rechunk_group(child_group, chunk_size)

    # Walk the OME-NGFF metadata: each multiscale image lists its axes and its
    # per-resolution datasets (arrays).
    # https://ngff.openmicroscopy.org/latest/#multiscale-md
    if "multiscales" not in group.attrs:
        return

    for image in group.attrs["multiscales"]:
        # The "type" field of an axis is optional in the metadata, so use .get() rather
        # than indexing; an axis without it is handled like any other non-spatial axis.
        # -1 is the "no request" value understood by _chunk_logic_dim.
        chunk_request = tuple(chunk_size if a.get("type") == "space" else -1 for a in image["axes"])

        for dataset in image["datasets"]:
            arr = group[dataset["path"]]
            logger.info('Processing array: "%s"...', arr.name)
            logger.debug(arr.info)

            chunks = tuple(_chunk_logic_dim(r, s) for r, s in zip(chunk_request, arr.shape))
            if arr.chunks == chunks:
                logger.info("Chunks already requested size")
                continue

            # Build a new array from the existing data with the new chunking and store
            # it under the same path, replacing the original.
            # NOTE(review): only `chunks` is passed explicitly; confirm whether other
            # settings of the original array (e.g. compressor, attributes) must carry over.
            group[dataset["path"]] = zarr.array(arr, chunks=chunks)
            logger.debug(group[dataset["path"]].info)


@click.command()
@click.argument("input_zarr", type=click.Path(exists=True, dir_okay=True, readable=True, path_type=Path))
@click.option(
    "--log-level",
    default="INFO",
    show_default=True,
    type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False),
    help="The logging level for messages written by this tool.",
)
@click.option(
    "--chunk-size",
    default=64,
    show_default=True,
    type=click.IntRange(min=1),
    help="The size of zarr chunks stored in spatial dimensions.",
)
@click.version_option(__version__)
def main(input_zarr, log_level, chunk_size):
    """Rechunk, in place, the OME-NGFF multiscale arrays stored in the INPUT_ZARR directory.

    Spatial dimensions are stored with chunks of the requested size; every other
    dimension is stored as a single chunk.
    """
    # basicConfig accepts a level name directly, which avoids relying on the legacy
    # name-to-number mapping of logging.getLevelName. The option is case-insensitive,
    # so normalize to the upper-case names the logging module knows.
    logging.basicConfig(format="%(levelname)s: %(message)s", level=log_level.upper())

    store = zarr.DirectoryStore(input_zarr)
    root = zarr.group(store=store)

    rechunk_group(root, chunk_size)


if __name__ == "__main__":
main()
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
SimpleITK ~=2.2.0
click >=7.1
numpy >=1.21
zarr
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
"console_scripts": [
"mrc2nifti = pytools.ng.mrc2nifti:main",
"mrc_visual_min_max = pytools.ng.build_histogram:main",
"zarr_rechunk = pytools.zarr_rechunk:main",
]
},
classifiers=[
Expand Down
12 changes: 12 additions & 0 deletions test/test_zarr_rechunk.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from click.testing import CliRunner
import pytools.zarr_rechunk
import pytest

# Command line argument strings; the parametrized test below runs once per entry.
args = ["--help", "--version"]


@pytest.mark.parametrize("cli_args", args)
def test_mrc2nifti_main_help(cli_args):
    """Invoke the command line entry point with the given arguments and expect no exception."""
    # Despite the function name, this exercises pytools.zarr_rechunk.main.
    argv = cli_args.split()
    outcome = CliRunner().invoke(pytools.zarr_rechunk.main, argv)
    assert not outcome.exception

0 comments on commit 9deb718

Please sign in to comment.