Skip to content

Commit

Permalink
Refactor bf2raw command to be centrally placed
Browse files Browse the repository at this point in the history
  • Loading branch information
annshress committed Sep 25, 2023
1 parent fec2031 commit 188e108
Show file tree
Hide file tree
Showing 13 changed files with 196 additions and 159 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,10 @@ jobs:
python -m pip install --upgrade pip
pip install -e . -r requirements.txt
- name: Directory setup
run: |
cp .env.sample .env
- name: Test Coverage
continue-on-error: true
run: |
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ coverage.xml
.hypothesis/
.pytest_cache/

test/tmp/

# Translations
*.mo
*.pot
Expand Down
47 changes: 39 additions & 8 deletions em_workflows/brt/flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
- Finally, we ``POST`` the JSON data structure to the API, and clean up temp dirs.
"""

from typing import Dict
import glob
import os
from em_workflows.file_path import FilePath
Expand All @@ -45,6 +46,7 @@
from prefect import task, Flow, Parameter, unmapped
from prefect.run_configs import LocalRun
from prefect.engine import signals
from pytools.workflow_functions import visual_min_max

from em_workflows.utils import utils
from em_workflows.utils import neuroglancer as ng
Expand Down Expand Up @@ -475,6 +477,41 @@ def cleanup_files(file_path: FilePath, pattern=str):
# return inputs_paired


@task
def gen_zarr(fp_in: FilePath):
    """
    Convert one input to zarr, then run the multiscale build on it.

    The ``_rec.mrc`` output associated with ``fp_in`` is converted when it
    exists on disk; otherwise the original input file is used as a fallback.

    :param fp_in: FilePath describing the input being processed.
    """
    # Resolve the fallback first, then prefer the _rec.mrc output if present.
    fallback_input = fp_in.fp_in.as_posix()
    rec_mrc = fp_in.gen_output_fp(output_ext="_rec.mrc")
    source = rec_mrc.as_posix() if rec_mrc.is_file() else fallback_input

    # Only one resolution is requested from the converter here;
    # ng.zarr_build_multiscales is invoked right after on the same FilePath.
    ng.bioformats_gen_zarr(
        file_path=fp_in,
        input_fname=source,
        depth=BRT_DEPTH,
        width=BRT_WIDTH,
        height=BRT_HEIGHT,
        resolutions=1,
    )
    ng.zarr_build_multiscales(fp_in)


@task
def gen_ng_metadata(fp_in: FilePath) -> Dict:
    """
    Build the Neuroglancer zarr asset entry for one input.

    The asset points at ``<assets_dir>/<base>.zarr``; its ``"metadata"`` value
    is whatever ``visual_min_max`` returns for the ``0`` array inside that zarr.

    :param fp_in: FilePath describing the input being processed.
    :return: the asset dict produced by ``FilePath.gen_asset`` with a
        ``"metadata"`` key added.
    """
    zarr_dir = Path(f"{fp_in.assets_dir}/{fp_in.base}.zarr")
    # The array named "0" inside the zarr directory feeds the min/max estimate.
    level_zero = zarr_dir / "0"

    asset = fp_in.gen_asset(
        asset_type=AssetType.NEUROGLANCER_ZARR, asset_fp=zarr_dir
    )
    asset["metadata"] = visual_min_max(mad_scale=5, input_image=level_zero)
    return asset


with Flow(
"brt_flow",
executor=BRTConfig.SLURM_EXECUTOR,
Expand Down Expand Up @@ -595,14 +632,8 @@ def cleanup_files(file_path: FilePath, pattern=str):
)
# finished volslicer inputs.

# START PYRAMID GEN
pyramid_assets = ng.gen_zarr.map(
fp_in=fps,
depth=unmapped(BRT_DEPTH),
width=unmapped(BRT_WIDTH),
height=unmapped(BRT_HEIGHT),
upstream_tasks=[brts],
)
zarrs = gen_zarr.map(fp_in=fps, upstream_tasks=[brts])
pyramid_assets = gen_ng_metadata.map(fp_in=fps, upstream_tasks=[zarrs])
# archive_pyramid_cmds = ng.gen_archive_pyr.map(
# file_path=fps, upstream_tasks=[pyramid_assets]
# )
Expand Down
1 change: 1 addition & 0 deletions em_workflows/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

LARGE_DIM = 1024
SMALL_DIM = 300
RECHUNK_SIZE = 512

BIOFORMATS_NUM_WORKERS = 3
# This is expected to be less than the available memory for a dask worker
Expand Down
1 change: 0 additions & 1 deletion em_workflows/czi/constants.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
VALID_CZI_INPUTS = ["czi", "CZI"]
RECHUNK_SIZE = 512
SITK_COMPRESSION_LVL = 90
THUMB_X_DIM = 300
THUMB_Y_DUM = 300
49 changes: 7 additions & 42 deletions em_workflows/czi/flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,9 @@
from pytools.HedwigZarrImages import HedwigZarrImages
from em_workflows.file_path import FilePath
from em_workflows.utils import utils
from em_workflows.utils import neuroglancer as ng
from prefect.run_configs import LocalRun
from em_workflows.constants import BIOFORMATS_NUM_WORKERS
from em_workflows.czi.constants import (
RECHUNK_SIZE,
VALID_CZI_INPUTS,
THUMB_X_DIM,
THUMB_Y_DUM,
Expand All @@ -18,12 +17,6 @@
from em_workflows.czi.config import CZIConfig


def rechunk_zarr(zarr_fp: Path) -> None:
    """
    Rechunk every image series of a zarr store to ``RECHUNK_SIZE``.

    :param zarr_fp: path to the zarr store; it is opened with
        ``read_only=False``.
    """
    zarr_images = HedwigZarrImages(zarr_fp, read_only=False)
    for _series_key, series_image in zarr_images.series():
        series_image.rechunk(RECHUNK_SIZE)


def gen_thumb(image: HedwigZarrImage, file_path: FilePath, image_name: str) -> dict:
sitk_image = image.extract_2d(
target_size_x=THUMB_X_DIM, target_size_y=THUMB_Y_DUM, auto_uint8=True
Expand Down Expand Up @@ -84,42 +77,14 @@ def gen_imageSet(file_path: FilePath) -> List:
return image_set


def bioformats_gen_zarr(file_path: FilePath):
    """
    Convert ``<proj_dir>/<base>.czi`` into ``<working_dir>/<base>.zarr``.

    TODO, refactor this into ng.gen_zarr

    The command that gets run is of the form::

        bioformats2raw --max_workers=$nproc --downsample-type AREA
        --compression=blosc --compression-properties cname=zstd
        --compression-properties clevel=5 --compression-properties shuffle=1
        input.tiff output.zarr

    After the conversion the zarr is copied to the assets dir and the
    working-dir zarr is rechunked.

    :param file_path: FilePath describing the input being processed.
    """
    input_czi = f"{file_path.proj_dir}/{file_path.base}.czi"
    output_zarr = f"{file_path.working_dir}/{file_path.base}.zarr"
    log_fp = f"{file_path.working_dir}/{file_path.base}_as_zarr.log"

    compression_args = [
        "--compression=blosc",
        "--compression-properties",
        "cname=zstd",
        "--compression-properties",
        "clevel=5",
        "--compression-properties",
        "shuffle=1",
    ]
    cmd = [
        CZIConfig.bioformats2raw,
        f"--max_workers={BIOFORMATS_NUM_WORKERS}",
        "--overwrite",
        "--downsample-type",
        "AREA",
        *compression_args,
        input_czi,
        output_zarr,
    ]
    FilePath.run(cmd, log_fp)

    zarr_path = Path(output_zarr)
    # NOTE(review): the rechunk runs on the working-dir zarr *after* the copy to
    # the assets dir; whether the assets copy ends up rechunked depends on what
    # copy_to_assets_dir does - confirm this ordering is intended.
    file_path.copy_to_assets_dir(fp_to_cp=zarr_path)
    rechunk_zarr(zarr_fp=zarr_path)


@task
def generate_czi_imageset(file_path: FilePath):
bioformats_gen_zarr(file_path)
input_czi = f"{file_path.proj_dir}/{file_path.base}.czi"
ng.bioformats_gen_zarr(
file_path=file_path,
input_fname=input_czi,
rechunk=True,
)
imageSet = gen_imageSet(file_path=file_path)
# extract images from input file, used to create imageSet elements
return imageSet
Expand Down
60 changes: 20 additions & 40 deletions em_workflows/lrg_2d_rgb/flow.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import os
from typing import Dict
from pathlib import Path
import SimpleITK as sitk
from pytools import HedwigZarrImage, HedwigZarrImages
from prefect import Flow, task, Parameter
from prefect.run_configs import LocalRun

from em_workflows.utils import utils
from em_workflows.utils import neuroglancer as ng
from em_workflows.file_path import FilePath
from em_workflows.constants import AssetType, BIOFORMATS_NUM_WORKERS, JAVA_MAX_HEAP_SIZE
from em_workflows.constants import AssetType
from em_workflows.lrg_2d_rgb.config import LRG2DConfig
from em_workflows.lrg_2d_rgb.constants import (
LARGE_THUMB_X,
Expand Down Expand Up @@ -46,45 +47,23 @@ def convert_png_to_tiff(file_path: FilePath):


@task
def bioformats_gen_zarr(file_path: FilePath):
"""
TODO, refactor this into ng.gen_zarr
bioformats2raw --scale-format-string '%2$d' --downsample-type AREA
--compression=blosc --compression-properties cname=zstd
--compression-properties clevel=5 --compression-properties shuffle=1
input.tiff output.zarr
"""

def gen_zarr(file_path: FilePath) -> None:
input_tiff = f"{file_path.working_dir}/{file_path.base}.tiff"
output_zarr = f"{file_path.working_dir}/{file_path.base}.zarr"
log_fp = f"{file_path.working_dir}/{file_path.base}_as_zarr.log"
# increase java max heap size for bf2raw command (just in case)
os.environ["_JAVA_OPTIONS"] = JAVA_MAX_HEAP_SIZE
cmd = [
LRG2DConfig.bioformats2raw,
f"--max_workers={BIOFORMATS_NUM_WORKERS}",
"--overwrite",
"--scale-format-string",
"%2$d",
"--downsample-type",
"AREA",
"--compression=blosc",
"--compression-properties",
"cname=zstd",
"--compression-properties",
"clevel=5",
"--compression-properties",
"shuffle=1",
input_tiff,
output_zarr,
]
FilePath.run(cmd, log_fp)
zarr_images = HedwigZarrImages(zarr_path=Path(output_zarr), read_only=False)
zarr_image: HedwigZarrImage = zarr_images[list(zarr_images.get_series_keys())[0]]
zarr_image.rechunk(512)
asset_fp = file_path.copy_to_assets_dir(fp_to_cp=Path(output_zarr))

ng.bioformats_gen_zarr(
file_path=file_path,
input_fname=input_tiff,
rechunk=True,
)


@task
def generate_ng_asset(file_path: FilePath) -> Dict:
output_zarr = Path(f"{file_path.assets_dir}/{file_path.base}.zarr")
zarr_images = HedwigZarrImages(zarr_path=output_zarr, read_only=False)
zarr_image = zarr_images[list(zarr_images.get_series_keys())[0]]
ng_asset = file_path.gen_asset(
asset_type=AssetType.NEUROGLANCER_ZARR, asset_fp=asset_fp
asset_type=AssetType.NEUROGLANCER_ZARR, asset_fp=output_zarr
)
ng_asset["metadata"] = dict(
shader=zarr_image.shader_type,
Expand Down Expand Up @@ -160,7 +139,8 @@ def gen_thumb(file_path: FilePath):
)
fps = utils.gen_fps(share_name=file_share, input_dir=input_dir_fp, fps_in=input_fps)
tiffs = convert_png_to_tiff.map(file_path=fps)
zarr_assets = bioformats_gen_zarr.map(file_path=fps, upstream_tasks=[tiffs])
zarrs = gen_zarr.map(file_path=fps, upstream_tasks=[tiffs])
zarr_assets = generate_ng_asset.map(file_path=fps, upstream_tasks=[zarrs])
thumb_assets = gen_thumb.map(file_path=fps, upstream_tasks=[zarr_assets])
prim_fps = utils.gen_prim_fps.map(fp_in=fps)
callback_with_thumbs = utils.add_asset.map(prim_fp=prim_fps, asset=thumb_assets)
Expand Down
48 changes: 40 additions & 8 deletions em_workflows/sem_tomo/flow.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
from em_workflows.file_path import FilePath
from pathlib import Path
import glob
from natsort import os_sorted
import math
from typing import Dict
from prefect import Flow, task, Parameter, unmapped
from prefect.run_configs import LocalRun
from pytools.workflow_functions import visual_min_max

from em_workflows.utils import utils
from em_workflows.utils import neuroglancer as ng
from em_workflows.file_path import FilePath
from em_workflows.constants import AssetType
from em_workflows.sem_tomo.config import SEMConfig
from em_workflows.sem_tomo.constants import FIBSEM_DEPTH, FIBSEM_HEIGHT, FIBSEM_WIDTH
Expand Down Expand Up @@ -207,6 +209,41 @@ def gen_keyimg_small(fp_in: FilePath) -> Dict:
return keyimg_asset


@task
def gen_zarr(fp_in: FilePath) -> None:
    """
    Convert one input to zarr, then run the multiscale build on it.

    The ``adjusted.mrc`` output associated with ``fp_in`` is converted when it
    exists on disk; otherwise the original input file is used as a fallback.

    :param fp_in: FilePath describing the input being processed.
    """
    # Resolve the fallback first, then prefer the adjusted.mrc output if present.
    fallback_input = fp_in.fp_in.as_posix()
    adjusted_mrc = fp_in.gen_output_fp(output_ext=".mrc", out_fname="adjusted.mrc")
    source = adjusted_mrc.as_posix() if adjusted_mrc.is_file() else fallback_input

    # Only one resolution is requested from the converter here;
    # ng.zarr_build_multiscales is invoked right after on the same FilePath.
    ng.bioformats_gen_zarr(
        file_path=fp_in,
        input_fname=source,
        depth=FIBSEM_DEPTH,
        width=FIBSEM_WIDTH,
        height=FIBSEM_HEIGHT,
        resolutions=1,
    )
    ng.zarr_build_multiscales(fp_in)


@task
def gen_ng_metadata(fp_in: FilePath) -> Dict:
    """
    Build the Neuroglancer zarr asset entry for one input.

    The asset points at ``<assets_dir>/<base>.zarr``; its ``"metadata"`` value
    is whatever ``visual_min_max`` returns for the ``0`` array inside that zarr.

    :param fp_in: FilePath describing the input being processed.
    :return: the asset dict produced by ``FilePath.gen_asset`` with a
        ``"metadata"`` key added.
    """
    zarr_dir = Path(f"{fp_in.assets_dir}/{fp_in.base}.zarr")
    # The array named "0" inside the zarr directory feeds the min/max estimate.
    level_zero = zarr_dir / "0"

    asset = fp_in.gen_asset(
        asset_type=AssetType.NEUROGLANCER_ZARR, asset_fp=zarr_dir
    )
    asset["metadata"] = visual_min_max(mad_scale=5, input_image=level_zero)
    return asset


with Flow(
"sem_tomo",
state_handlers=[utils.notify_api_completion, utils.notify_api_running],
Expand Down Expand Up @@ -278,13 +315,8 @@ def gen_keyimg_small(fp_in: FilePath) -> Dict:
thumb_assets = gen_keyimg_small.map(fp_in=fps, upstream_tasks=[keyimg_assets])

# zarr file generation
pyramid_assets = ng.gen_zarr.map(
fp_in=fps,
depth=unmapped(FIBSEM_DEPTH),
width=unmapped(FIBSEM_WIDTH),
height=unmapped(FIBSEM_HEIGHT),
upstream_tasks=[base_mrcs],
)
zarrs = gen_zarr.map(fp_in=fps, upstream_tasks=[base_mrcs])
pyramid_assets = gen_ng_metadata.map(fp_in=fps, upstream_tasks=[zarrs])

# This is the top-level element (basically the input file) onto which
# the "assets" (i.e. the outputs derived from this file) are hung.
Expand Down
Loading

0 comments on commit 188e108

Please sign in to comment.