Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor bf2raw command to be central/common function #323

Merged
merged 1 commit into from
Sep 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,10 @@ jobs:
python -m pip install --upgrade pip
pip install -e . -r requirements.txt
- name: Directory setup
run: |
cp .env.sample .env
- name: Test Coverage
continue-on-error: true
run: |
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ coverage.xml
.hypothesis/
.pytest_cache/

test/tmp/

# Translations
*.mo
*.pot
Expand Down
47 changes: 39 additions & 8 deletions em_workflows/brt/flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
- Finally, we ``POST`` the JSON datastructure to the API, and cleanup temp dirs.
"""

from typing import Dict
import glob
import os
from em_workflows.file_path import FilePath
Expand All @@ -45,6 +46,7 @@
from prefect import task, Flow, Parameter, unmapped
from prefect.run_configs import LocalRun
from prefect.engine import signals
from pytools.workflow_functions import visual_min_max

from em_workflows.utils import utils
from em_workflows.utils import neuroglancer as ng
Expand Down Expand Up @@ -475,6 +477,41 @@ def cleanup_files(file_path: FilePath, pattern=str):
# return inputs_paired


@task
def gen_zarr(fp_in: FilePath):
    """
    Convert this input to a zarr and then build its multiscales.

    The ``_rec.mrc`` output file is used as the conversion source when it
    exists on disk; otherwise the original input file is used as a fallback.
    The conversion is delegated to ``ng.bioformats_gen_zarr`` with the BRT
    depth/width/height constants and a single resolution, after which
    ``ng.zarr_build_multiscales`` is called for the same FilePath.

    :param fp_in: FilePath of the input being processed.
    """
    # Resolve the fallback first; it is only used when no _rec.mrc exists.
    fallback_fname = fp_in.fp_in.as_posix()
    rec_mrc = fp_in.gen_output_fp(output_ext="_rec.mrc")
    source_fname = rec_mrc.as_posix() if rec_mrc.is_file() else fallback_fname

    conversion_kwargs = dict(
        file_path=fp_in,
        input_fname=source_fname,
        depth=BRT_DEPTH,
        width=BRT_WIDTH,
        height=BRT_HEIGHT,
        resolutions=1,
    )
    ng.bioformats_gen_zarr(**conversion_kwargs)
    ng.zarr_build_multiscales(fp_in)


@task
def gen_ng_metadata(fp_in: FilePath) -> Dict:
    """
    Build the neuroglancer zarr asset for this input, including its metadata.

    :param fp_in: FilePath whose ``<assets_dir>/<base>.zarr`` is described.
    :return: the asset created by ``gen_asset`` (type NEUROGLANCER_ZARR) with
        its "metadata" entry set to the result of ``visual_min_max`` run on
        the zarr's ``0`` sub-path.
    """
    zarr_fp = Path(f"{fp_in.assets_dir}/{fp_in.base}.zarr")
    # visual_min_max is pointed at the "0" entry inside the zarr directory.
    level_zero_fp = Path(zarr_fp.as_posix() + "/0")

    asset = fp_in.gen_asset(
        asset_type=AssetType.NEUROGLANCER_ZARR,
        asset_fp=zarr_fp,
    )
    asset["metadata"] = visual_min_max(mad_scale=5, input_image=level_zero_fp)
    return asset


with Flow(
"brt_flow",
executor=BRTConfig.SLURM_EXECUTOR,
Expand Down Expand Up @@ -595,14 +632,8 @@ def cleanup_files(file_path: FilePath, pattern=str):
)
# finished volslicer inputs.

# START PYRAMID GEN
pyramid_assets = ng.gen_zarr.map(
fp_in=fps,
depth=unmapped(BRT_DEPTH),
width=unmapped(BRT_WIDTH),
height=unmapped(BRT_HEIGHT),
upstream_tasks=[brts],
)
zarrs = gen_zarr.map(fp_in=fps, upstream_tasks=[brts])
pyramid_assets = gen_ng_metadata.map(fp_in=fps, upstream_tasks=[zarrs])
# archive_pyramid_cmds = ng.gen_archive_pyr.map(
# file_path=fps, upstream_tasks=[pyramid_assets]
# )
Expand Down
1 change: 1 addition & 0 deletions em_workflows/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

LARGE_DIM = 1024
SMALL_DIM = 300
RECHUNK_SIZE = 512

BIOFORMATS_NUM_WORKERS = 3
# This is expected to be less than the available memory for a dask worker
Expand Down
1 change: 0 additions & 1 deletion em_workflows/czi/constants.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
VALID_CZI_INPUTS = ["czi", "CZI"]
RECHUNK_SIZE = 512
SITK_COMPRESSION_LVL = 90
THUMB_X_DIM = 300
THUMB_Y_DUM = 300
49 changes: 7 additions & 42 deletions em_workflows/czi/flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,9 @@
from pytools.HedwigZarrImages import HedwigZarrImages
from em_workflows.file_path import FilePath
from em_workflows.utils import utils
from em_workflows.utils import neuroglancer as ng
from prefect.run_configs import LocalRun
from em_workflows.constants import BIOFORMATS_NUM_WORKERS
from em_workflows.czi.constants import (
RECHUNK_SIZE,
VALID_CZI_INPUTS,
THUMB_X_DIM,
THUMB_Y_DUM,
Expand All @@ -18,12 +17,6 @@
from em_workflows.czi.config import CZIConfig


def rechunk_zarr(zarr_fp: Path) -> None:
    """
    Rechunk every image series found in the zarr at ``zarr_fp``.

    The zarr is opened writable (``read_only=False``) and each series image
    is rechunked to ``RECHUNK_SIZE``.

    :param zarr_fp: path of the zarr to rechunk in place.
    """
    zarr_images = HedwigZarrImages(zarr_fp, read_only=False)
    # series() yields (key, image) pairs; only the image is needed here.
    for _series_key, series_image in zarr_images.series():
        series_image.rechunk(RECHUNK_SIZE)


def gen_thumb(image: HedwigZarrImage, file_path: FilePath, image_name: str) -> dict:
sitk_image = image.extract_2d(
target_size_x=THUMB_X_DIM, target_size_y=THUMB_Y_DUM, auto_uint8=True
Expand Down Expand Up @@ -84,42 +77,14 @@ def gen_imageSet(file_path: FilePath) -> List:
return image_set


def bioformats_gen_zarr(file_path: FilePath):
    """
    TODO, refactor this into ng.gen_zarr

    Convert ``<proj_dir>/<base>.czi`` into ``<working_dir>/<base>.zarr`` by
    running bioformats2raw, copy the resulting zarr to the assets dir, and
    rechunk the working-dir zarr.

    The command that is run has this shape:

    bioformats2raw --max_workers=<BIOFORMATS_NUM_WORKERS> --overwrite
    --downsample-type AREA
    --compression=blosc --compression-properties cname=zstd
    --compression-properties clevel=5 --compression-properties shuffle=1
    input.czi output.zarr

    :param file_path: FilePath of the CZI input being converted.
    """
    input_czi = f"{file_path.proj_dir}/{file_path.base}.czi"
    output_zarr = f"{file_path.working_dir}/{file_path.base}.zarr"
    log_fp = f"{file_path.working_dir}/{file_path.base}_as_zarr.log"

    # Each blosc setting is passed as its own --compression-properties pair.
    compression_args = []
    for prop in ("cname=zstd", "clevel=5", "shuffle=1"):
        compression_args.extend(["--compression-properties", prop])

    cmd = [
        CZIConfig.bioformats2raw,
        f"--max_workers={BIOFORMATS_NUM_WORKERS}",
        "--overwrite",
        "--downsample-type",
        "AREA",
        "--compression=blosc",
        *compression_args,
        input_czi,
        output_zarr,
    ]
    FilePath.run(cmd, log_fp)

    zarr_fp = Path(output_zarr)
    # NOTE(review): the zarr is copied to the assets dir BEFORE it is
    # rechunked, so the rechunk is applied to the working-dir zarr only.
    # Confirm which copy downstream readers are expected to use.
    file_path.copy_to_assets_dir(fp_to_cp=zarr_fp)
    rechunk_zarr(zarr_fp=zarr_fp)


@task
def generate_czi_imageset(file_path: FilePath):
# Task: convert the CZI input for `file_path` to zarr, then build and return
# the imageSet produced by gen_imageSet(file_path).
# NOTE(review): both a bare bioformats_gen_zarr(file_path) call and an
# ng.bioformats_gen_zarr(...) call appear here. Presumably the bare call is
# the pre-refactor line that the ng.* call replaces -- confirm, since running
# both would perform the conversion twice.
bioformats_gen_zarr(file_path)
# The input is expected at <proj_dir>/<base>.czi.
input_czi = f"{file_path.proj_dir}/{file_path.base}.czi"
ng.bioformats_gen_zarr(
file_path=file_path,
input_fname=input_czi,
rechunk=True,
)
imageSet = gen_imageSet(file_path=file_path)
# extract images from input file, used to create imageSet elements
return imageSet
Expand Down
60 changes: 20 additions & 40 deletions em_workflows/lrg_2d_rgb/flow.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import os
from typing import Dict
from pathlib import Path
import SimpleITK as sitk
from pytools import HedwigZarrImage, HedwigZarrImages
from prefect import Flow, task, Parameter
from prefect.run_configs import LocalRun

from em_workflows.utils import utils
from em_workflows.utils import neuroglancer as ng
from em_workflows.file_path import FilePath
from em_workflows.constants import AssetType, BIOFORMATS_NUM_WORKERS, JAVA_MAX_HEAP_SIZE
from em_workflows.constants import AssetType
from em_workflows.lrg_2d_rgb.config import LRG2DConfig
from em_workflows.lrg_2d_rgb.constants import (
LARGE_THUMB_X,
Expand Down Expand Up @@ -46,45 +47,23 @@ def convert_png_to_tiff(file_path: FilePath):


@task
def bioformats_gen_zarr(file_path: FilePath):
"""
TODO, refactor this into ng.gen_zarr
bioformats2raw --scale-format-string '%2$d' --downsample-type AREA
--compression=blosc --compression-properties cname=zstd
--compression-properties clevel=5 --compression-properties shuffle=1
input.tiff output.zarr
"""

def gen_zarr(file_path: FilePath) -> None:
input_tiff = f"{file_path.working_dir}/{file_path.base}.tiff"
output_zarr = f"{file_path.working_dir}/{file_path.base}.zarr"
log_fp = f"{file_path.working_dir}/{file_path.base}_as_zarr.log"
# increase java max heap size for bf2raw command (just in case)
os.environ["_JAVA_OPTIONS"] = JAVA_MAX_HEAP_SIZE
cmd = [
LRG2DConfig.bioformats2raw,
f"--max_workers={BIOFORMATS_NUM_WORKERS}",
"--overwrite",
"--scale-format-string",
"%2$d",
"--downsample-type",
"AREA",
"--compression=blosc",
"--compression-properties",
"cname=zstd",
"--compression-properties",
"clevel=5",
"--compression-properties",
"shuffle=1",
input_tiff,
output_zarr,
]
FilePath.run(cmd, log_fp)
zarr_images = HedwigZarrImages(zarr_path=Path(output_zarr), read_only=False)
zarr_image: HedwigZarrImage = zarr_images[list(zarr_images.get_series_keys())[0]]
zarr_image.rechunk(512)
asset_fp = file_path.copy_to_assets_dir(fp_to_cp=Path(output_zarr))

ng.bioformats_gen_zarr(
file_path=file_path,
input_fname=input_tiff,
rechunk=True,
)


@task
def generate_ng_asset(file_path: FilePath) -> Dict:
output_zarr = Path(f"{file_path.assets_dir}/{file_path.base}.zarr")
zarr_images = HedwigZarrImages(zarr_path=output_zarr, read_only=False)
zarr_image = zarr_images[list(zarr_images.get_series_keys())[0]]
ng_asset = file_path.gen_asset(
asset_type=AssetType.NEUROGLANCER_ZARR, asset_fp=asset_fp
asset_type=AssetType.NEUROGLANCER_ZARR, asset_fp=output_zarr
)
ng_asset["metadata"] = dict(
shader=zarr_image.shader_type,
Expand Down Expand Up @@ -160,7 +139,8 @@ def gen_thumb(file_path: FilePath):
)
fps = utils.gen_fps(share_name=file_share, input_dir=input_dir_fp, fps_in=input_fps)
tiffs = convert_png_to_tiff.map(file_path=fps)
zarr_assets = bioformats_gen_zarr.map(file_path=fps, upstream_tasks=[tiffs])
zarrs = gen_zarr.map(file_path=fps, upstream_tasks=[tiffs])
zarr_assets = generate_ng_asset.map(file_path=fps, upstream_tasks=[zarrs])
thumb_assets = gen_thumb.map(file_path=fps, upstream_tasks=[zarr_assets])
prim_fps = utils.gen_prim_fps.map(fp_in=fps)
callback_with_thumbs = utils.add_asset.map(prim_fp=prim_fps, asset=thumb_assets)
Expand Down
48 changes: 40 additions & 8 deletions em_workflows/sem_tomo/flow.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
from em_workflows.file_path import FilePath
from pathlib import Path
import glob
from natsort import os_sorted
import math
from typing import Dict
from prefect import Flow, task, Parameter, unmapped
from prefect.run_configs import LocalRun
from pytools.workflow_functions import visual_min_max

from em_workflows.utils import utils
from em_workflows.utils import neuroglancer as ng
from em_workflows.file_path import FilePath
from em_workflows.constants import AssetType
from em_workflows.sem_tomo.config import SEMConfig
from em_workflows.sem_tomo.constants import FIBSEM_DEPTH, FIBSEM_HEIGHT, FIBSEM_WIDTH
Expand Down Expand Up @@ -207,6 +209,41 @@ def gen_keyimg_small(fp_in: FilePath) -> Dict:
return keyimg_asset


@task
def gen_zarr(fp_in: FilePath) -> None:
    """
    Convert this input to a zarr and then build its multiscales.

    The ``adjusted.mrc`` output file is used as the conversion source when it
    exists on disk; otherwise the original input file is used as a fallback.
    The conversion is delegated to ``ng.bioformats_gen_zarr`` with the FIBSEM
    depth/width/height constants and a single resolution, after which
    ``ng.zarr_build_multiscales`` is called for the same FilePath.

    :param fp_in: FilePath of the input being processed.
    """
    # Resolve the fallback first; it is only used when adjusted.mrc is absent.
    fallback_fname = fp_in.fp_in.as_posix()
    adjusted_mrc = fp_in.gen_output_fp(output_ext=".mrc", out_fname="adjusted.mrc")
    if adjusted_mrc.is_file():
        source_fname = adjusted_mrc.as_posix()
    else:
        source_fname = fallback_fname

    conversion_kwargs = dict(
        file_path=fp_in,
        input_fname=source_fname,
        depth=FIBSEM_DEPTH,
        width=FIBSEM_WIDTH,
        height=FIBSEM_HEIGHT,
        resolutions=1,
    )
    ng.bioformats_gen_zarr(**conversion_kwargs)
    ng.zarr_build_multiscales(fp_in)


@task
def gen_ng_metadata(fp_in: FilePath) -> Dict:
    """
    Build the neuroglancer zarr asset for this input, including its metadata.

    :param fp_in: FilePath whose ``<assets_dir>/<base>.zarr`` is described.
    :return: the asset created by ``gen_asset`` (type NEUROGLANCER_ZARR) with
        its "metadata" entry set to the result of ``visual_min_max`` run on
        the zarr's ``0`` sub-path.
    """
    zarr_fp = Path(f"{fp_in.assets_dir}/{fp_in.base}.zarr")
    # visual_min_max is pointed at the "0" entry inside the zarr directory.
    level_zero_fp = Path(zarr_fp.as_posix() + "/0")

    asset = fp_in.gen_asset(
        asset_type=AssetType.NEUROGLANCER_ZARR,
        asset_fp=zarr_fp,
    )
    asset["metadata"] = visual_min_max(mad_scale=5, input_image=level_zero_fp)
    return asset


with Flow(
"sem_tomo",
state_handlers=[utils.notify_api_completion, utils.notify_api_running],
Expand Down Expand Up @@ -278,13 +315,8 @@ def gen_keyimg_small(fp_in: FilePath) -> Dict:
thumb_assets = gen_keyimg_small.map(fp_in=fps, upstream_tasks=[keyimg_assets])

# zarr file generation
pyramid_assets = ng.gen_zarr.map(
fp_in=fps,
depth=unmapped(FIBSEM_DEPTH),
width=unmapped(FIBSEM_WIDTH),
height=unmapped(FIBSEM_HEIGHT),
upstream_tasks=[base_mrcs],
)
zarrs = gen_zarr.map(fp_in=fps, upstream_tasks=[base_mrcs])
pyramid_assets = gen_ng_metadata.map(fp_in=fps, upstream_tasks=[zarrs])

# this is the toplevel element (the input file basically) onto which
# the "assets" (ie the outputs derived from this file) are hung.
Expand Down
Loading
Loading