diff --git a/docs/api.rst b/docs/api.rst index 9bf61ec..eac5dae 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -1,19 +1,23 @@ API === + +.. automodule:: pytools + :members: + .. automodule:: pytools.meta :members: .. automodule:: pytools.utils.zarr :members: -.. automodule:: pytools.convert +.. automodule:: pytools.utils.histogram :members: -.. automodule:: pytools.ng.mrc2nifti +.. automodule:: pytools.convert :members: -.. automodule:: pytools.ng.build_histogram +.. automodule:: pytools.ng.mrc2nifti :members: .. automodule:: pytools.zarr_rechunk diff --git a/docs/commandline.rst b/docs/commandline.rst index f18ed9e..aaa581d 100644 --- a/docs/commandline.rst +++ b/docs/commandline.rst @@ -5,13 +5,13 @@ The Pytools packages contain a command line executables for various tasks. They .. code-block :: bash - mrc_visual_min_max --help + mrc2nifti --help Or the preferred way using the `python` executable to execute the module entry point: .. code-block :: bash - python -m mrc_visual_min_max --help + python -m mrc2nifti --help With either method of invoking the command line interface, the following sections describes the sub-commands available and the command line options available. @@ -19,8 +19,3 @@ and the command line options available. .. click:: pytools.ng.mrc2nifti:main :prog: mrc2nifti -.. click:: pytools.ng.mrc2ngpc:main - :prog: mrc2npgc - -.. click:: pytools.ng.build_histogram:main - :prog: mrc_visual_min_max diff --git a/pyproject.toml b/pyproject.toml index 625e9c5..1f87ac7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,6 @@ local_scheme = "dirty-tag" [project.scripts] mrc2nifti = "pytools.ng.mrc2nifti:main" -mrc_visual_min_max = "pytools.ng.build_histogram:main" zarr_rechunk = "pytools.zarr_rechunk:main" zarr_build_multiscales = "pytools.zarr_build_multiscales:main" diff --git a/pytools/__init__.py b/pytools/__init__.py index 33f4bf1..994d677 100644 --- a/pytools/__init__.py +++ b/pytools/__init__.py @@ -12,10 +12,19 @@ # limitations under the License. # +from .workflow_functions import visual_min_max + +import logging + +logger = logging.getLogger(__name__) +del logging + +_installed_package = "tomojs_pytools" try: from ._version import version as __version__ except ImportError: pass -__all__ = ["__version__"] + +__all__ = ["__version__", "visual_min_max", "logger"] diff --git a/pytools/ng/build_histogram.py b/pytools/ng/build_histogram.py deleted file mode 100644 index ee77551..0000000 --- a/pytools/ng/build_histogram.py +++ /dev/null @@ -1,427 +0,0 @@ -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0.txt -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -import math -import click -import numpy as np -import logging -import SimpleITK as sitk -import zarr - -from pytools.utils import MutuallyExclusiveOption -from pytools import __version__ -from math import floor, ceil -from pathlib import Path -import dask.array -from typing import Union, Tuple, Any -from abc import ABC, abstractmethod - - -logger = logging.getLogger(__name__) - - -def weighted_quantile(values, quantiles, sample_weight=None, values_sorted=False, old_style=False): - """Very close to numpy.percentile, but supports weights. - NOTE: quantiles should be in [0, 1]! - :param values: numpy.array with data - :param quantiles: array-like with many quantiles needed - :param sample_weight: array-like of the same length as `array` - :param values_sorted: bool, if True, then will avoid sorting of - initial array - :param old_style: if True, will correct output to be consistent - with numpy.percentile. - :return: numpy.array with computed quantiles. - """ - values = np.array(values) - quantiles = np.array(quantiles) - if sample_weight is None: - sample_weight = np.ones(len(values)) - sample_weight = np.array(sample_weight) - - # Remove zero weighted samples - non_zero_mask = sample_weight != 0 - sample_weight = sample_weight[non_zero_mask] - values = values[non_zero_mask] - - assert np.all(quantiles >= 0) and np.all(quantiles <= 1), "quantiles should be in [0, 1]" - - if not values_sorted: - sorter = np.argsort(values) - values = values[sorter] - sample_weight = sample_weight[sorter] - - weighted_quantiles = np.cumsum(sample_weight) - 0.5 * sample_weight - if old_style: - # To be convenient with numpy.percentile - weighted_quantiles -= weighted_quantiles[0] - weighted_quantiles /= weighted_quantiles[-1] - else: - weighted_quantiles /= np.sum(sample_weight) - - return np.interp(quantiles, weighted_quantiles, values) - - -class HistogramBase(ABC): - @property - @abstractmethod - def dtype(self): - pass - - @abstractmethod - def compute_min_max(self) -> Tuple[Any, Any]: - pass - - def compute_histogram_bin_edges(self, number_of_bins=1024) -> np.array: - if np.issubdtype(self.dtype, np.integer) and 2 ** np.iinfo(self.dtype).bits < number_of_bins: - return np.arange(np.iinfo(self.dtype).min - 0.5, np.iinfo(self.dtype).max + 1.5) - - imin, imax = self.compute_min_max() - - if np.issubdtype(self.dtype, np.inexact): - if imax - imin < np.finfo(imin).eps * number_of_bins: - logger.warning("Computed difference between minimum and maximum is below tolerances.") - imax = imin + np.finfo(imin).eps * number_of_bins / 2 - imin = imin - np.finfo(imin).eps * number_of_bins / 2 - elif np.issubdtype(self.dtype, np.integer) and imax - imin < number_of_bins: - return np.arange(imin - 0.5, imax + 1.5) - - step = (imax - imin) / number_of_bins - - logger.debug(f"Computed minimum: {imin} maximum: {imax} step: {step}") - histogram_bin_edges = np.arange(imin, imax + step, step) - return histogram_bin_edges - - @abstractmethod - def compute_histogram(self, histogram_bin_edges=None, density=False) -> Tuple[np.array, np.array]: - pass - - -class sitkHistogramHelper(HistogramBase): - """ - Read image slice by slice, and build a histogram. The image file must be readable by SimpleITK. - The SimpleITK is expected to support streaming the file format. - - The np.histogram function is run on each image slice with the provided histogram_bin_edges, and - accumulated for the results. - - :param filename: The path to the image file to read. MRC file type is recommend. 
- :param histogram_bin_edges: A monotonically increasing array of min edges. The resulting - histogram or weights will have n-1 elements. If None, then it will be automatically computed for integers, and - an np.bincount may be used as an optimization. - :param extract_axis: The image dimension which is sliced during image reading. - :param density: If true the sum of the results is 1.0, otherwise it is the count of values in each bin. - :param extract_step: The number of slices to read at one time. - """ - - def __init__(self, filename, extract_axis=2, extract_step=1): - self.reader = sitk.ImageFileReader() - self.reader.SetFileName(str(filename)) - self.reader.ReadImageInformation() - - logger.info(f'Reading "{self.reader.GetFileName()}" image information...') - - logger.info(f"\tPixel Type: {sitk.GetPixelIDValueAsString(self.reader.GetPixelIDValue())}") - logger.info(f"\tPixel Type: {sitk.GetPixelIDValueAsString(self.reader.GetPixelIDValue())}") - logger.info(f"\tSize: {self.reader.GetSize()}") - logger.info(f"\tSpacing: {self.reader.GetSpacing()}") - logger.info(f"\tOrigin: {self.reader.GetOrigin()}") - - self.extract_axis = extract_axis - self.extract_step = extract_step - - def compute_min_max(self): - img = self.reader.Execute() - - min_max_filter = sitk.MinimumMaximumImageFilter() - min_max_filter.Execute(img) - return min_max_filter.GetMinimum(), min_max_filter.GetMaximum() - - @property - def dtype(self): - return sitk.extra._get_numpy_dtype(self.reader) - - def compute_histogram(self, histogram_bin_edges=None, density=False) -> Tuple[np.array, np.array]: - use_bincount = False - if histogram_bin_edges is None: - if np.issubdtype(self.dtype, np.integer) and np.iinfo(self.dtype).bits <= 16: - histogram_bin_edges = self.compute_histogram_bin_edges( - number_of_bins=2 ** np.iinfo(self.dtype).bits + 1 - ) - if self.dtype() in (np.uint8, np.uint16): - use_bincount = True - else: - histogram_bin_edges = self.compute_histogram_bin_edges() - - h = np.zeros(len(histogram_bin_edges) - 1, dtype=np.int64) - - extract_index = [0] * self.reader.GetDimension() - - size = self.reader.GetSize() - extract_size = list(size) - extract_size[self.extract_axis] = 0 - self.reader.SetExtractSize(extract_size) - - for i in range(0, self.reader.GetSize()[self.extract_axis], self.extract_step): - extract_index[self.extract_axis] = i - self.reader.SetExtractIndex(extract_index) - logger.debug(f"extract_index: {extract_index}") - - extract_size[self.extract_axis] = min(i + self.extract_step, size[self.extract_axis]) - i - self.reader.SetExtractSize(extract_size) - img = self.reader.Execute() - - # accumulate histogram counts - if use_bincount: - h += np.bincount(sitk.GetArrayViewFromImage(img).ravel(), minlength=len(h)) - else: - h += np.histogram(sitk.GetArrayViewFromImage(img).ravel(), bins=histogram_bin_edges, density=False)[0] - - if density: - h /= np.sum(h) - - return h, histogram_bin_edges - - -class daskHisogramHelper(HistogramBase): - def __init__(self, arr: dask.array): - self._arr = arr - if not self._arr.dtype.isnative: - logging.info("ZARR array needs converting to native byteorder.") - self._arr = self._arr.astype(self._arr.dtype.newbyteorder("=")) - - def compute_min_max(self): - return self._arr.min(), self._arr.max() - - @property - def dtype(self): - return self._arr.dtype - - def compute_histogram(self, histogram_bin_edges=None, density=False) -> Tuple[np.array, np.array]: - if histogram_bin_edges is None: - if np.issubdtype(self.dtype, np.integer) and np.iinfo(self.dtype).bits <= 16: - 
histogram_bin_edges = self.compute_histogram_bin_edges( - number_of_bins=2 ** np.iinfo(self.dtype).bits + 1 - ) - - if np.dtype(self.dtype) in (np.uint8, np.uint16): - return ( - dask.array.bincount(self._arr.ravel(), minlength=len(histogram_bin_edges) - 1).compute(), - histogram_bin_edges, - ) - - else: - histogram_bin_edges = self.compute_histogram_bin_edges() - - h, bins = dask.array.histogram(self._arr.ravel(), bins=histogram_bin_edges, density=density) - return h.compute(), bins - - -class zarrHisogramHelper(daskHisogramHelper): - def __init__(self, filename): - za = zarr.open_array(filename, mode="r") - super().__init__(dask.array.from_zarr(za)) - logging.debug(za.info) - - -def stream_build_histogram( - filename: Union[Path, str], histogram_bin_edges=None, extract_axis=2, density=False, extract_step=1 -): - """ - Read image slice by slice, and build a histogram. The image file must be readable by SimpleITK. - The SimpleITK is expected to support streaming the file format. - The np.histogram function is run on each image slice with the provided histogram_bin_edges, and - accumulated for the results. - :param filename: The path to the image file to read. MRC file type is recommend. - :param histogram_bin_edges: A monotonically increasing array of min edges. The resulting - histogram or weights will have n-1 elements. If None, then it will be automatically computed for integers, and - an np.bincount may be used as an optimization. - :param extract_axis: The image dimension which is sliced during image reading. - :param density: If true the sum of the results is 1.0, otherwise it is the count of values in each bin. - :param extract_step: The number of slices to read at one time. - """ - - input_image = Path(filename) - - if input_image.is_dir() and (input_image / ".zarray").exists(): - histo = zarrHisogramHelper(input_image) - - else: - histo = sitkHistogramHelper(filename, extract_axis=extract_axis, extract_step=extract_step) - - return histo.compute_histogram(histogram_bin_edges=histogram_bin_edges, density=density) - - -def histogram_robust_stats(hist, bin_edges): - """ - Computes the "median" and "mad" (Median Absolute Deviation). - - :param hist: The histogram weights ( density or count ). - :param bin_edges: The edges of the bins. This array should be one greater than the hist. - """ - assert len(hist) + 1 == len(bin_edges) - - mids = 0.5 * (bin_edges[1:] + bin_edges[:-1]) - - median = weighted_quantile(mids, values_sorted=True, sample_weight=hist, quantiles=0.5) - - mad = weighted_quantile(np.abs(mids - median), values_sorted=False, sample_weight=hist, quantiles=0.5) - - return {"median": median, "mad": mad} - - -def histogram_stats(hist, bin_edges): - """ - Computes the "mean", "var" (variance), and "sigma" (standard deviation) from the provided histogram. - - :param hist: The histogram weights ( density or count ). - :param bin_edges: The edges of the bins. This array should be one greater than the hist. 
- """ - - assert len(hist) + 1 == len(bin_edges) - - results = {} - - mids = 0.5 * (bin_edges[1:] + bin_edges[:-1]) - - results["mean"] = np.average(mids, weights=hist) - results["var"] = np.average((mids - results["mean"]) ** 2, weights=hist) - results["sigma"] = math.sqrt(results["var"]) - - return results - - -@click.command() -@click.argument("input_image", type=click.Path(exists=True, dir_okay=True, path_type=Path)) -@click.option( - "--mad", - "mad_scale", - type=float, - cls=MutuallyExclusiveOption, - mutually_exclusive=["sigma", "percentile-crop"], - help="Use INPUT_IMAGE's robust median absolute deviation (MAD) scale by option's value about the median for " - "minimum and maximum range. ", -) -@click.option( - "--sigma", - "sigma_scale", - type=float, - cls=MutuallyExclusiveOption, - mutually_exclusive=["mad", "percentile-crop"], - help="Use INPUT_IMAGE's standard deviation (sigma) scale by option's value about the mean for minimum and " - "maximum range. ", -) -@click.option( - "--percentile", - type=click.FloatRange(0.0, 100), - cls=MutuallyExclusiveOption, - mutually_exclusive=["sigma", "mad"], - help="Use INPUT_IMAGE's middle percentile (option's value) of data for minimum and maximum range.", -) -@click.option( - "--clamp/--no-clamp", - default=False, - help="Clamps minimum and maximum range to existing intensity values (floor and limit).", -) -@click.option( - "--output-json", - type=click.Path(exists=False, dir_okay=False, resolve_path=True), - help='The output filename produced in JSON format with "neuroglancerPrecomputedMin", ' - '"neuroglancerPrecomputedMax", "neuroglancerPrecomputedFloor" and "neuroglancerPrecomputedLimit" data ' - "elements of a double numeric value.", -) -@click.option( - "--log-level", default="INFO", type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False) -) -@click.version_option(__version__) -def main(input_image: Path, mad_scale, sigma_scale, percentile, clamp, output_json, log_level): - """ - Reads the INPUT_IMAGE to compute an estimated minimum and maximum range to be used for visualization of the - data set. The image is required to have an integer pixel type. 
- - The optional OUTPUT_JSON filename will be created with the following data elements with integer values as strings: - "neuroglancerPrecomputedMin" - "neuroglancerPrecomputedMax" - "neuroglancerPrecomputedFloor" - "neuroglancerPrecomputedLimit" - """ - - logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.getLevelName(log_level)) - - if input_image.is_dir() and (input_image / ".zarray").exists(): - histo = zarrHisogramHelper(input_image) - elif input_image.suffix in (".nii", ".mha", ".mrc", ".rec"): - from SimpleITK.utilities.dask import from_sitk - - logger.debug("Loading chunk with SimpleITK and dask...") - sitk_da = from_sitk(input_image, chunks=(1, -1, -1)) - histo = daskHisogramHelper(sitk_da) - else: - logger.debug("Loading whole image with SimpleITK...") - img = sitk.ReadImage(input_image) - histo = daskHisogramHelper(dask.array.from_array(sitk.GetArrayViewFromImage(img), chunks=(1, -1, -1))) - - logger.info(f'Building histogram for "{input_image}"...') - h, bins = histo.compute_histogram(histogram_bin_edges=None, density=False) - - mids = 0.5 * (bins[1:] + bins[:-1]) - - logger.info("Computing statistics...") - if mad_scale: - stats = histogram_robust_stats(h, bins) - logger.debug(f"stats: {stats}") - min_max = (stats["median"] - stats["mad"] * mad_scale, stats["median"] + stats["mad"] * mad_scale) - elif sigma_scale: - stats = histogram_stats(h, bins) - logger.debug(f"stats: {stats}") - min_max = (stats["mean"] - stats["sigma"] * sigma_scale, stats["mean"] + stats["sigma"] * sigma_scale) - elif percentile: - lower_quantile = (0.5 * (100 - percentile)) / 100.0 - upper_quantile = percentile / 100.0 + lower_quantile - logger.debug(f"quantiles: {lower_quantile} {upper_quantile}") - - # cs = np.cumsum(h) - # min_max = (np.searchsorted(cs, cs[-1] * (percentile_crop * .005)), - # np.searchsorted(cs, cs[-1] * (1.0 - percentile_crop * .005))) - # min_max = (mids[min_max[0]], mids[min_max[1]]) - min_max = weighted_quantile( - mids, quantiles=[lower_quantile, upper_quantile], sample_weight=h, values_sorted=True - ) - else: - raise RuntimeError("Missing expected argument") - - floor_limit = weighted_quantile(mids, quantiles=[0.0, 1.0], sample_weight=h, values_sorted=True) - - if clamp: - min_max = (max(min_max[0], floor_limit[0]), min(min_max[1], floor_limit[1])) - - output = { - "neuroglancerPrecomputedMin": str(floor(min_max[0])), - "neuroglancerPrecomputedMax": str(ceil(min_max[1])), - "neuroglancerPrecomputedFloor": str(floor(floor_limit[0])), - "neuroglancerPrecomputedLimit": str(ceil(floor_limit[1])), - } - - logger.debug(f"output: {output}") - if output_json: - import json - - with open(output_json, "w") as fp: - json.dump(output, fp) - else: - print(output) - - return output - - -if __name__ == "__main__": - main() diff --git a/pytools/ng/mrc2nifti.py b/pytools/ng/mrc2nifti.py index 708a1be..b52c263 100755 --- a/pytools/ng/mrc2nifti.py +++ b/pytools/ng/mrc2nifti.py @@ -36,7 +36,6 @@ def sub_volume_execute(inplace=True): def wrapper(func): @wraps(func) def slice_by_slice(image: sitk.Image, *args, **kwargs): - dim = image.GetDimension() iter_dim = 2 diff --git a/pytools/utils/histogram.py b/pytools/utils/histogram.py new file mode 100644 index 0000000..9e26368 --- /dev/null +++ b/pytools/utils/histogram.py @@ -0,0 +1,179 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0.txt +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import numpy as np +import logging +import zarr +import dask.array +from typing import Tuple, Any +from abc import ABC, abstractmethod +import math + +logger = logging.getLogger(__name__) + + +def weighted_quantile(values, quantiles, sample_weight=None, values_sorted=False, old_style=False): + """Very close to numpy.percentile, but supports weights. + NOTE: quantiles should be in [0, 1]! + :param values: numpy.array with data + :param quantiles: array-like with many quantiles needed + :param sample_weight: array-like of the same length as `array` + :param values_sorted: bool, if True, then will avoid sorting of + initial array + :param old_style: if True, will correct output to be consistent + with numpy.percentile. + :return: numpy.array with computed quantiles. + """ + values = np.array(values) + quantiles = np.array(quantiles) + if sample_weight is None: + sample_weight = np.ones(len(values)) + sample_weight = np.array(sample_weight) + + # Remove zero weighted samples + non_zero_mask = sample_weight != 0 + sample_weight = sample_weight[non_zero_mask] + values = values[non_zero_mask] + + assert np.all(quantiles >= 0) and np.all(quantiles <= 1), "quantiles should be in [0, 1]" + + if not values_sorted: + sorter = np.argsort(values) + values = values[sorter] + sample_weight = sample_weight[sorter] + + weighted_quantiles = np.cumsum(sample_weight) - 0.5 * sample_weight + if old_style: + # To be convenient with numpy.percentile + weighted_quantiles -= weighted_quantiles[0] + weighted_quantiles /= weighted_quantiles[-1] + else: + weighted_quantiles /= np.sum(sample_weight) + + return np.interp(quantiles, weighted_quantiles, values) + + +class HistogramBase(ABC): + @property + @abstractmethod + def dtype(self): + pass + + @abstractmethod + def compute_min_max(self) -> Tuple[Any, Any]: + pass + + def compute_histogram_bin_edges(self, number_of_bins=1024) -> np.array: + if np.issubdtype(self.dtype, np.integer) and 2 ** np.iinfo(self.dtype).bits < number_of_bins: + return np.arange(np.iinfo(self.dtype).min - 0.5, np.iinfo(self.dtype).max + 1.5) + + imin, imax = self.compute_min_max() + + if np.issubdtype(self.dtype, np.inexact): + if imax - imin < np.finfo(imin).eps * number_of_bins: + logger.warning("Computed difference between minimum and maximum is below tolerances.") + imax = imin + np.finfo(imin).eps * number_of_bins / 2 + imin = imin - np.finfo(imin).eps * number_of_bins / 2 + elif np.issubdtype(self.dtype, np.integer) and imax - imin < number_of_bins: + return np.arange(imin - 0.5, imax + 1.5) + + step = (imax - imin) / number_of_bins + + logger.debug(f"Computed minimum: {imin} maximum: {imax} step: {step}") + histogram_bin_edges = np.arange(imin, imax + step, step) + return histogram_bin_edges + + @abstractmethod + def compute_histogram(self, histogram_bin_edges=None, density=False) -> Tuple[np.array, np.array]: + pass + + +class DaskHistogramHelper(HistogramBase): + def __init__(self, arr: dask.array): + self._arr = arr + if not self._arr.dtype.isnative: + logging.info("ZARR array needs converting to native byteorder.") + self._arr = 
self._arr.astype(self._arr.dtype.newbyteorder("=")) + + def compute_min_max(self): + return self._arr.min(), self._arr.max() + + @property + def dtype(self): + return self._arr.dtype + + def compute_histogram(self, histogram_bin_edges=None, density=False) -> Tuple[np.array, np.array]: + if histogram_bin_edges is None: + if np.issubdtype(self.dtype, np.integer) and np.iinfo(self.dtype).bits <= 16: + histogram_bin_edges = self.compute_histogram_bin_edges( + number_of_bins=2 ** np.iinfo(self.dtype).bits + 1 + ) + + if np.dtype(self.dtype) in (np.uint8, np.uint16): + return ( + dask.array.bincount(self._arr.ravel(), minlength=len(histogram_bin_edges) - 1).compute(), + histogram_bin_edges, + ) + + else: + histogram_bin_edges = self.compute_histogram_bin_edges() + + h, bins = dask.array.histogram(self._arr.ravel(), bins=histogram_bin_edges, density=density) + return h.compute(), bins + + +class ZARRHistogramHelper(DaskHistogramHelper): + def __init__(self, filename): + za = zarr.open_array(filename, mode="r") + super().__init__(dask.array.from_zarr(za)) + logging.debug(za.info) + + +def histogram_robust_stats(hist, bin_edges): + """ + Computes the "median" and "mad" (Median Absolute Deviation). + + :param hist: The histogram weights ( density or count ). + :param bin_edges: The edges of the bins. This array should be one greater than the hist. + """ + assert len(hist) + 1 == len(bin_edges) + + mids = 0.5 * (bin_edges[1:] + bin_edges[:-1]) + + median = weighted_quantile(mids, values_sorted=True, sample_weight=hist, quantiles=0.5) + + mad = weighted_quantile(np.abs(mids - median), values_sorted=False, sample_weight=hist, quantiles=0.5) + + return {"median": median, "mad": mad} + + +def histogram_stats(hist, bin_edges): + """ + Computes the "mean", "var" (variance), and "sigma" (standard deviation) from the provided histogram. + + :param hist: The histogram weights ( density or count ). + :param bin_edges: The edges of the bins. This array should be one greater than the hist. + """ + + assert len(hist) + 1 == len(bin_edges) + + results = {} + + mids = 0.5 * (bin_edges[1:] + bin_edges[:-1]) + + results["mean"] = np.average(mids, weights=hist) + results["var"] = np.average((mids - results["mean"]) ** 2, weights=hist) + results["sigma"] = math.sqrt(results["var"]) + + return results diff --git a/pytools/workflow_functions.py b/pytools/workflow_functions.py new file mode 100644 index 0000000..8e57093 --- /dev/null +++ b/pytools/workflow_functions.py @@ -0,0 +1,87 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0.txt +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+
+from pathlib import Path
+from typing import Dict, Union
+import logging
+import math
+from SimpleITK.utilities.dask import from_sitk
+from pytools.utils.histogram import DaskHistogramHelper, ZARRHistogramHelper, histogram_robust_stats, weighted_quantile
+import SimpleITK as sitk
+import dask.array
+
+
+logger = logging.getLogger(__name__)
+
+
+def visual_min_max(
+    input_image: Union[Path, str],
+    mad_scale: float,
+) -> Dict[str, str]:
+    """Reads an input_image file or a zarr array directory to estimate the minimum and maximum intensity range to be
+    used for visualization of the data set in Neuroglancer.
+
+    Dask is used for parallel reading and statistics computation. The global scheduler is used for all operations and
+    can be changed with the standard Dask configuration.
+
+    :param input_image: If an image file, then SimpleITK is used to perform the IO. SimpleITK supports formats such as
+        mrc, nii, png, tiff, etc. A zarr array is detected by a directory containing a ".zarray" file. For an OME-NGFF
+        structured ZARR, a subdirectory such as "0" commonly contains the full-resolution ZARR array. Such a case would
+        be specified by "dirname.zarr/0".
+
+    :param mad_scale: The scale factor for the robust median absolute deviation (MAD) about the median to produce the
+        minimum and maximum range.
+
+    :returns: The resulting dictionary will contain the following data elements with integer values as strings:
+        - "neuroglancerPrecomputedMin"
+        - "neuroglancerPrecomputedMax"
+        - "neuroglancerPrecomputedFloor"
+        - "neuroglancerPrecomputedLimit"
+    """
+
+    input_image = Path(input_image)
+
+    if input_image.is_dir() and (input_image / ".zarray").exists():
+        histo = ZARRHistogramHelper(input_image)
+    elif input_image.suffix in (".nii", ".mha", ".mrc", ".rec"):
+        logger.info("Loading chunk with SimpleITK and dask...")
+        sitk_da = from_sitk(input_image, chunks=(1, -1, -1))
+        histo = DaskHistogramHelper(sitk_da)
+    else:
+        logger.info("Loading whole image with SimpleITK...")
+        img = sitk.ReadImage(input_image)
+        histo = DaskHistogramHelper(dask.array.from_array(sitk.GetArrayViewFromImage(img), chunks=(1, -1, -1)))
+
+    logger.debug(f"dask.config.global_config: {dask.config.global_config}")
+
+    logger.info(f'Building histogram for "{input_image}"...')
+    h, bins = histo.compute_histogram(histogram_bin_edges=None, density=False)
+
+    mids = 0.5 * (bins[1:] + bins[:-1])
+
+    logger.info("Computing statistics...")
+    stats = histogram_robust_stats(h, bins)
+    logger.debug(f"stats: {stats}")
+    min_max = (stats["median"] - stats["mad"] * mad_scale, stats["median"] + stats["mad"] * mad_scale)
+
+    floor_limit = weighted_quantile(mids, quantiles=[0.0, 1.0], sample_weight=h, values_sorted=True)
+
+    output = {
+        "neuroglancerPrecomputedMin": str(math.floor(min_max[0])),
+        "neuroglancerPrecomputedMax": str(math.ceil(min_max[1])),
+        "neuroglancerPrecomputedFloor": str(math.floor(floor_limit[0])),
+        "neuroglancerPrecomputedLimit": str(math.ceil(floor_limit[1])),
+    }
+
+    return output
diff --git a/test/test_histogram.py b/test/test_histogram.py
index 3c976e1..0844905 100644
--- a/test/test_histogram.py
+++ b/test/test_histogram.py
@@ -10,18 +10,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
# -import pytools.ng.build_histogram as pytools_hist +import pytools.utils.histogram as pytools_hist import pytest from pytest import approx -import pytools.ng.build_histogram import SimpleITK as sitk import numpy as np -from click.testing import CliRunner -import json +from pathlib import Path +import pytools.workflow_functions -def test_weighted_quantile(): +def test_weighted_quantile(): data = [2] h, b = np.histogram(data, bins=np.arange(-0.5, 10.5)) @@ -77,73 +76,18 @@ def test_histogram_robust_stats(): @pytest.mark.parametrize( - "image_mrc", - [sitk.sitkUInt8, sitk.sitkInt16, sitk.sitkUInt16, sitk.sitkFloat32], - indirect=["image_mrc"], -) -def test_stream_build_histogram(image_mrc): - - bin_edges1 = np.arange(np.iinfo(np.int16).min - 0.5, np.iinfo(np.uint16).max + 1.5) - - img = sitk.ReadImage(image_mrc) - - test_args = [ - {}, - {"extract_axis": 0}, - {"histogram_bin_edges": bin_edges1}, - {"histogram_bin_edges": bin_edges1, "extract_axis": 0}, - {"histogram_bin_edges": bin_edges1, "extract_axis": 1}, - {"histogram_bin_edges": bin_edges1, "extract_axis": 2}, - {"extract_axis": 0, "extract_step": 2}, - {"extract_axis": 1, "extract_step": 3}, - {"extract_axis": 2, "extract_step": 5}, - {"histogram_bin_edges": bin_edges1, "extract_axis": 0, "extract_step": 99}, - {"histogram_bin_edges": bin_edges1, "extract_axis": 1, "extract_step": 45}, - {"histogram_bin_edges": bin_edges1, "extract_axis": 2, "extract_step": 32}, - ] - for args in test_args: - h, b = pytools_hist.stream_build_histogram(image_mrc, **args) - assert np.sum(h) == img.GetNumberOfPixels(), f"with args '{args}'" - assert np.sum(h * 0.5 * (b[1:] + b[:-1])) == np.sum(sitk.GetArrayViewFromImage(img)), f"with args '{args}'" - - -args = ["--help", "--version"] - - -@pytest.mark.parametrize("cli_args", args) -def test_histogram_mai_help(cli_args): - runner = CliRunner() - result = runner.invoke(pytools.ng.build_histogram.main, cli_args.split()) - assert not result.exception - - -@pytest.mark.parametrize( - "image_mrc,expected_min, expected_max, expected_floor, expected_limit, clamp", + "image_mrc,expected_min, expected_max, expected_floor, expected_limit", [ - (sitk.sitkUInt8, 0, 0, 0, 0, False), - (sitk.sitkInt16, 0, 0, 0, 0, True), - (sitk.sitkUInt16, 0, 0, 0, 0, False), - ("uint16_uniform", 8191, 57344, 0, 65535, True), - ("uint16_uniform", 8191, 57344, 0, 65535, False), - ("float32_uniform", 0, 1, 0, 1, False), - ("uint8_bimodal", 0, 255, 0, 255, True), - ("uint8_bimodal", -64, 319, 0, 255, False), - (sitk.sitkFloat32, 0, 1, 0, 1, True), + (sitk.sitkUInt8, 0, 0, 0, 0), + (sitk.sitkUInt16, 0, 0, 0, 0), + ("uint16_uniform", 8191, 57344, 0, 65535), + ("float32_uniform", 0, 1, 0, 1), + ("uint8_bimodal", -64, 319, 0, 255), ], indirect=["image_mrc"], ) -def test_build_histogram_main(image_mrc, expected_min, expected_max, expected_floor, expected_limit, clamp): - runner = CliRunner() - output_filename = "out.json" - args = [image_mrc, "--mad", "1.5", "--output-json", output_filename] - if clamp: - args.append("--clamp") - print(args) - with runner.isolated_filesystem(): - result = runner.invoke(pytools.ng.build_histogram.main, args=args) - assert not result.exception - with open(output_filename) as fp: - res = json.load(fp) +def test_build_histogram_main(image_mrc, expected_min, expected_max, expected_floor, expected_limit): + res = pytools.visual_min_max(Path(image_mrc), mad_scale=1.5) assert "neuroglancerPrecomputedMin" in res assert "neuroglancerPrecomputedMax" in res @@ -157,3 +101,16 @@ def test_build_histogram_main(image_mrc, 
expected_min, expected_max, expected_fl
     assert type(res["neuroglancerPrecomputedMax"]) == str
     assert type(res["neuroglancerPrecomputedFloor"]) == str
     assert type(res["neuroglancerPrecomputedLimit"]) == str
+
+
+def test_build_histogram_zarr_main(image_ome_ngff):
+    res = pytools.visual_min_max(Path(image_ome_ngff) / "0", mad_scale=1.5)
+
+    assert "neuroglancerPrecomputedMin" in res
+    assert "neuroglancerPrecomputedMax" in res
+    assert "neuroglancerPrecomputedFloor" in res
+    assert "neuroglancerPrecomputedLimit" in res
+    assert type(res["neuroglancerPrecomputedMin"]) == str
+    assert type(res["neuroglancerPrecomputedMax"]) == str
+    assert type(res["neuroglancerPrecomputedFloor"]) == str
+    assert type(res["neuroglancerPrecomputedLimit"]) == str
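
A minimal usage sketch of the refactored library API introduced above (the input path "sample.mrc" and the 1.5 scale are illustrative values, not part of the change; the function, its parameters, and the result keys are those defined in pytools/workflow_functions.py and re-exported from pytools/__init__.py):

.. code-block :: python

    from pathlib import Path

    import pytools

    # Estimate a Neuroglancer display range as median +/- 1.5 * MAD of the intensity histogram.
    limits = pytools.visual_min_max(Path("sample.mrc"), mad_scale=1.5)

    # All four values are integers encoded as strings, e.g. {"neuroglancerPrecomputedMin": "-12", ...}.
    print(limits["neuroglancerPrecomputedMin"], limits["neuroglancerPrecomputedMax"])
    print(limits["neuroglancerPrecomputedFloor"], limits["neuroglancerPrecomputedLimit"])

The same call accepts an OME-NGFF ZARR array directory such as "volume.zarr/0", mirroring test_build_histogram_zarr_main above.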