Merge pull request #455 from chourroutm/main
Adding micr_XPCTzarr dataset
Showing 22 changed files with 364 additions and 6 deletions.
@@ -0,0 +1,3 @@
1 human brain sample imaged over 1 session.
Example dataset with empty images of X-ray phase-contrast tomography (XPCT) data, to illustrate the BIDS convention.
The images use the NGFF nested directory format.
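
Because the image chunks are truncated to zero bytes, this example is mostly useful for inspecting its OME-Zarr metadata. A minimal sketch of doing so with the zarr package (the path is the one added later in this commit; reading attributes does not require the chunk data):

import json
import zarr

# Open the OME-Zarr group read-only and dump its multiscales metadata (.zattrs)
root = zarr.open_group(
    "micr_XPCTzarr/sub-01/ses-01/micr/sub-01_ses-01_sample-brain_XPCT.ome.zarr",
    mode="r",
)
print(json.dumps(root.attrs.asdict(), indent=2))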
@@ -0,0 +1,77 @@
from pathlib import Path
import zarr
import dask.array
import ome_zarr, ome_zarr.io, ome_zarr.writer
from numcodecs import Blosc, Delta
import time

"""
You may use the following command to prepare a Python 3.8+ environment for the download of the dataset: `pip install -r data_retrieval_reqs.txt`
"""

data_path = Path("../sub-LADAF-2020-31/ses-01/micr/")  # path to the session directory
data_path.mkdir(parents=True, exist_ok=True)  # create the directory (and any missing parents)

# short utility function: open a remote N5 store and wrap the requested dataset as a dask array
def zarr_array(url, selector):
    print('Retrieving data from', url)
    n5_store = zarr.N5FSStore(url)
    root = zarr.group(store=n5_store)
    return dask.array.from_zarr(root[selector])

dataset_full = zarr_array("gs://ucl-hip-ct-35a68e99feaae8932b1d44da0358940b/LADAF-2020-31/brain/25.08um_complete-organ_bm05/", "s0")  # a dask.array.Array that points to the whole N5 dataset

# the default filters and compressors made the script crash; the ones below work:
filters = [Delta(dtype='i4')]
compressor = Blosc(cname='zstd', clevel=1, shuffle=Blosc.SHUFFLE)

path_roi = data_path / "sub-LADAF-2020-31_ses-01_sample-brain_XPCT.ome.zarr"  # full name of the dataset following the BIDS specification

print('Writing in', str(path_roi))

tic = time.time()

store = ome_zarr.io.parse_url(path_roi, mode="a").store  # NB: `mode="a"` should allow overwriting but does not at the moment, see https://github.com/ome/ome-zarr-py/issues/376
root = zarr.group(store=store)
ome_zarr.writer.write_image(
    image=dataset_full,
    group=root,
    scaler=None,
    axes=[
        {
            "name": "z",
            "type": "space",
            "units": "micrometer"  # voxel size is an isotropic 25.08 um
        },
        {
            "name": "y",
            "type": "space",
            "units": "micrometer"  # voxel size is an isotropic 25.08 um
        },
        {
            "name": "x",
            "type": "space",
            "units": "micrometer"  # voxel size is an isotropic 25.08 um
        }
    ],  # axis order of the dataset following the BIDS specification
    coordinate_transformations=[
        [
            {
                "scale": [
                    25.08,
                    25.08,
                    25.08
                ],  # voxel size is an isotropic 25.08 um
                "type": "scale"
            }
        ]
    ],
    storage_options=dict(
        chunks=(512, 512, 512),  # this chunk size may be altered depending on one's needs
        filters=filters,  # the default filters made the script crash
        compressor=compressor  # the default compressor made the script crash
    )
)

toc = time.time()

print('Writing completed in', (toc - tic) / 60, 'min!')
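
Once the write has finished, the result can be spot-checked without pulling the data again. A minimal sketch (not part of the committed script, using only standard zarr calls and the path written above):

import zarr

# Re-open the freshly written OME-Zarr group read-only; with scaler=None there is a
# single resolution level stored under "0"
written = zarr.open_group(
    "../sub-LADAF-2020-31/ses-01/micr/sub-LADAF-2020-31_ses-01_sample-brain_XPCT.ome.zarr",
    mode="r",
)
level0 = written["0"]
print(level0.shape, level0.chunks, level0.dtype)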
@@ -0,0 +1,5 @@
dask[complete]
zarr
ome-zarr
numcodecs
gcsfs
@@ -0,0 +1,39 @@
import urllib.request
import json

"""
You may use the following command to prepare a Python 3.8+ environment for the download of the dataset: `pip install -r metadata_formatter_reqs.txt`
"""

# The following metadata file can be downloaded from https://human-organ-atlas.esrf.eu/datasets/572252538
metadata_txt_file_uri = 'https://ids.esrf.fr/ids/getData?sessionId=182d0a3b-de3b-4602-8caf-9bd91dc5b0e5&datafileIds=572252539'  # This URI expires quite often

req = urllib.request.urlopen(metadata_txt_file_uri)

json_dict = {}

for line in req:
    text = line.decode('utf-8')
    if text.startswith('#') or text == '\r\n':
        pass
    else:
        # strip tabs, line endings and quotes, normalise missing values, then split on '='
        s = text.replace('\t', '').replace('\r\n', '').replace('"', '').replace('N.A.', 'n/a').split('=')

        # convert the value to int or float where possible, otherwise keep it as a string
        try:
            s[1] = int(s[1])
        except ValueError:
            try:
                s[1] = float(s[1])
            except ValueError:
                pass
        json_dict[s[0]] = s[1]

json_obj = json.dumps(json_dict)
print(json_obj)

"""
The metadata has been split into several files:
- micr_XPCTzarr/samples.json
- micr_XPCTzarr/sub-LADAF-2020-31/sub-LADAF-2020-31_sessions.tsv
- micr_XPCTzarr/sub-LADAF-2020-31/ses-01/micr/sub-LADAF-2020-31_ses-01_sample-brain_XPCT.json
Note that some fields are redundant.
"""
@@ -0,0 +1 @@
urllib3
@@ -0,0 +1,5 @@
#!/bin/bash
# Bash script to truncate all the files containing the binary chunked data as per ../../CONTRIBUTING.md
# Modified from ../../CONTRIBUTING.md

find ../sub-LADAF-2020-31/ses-01/micr/sub-LADAF-2020-31_ses-01_sample-brain_XPCT.ome.zarr/ -type f -regex ".*/[0-9]*" -exec truncate -s 0 {} +
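
A quick way to confirm the truncation worked is to check that every digit-named chunk file is now zero bytes. A sketch (not part of the committed files, same path as above):

from pathlib import Path

zarr_dir = Path("../sub-LADAF-2020-31/ses-01/micr/sub-LADAF-2020-31_ses-01_sample-brain_XPCT.ome.zarr")
# In the nested layout the chunk files are named with digits only
chunk_files = [p for p in zarr_dir.rglob("*") if p.is_file() and p.name.isdigit()]
non_empty = [p for p in chunk_files if p.stat().st_size > 0]
print(f"{len(chunk_files)} chunk files, {len(non_empty)} still non-empty")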
@@ -0,0 +1,15 @@
{
    "Name": "micr_XPCTzarr",
    "Authors": [
        "Matthieu Chourrout",
        "David Stansby",
        "Guillaume Gaisne",
        "Claire L. Walsh",
        "Peter D. Lee"
    ],
    "BIDSVersion": "1.10.0",
    "License": "CC-BY-4.0",
    "DatasetDOI": "doi:10.15151/ESRF-DC-572252655",
    "DatasetType": "raw",
    "HowToAcknowledge": "Please cite this dataset as: Tafforeau, P., Walsh, C., Wagner, W. L., Daniyal J. Jafree, Bellier, A., Werlein, C., Kühnel, M. P., Boller, E., Walker-Samuel, S., Robertus, J. L., Long, D. A., Jacob, J., Marussi, S., Eeline Brown, Holroyd, N., Jonigk, D. D., Ackermann, M., & Lee, P. D. (2021). Complete brain from the body donor LADAF-2020-31 (Version 1) [dataset]. European Synchrotron Radiation Facility. https://doi.org/10.15151/ESRF-DC-572252655"
}
@@ -0,0 +1,36 @@
{
    "participant_id": {
        "Description": "Unique alphanumeric participant ID starting with sub-"
    },
    "participant_name": {
        "Description": "full name or pseudonym of the participant, which can contain non-alphanumeric characters"
    },
    "sex": {
        "Description": "sex of the participant",
        "Levels": {
            "M": "male",
            "F": "female"
        }
    },
    "age": {
        "Description": "age of the participant",
        "Units": "year"
    },
    "weight": {
        "Description": "weight of the participant",
        "Units": "kg"
    },
    "height": {
        "Description": "height of the participant",
        "Units": "cm"
    },
    "species": {
        "Description": "binomial species name from the NCBI Taxonomy (https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi)"
    },
    "medical_information": {
        "Description": "medical information as provided by the biobank"
    },
    "institute": {
        "Description": "name of the biobank"
    }
}
@@ -0,0 +1,2 @@
participant_id	participant_name	sex	age	species	weight	height	institute	medical_information
sub-01	LADAF-2020-31	F	69	homo sapiens	40	145	Laboratoire d'Anatomie des Alpes Françaises	type 2 diabetes, pelvic radiation to treat cancer of the uterus, right colectomy (benign lesion on histopathology), bilateral nephrostomy for acute obstructive renal failure, cystectomy, omentectomy and peritoneal carcinoma with occlusive syndrome
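
The column definitions above document participants.tsv. A sketch of reading the two together (assuming the standard BIDS file names participants.tsv and participants.json at the dataset root, and that pandas is installed, which is not listed in either requirements file):

import json
import pandas as pd

participants = pd.read_csv("micr_XPCTzarr/participants.tsv", sep="\t")
with open("micr_XPCTzarr/participants.json") as f:
    column_docs = json.load(f)

# Print each TSV column next to its documented description
for col in participants.columns:
    print(col, "->", column_docs.get(col, {}).get("Description", "no description"))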
@@ -0,0 +1,17 @@
{
    "sample_id": {
        "Description": "Sample ID"
    },
    "participant_id": {
        "Description": "Participant ID from whom tissue samples have been acquired"
    },
    "sample_type": {
        "Description": "Type of sample from ENCODE Biosample Type (https://www.encodeproject.org/profiles/biosample_type)"
    },
    "sample_info": {
        "Description": "One-line title of the sample"
    },
    "sample_preparation": {
        "Description": "Specific preparation of the sample"
    }
}
@@ -0,0 +1,2 @@
sample_id	participant_id	sample_type	sample_info	sample_preparation
sample-brain	sub-01	tissue	complete brain from the body donor program of the Laboratoire d'Anatomie des Alpes Francaise (LADAF)	formalin fixed, progressive transfer to ethanol 70% with gentle vacuum degassing at each step, mounted with mixed agar gel at 70% ethanol, n.b. some damages due to the too rapid vacuum degassing
micr_XPCTzarr/sub-01/ses-01/micr/sub-01_ses-01_sample-brain_XPCT.json (57 additions)
@@ -0,0 +1,57 @@
{
    "InstitutionName": "European Synchrotron Radiation Facility",
    "StationName": "BM05 EBS dipole wiggler 0.85T",
    "BodyPart": "BRAIN",
    "SampleEnvironment": "ex vivo",
    "SampleFixation": "formalin",
    "SampleEmbedding": "mixed agar gel at 70% ethanol",
    "PixelSize": [
        25.08,
        25.08,
        25.08
    ],
    "PixelSizeUnits": "um",
    "AcquisitionParameters": {
        "XStep": "n/a",
        "XStages": 1,
        "YStep": "n/a",
        "YStages": 1,
        "ZStep": "2.2 mm",
        "ZStages": "2x79",
        "Projections": 9990,
        "RefN": "n/a",
        "DarkN": 400,
        "RefOn": "n/a",
        "ScanningMode": "continuous",
        "ExposureTime": "0.036 s",
        "AccExposureTime": "0.006 s",
        "AccFramesCount": 6,
        "PropDistance": "3475 mm",
        "Filters": "Mo 0.1mm Al 2mm SiO2 bars 12*5mm diameter",
        "DetAvgEnergy": "93 keV",
        "ScanGeometry": "quarter-acquisition, one scan in half-acquisition plus one annular scan",
        "ScanRange": "360 deg",
        "SensorName": "sCMOS PCO edge 4.2 CLHS",
        "SensorMode": "rolling shutter",
        "SensorPixelSize": "6.5 um",
        "Magnification": 0.24,
        "XPixelNum": 2048,
        "YPixelNum": 176,
        "OpticsType": "dzoom optic from BM05 based on Hasselblad 120mm F/4 macro objective",
        "Scintillator": "LuAG:Ce 2000 um",
        "SurfDoseRate": "10.5 Gy/s",
        "VoiDoseRate": "10.5 Gy/s",
        "VoiIntegDose": "2.48 kGy",
        "ScanTime": "7.88 min",
        "SeriesTime": "22 h"
    },
    "ProcessingParameters": {
        "RefApproach": "reference jar with 70% ethanol, single reference",
        "VolumeX": 5965,
        "VolumeY": 5965,
        "VolumeZ": 6990,
        "32to16BitsMin": -0.04,
        "32to16BitsMax": 0.1,
        "Jp2ComprRatio": 10
    }
}
micr_XPCTzarr/sub-01/ses-01/micr/sub-01_ses-01_sample-brain_XPCT.ome.zarr/.zattrs (40 additions)
@@ -0,0 +1,40 @@
{
    "multiscales": [
        {
            "axes": [
                {
                    "name": "z",
                    "type": "space",
                    "units": "micrometer"
                },
                {
                    "name": "y",
                    "type": "space",
                    "units": "micrometer"
                },
                {
                    "name": "x",
                    "type": "space",
                    "units": "micrometer"
                }
            ],
            "datasets": [
                {
                    "coordinateTransformations": [
                        {
                            "scale": [
                                25.08,
                                25.08,
                                25.08
                            ],
                            "type": "scale"
                        }
                    ],
                    "path": "0"
                }
            ],
            "name": "/",
            "version": "0.4"
        }
    ]
}
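
The voxel size recorded in the BIDS sidecar JSON (PixelSize) and the scale transform in this .zattrs describe the same quantity. A minimal consistency-check sketch (paths are the ones added in this commit, assuming it is run from the repository root):

import json
from pathlib import Path

micr = Path("micr_XPCTzarr/sub-01/ses-01/micr")
sidecar = json.loads((micr / "sub-01_ses-01_sample-brain_XPCT.json").read_text())
zattrs = json.loads((micr / "sub-01_ses-01_sample-brain_XPCT.ome.zarr/.zattrs").read_text())

# Compare the OME-Zarr scale transform against the sidecar's PixelSize (both in z, y, x order)
scale = zattrs["multiscales"][0]["datasets"][0]["coordinateTransformations"][0]["scale"]
assert scale == sidecar["PixelSize"], (scale, sidecar["PixelSize"])
print("voxel size (z, y, x):", scale, sidecar["PixelSizeUnits"])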
micr_XPCTzarr/sub-01/ses-01/micr/sub-01_ses-01_sample-brain_XPCT.ome.zarr/.zgroup (3 additions)
@@ -0,0 +1,3 @@
{
    "zarr_format": 2
}