Merge pull request #455 from chourroutm/main
Adding micr_XPCTzarr dataset
Showing 22 changed files with 364 additions and 6 deletions.
@@ -0,0 +1,3 @@
1 human brain sample imaged over 1 session.
Example dataset with empty images of X-ray phase-contrast tomography (XPCT) data, to illustrate the BIDS convention.
The images use the NGFF nested directory format.
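
Because the image chunks are truncated to zero bytes, this example is mostly useful for inspecting its OME-Zarr metadata. A minimal sketch of doing so with the zarr package (the path is the one added later in this commit; reading attributes does not require the chunk data):

import json
import zarr

# Open the OME-Zarr group read-only and dump its multiscales metadata (.zattrs)
root = zarr.open_group(
    "micr_XPCTzarr/sub-01/ses-01/micr/sub-01_ses-01_sample-brain_XPCT.ome.zarr",
    mode="r",
)
print(json.dumps(root.attrs.asdict(), indent=2))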
@@ -0,0 +1,77 @@
from pathlib import Path
import zarr
import dask.array
import ome_zarr, ome_zarr.io, ome_zarr.writer
from numcodecs import Blosc, Delta
import time

"""
You may use the following command to prepare a Python 3.8+ environment for the download of the dataset: `pip install -r data_retrieval_reqs.txt`
"""

data_path = Path("../sub-LADAF-2020-31/ses-01/micr/")  # path to the session directory
data_path.mkdir(parents=True, exist_ok=True)  # create the directory (and any missing parents)

# short utility function: open a remote N5 store and wrap the requested dataset as a dask array
def zarr_array(url, selector):
    print('Retrieving data from', url)
    n5_store = zarr.N5FSStore(url)
    root = zarr.group(store=n5_store)
    return dask.array.from_zarr(root[selector])

dataset_full = zarr_array("gs://ucl-hip-ct-35a68e99feaae8932b1d44da0358940b/LADAF-2020-31/brain/25.08um_complete-organ_bm05/", "s0")  # a dask.array.Array that points to the whole N5 dataset

# the default filters and compressors made the script crash; the ones below work:
filters = [Delta(dtype='i4')]
compressor = Blosc(cname='zstd', clevel=1, shuffle=Blosc.SHUFFLE)

path_roi = data_path / "sub-LADAF-2020-31_ses-01_sample-brain_XPCT.ome.zarr"  # full name of the dataset following the BIDS specification

print('Writing in', str(path_roi))

tic = time.time()

store = ome_zarr.io.parse_url(path_roi, mode="a").store  # NB: `mode="a"` should allow overwriting but does not at the moment, see https://github.com/ome/ome-zarr-py/issues/376
root = zarr.group(store=store)
ome_zarr.writer.write_image(
    image=dataset_full,
    group=root,
    scaler=None,
    axes=[
        {
            "name": "z",
            "type": "space",
            "units": "micrometer"  # voxel size is an isotropic 25.08 um
        },
        {
            "name": "y",
            "type": "space",
            "units": "micrometer"  # voxel size is an isotropic 25.08 um
        },
        {
            "name": "x",
            "type": "space",
            "units": "micrometer"  # voxel size is an isotropic 25.08 um
        }
    ],  # axis order of the dataset following the BIDS specification
    coordinate_transformations=[
        [
            {
                "scale": [
                    25.08,
                    25.08,
                    25.08
                ],  # voxel size is an isotropic 25.08 um
                "type": "scale"
            }
        ]
    ],
    storage_options=dict(
        chunks=(512, 512, 512),  # this chunk size may be altered depending on one's needs
        filters=filters,  # the default filters made the script crash
        compressor=compressor  # the default compressor made the script crash
    )
)

toc = time.time()

print('Writing completed in', (toc - tic) / 60, 'min!')
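
Once the write has finished, the result can be spot-checked without pulling the data again. A minimal sketch (not part of the committed script, using only standard zarr calls and the path written above):

import zarr

# Re-open the freshly written OME-Zarr group read-only; with scaler=None there is a
# single resolution level stored under "0"
written = zarr.open_group(
    "../sub-LADAF-2020-31/ses-01/micr/sub-LADAF-2020-31_ses-01_sample-brain_XPCT.ome.zarr",
    mode="r",
)
level0 = written["0"]
print(level0.shape, level0.chunks, level0.dtype)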
@@ -0,0 +1,5 @@
dask[complete]
zarr
ome-zarr
numcodecs
gcsfs
@@ -0,0 +1,39 @@
import urllib.request
import json

"""
You may use the following command to prepare a Python 3.8+ environment for the download of the dataset: `pip install -r metadata_formatter_reqs.txt`
"""

# The following metadata file can be downloaded from https://human-organ-atlas.esrf.eu/datasets/572252538
metadata_txt_file_uri = 'https://ids.esrf.fr/ids/getData?sessionId=182d0a3b-de3b-4602-8caf-9bd91dc5b0e5&datafileIds=572252539'  # This URI expires quite often

req = urllib.request.urlopen(metadata_txt_file_uri)

json_dict = {}

for line in req:
    text = line.decode('utf-8')
    if text.startswith('#') or text == '\r\n':
        pass
    else:
        # strip tabs, line endings and quotes, normalise missing values, then split on '='
        s = text.replace('\t', '').replace('\r\n', '').replace('"', '').replace('N.A.', 'n/a').split('=')

        # convert the value to int or float where possible, otherwise keep it as a string
        try:
            s[1] = int(s[1])
        except ValueError:
            try:
                s[1] = float(s[1])
            except ValueError:
                pass
        json_dict[s[0]] = s[1]

json_obj = json.dumps(json_dict)
print(json_obj)

"""
The metadata has been split into several files:
- micr_XPCTzarr/samples.json
- micr_XPCTzarr/sub-LADAF-2020-31/sub-LADAF-2020-31_sessions.tsv
- micr_XPCTzarr/sub-LADAF-2020-31/ses-01/micr/sub-LADAF-2020-31_ses-01_sample-brain_XPCT.json
Note that some fields are redundant.
"""
@@ -0,0 +1 @@
urllib3
@@ -0,0 +1,5 @@
#!/bin/bash
# Bash script to truncate all the files containing the binary chunked data as per ../../CONTRIBUTING.md
# Modified from ../../CONTRIBUTING.md

find ../sub-LADAF-2020-31/ses-01/micr/sub-LADAF-2020-31_ses-01_sample-brain_XPCT.ome.zarr/ -type f -regex ".*/[0-9]*" -exec truncate -s 0 {} +
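
A quick way to confirm the truncation worked is to check that every digit-named chunk file is now zero bytes. A sketch (not part of the committed files, same path as above):

from pathlib import Path

zarr_dir = Path("../sub-LADAF-2020-31/ses-01/micr/sub-LADAF-2020-31_ses-01_sample-brain_XPCT.ome.zarr")
# In the nested layout the chunk files are named with digits only
chunk_files = [p for p in zarr_dir.rglob("*") if p.is_file() and p.name.isdigit()]
non_empty = [p for p in chunk_files if p.stat().st_size > 0]
print(f"{len(chunk_files)} chunk files, {len(non_empty)} still non-empty")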
@@ -0,0 +1,15 @@
{
    "Name": "micr_XPCTzarr",
    "Authors": [
        "Matthieu Chourrout",
        "David Stansby",
        "Guillaume Gaisne",
        "Claire L. Walsh",
        "Peter D. Lee"
    ],
    "BIDSVersion": "1.10.0",
    "License": "CC-BY-4.0",
    "DatasetDOI": "doi:10.15151/ESRF-DC-572252655",
    "DatasetType": "raw",
    "HowToAcknowledge": "Please cite this dataset as: Tafforeau, P., Walsh, C., Wagner, W. L., Daniyal J. Jafree, Bellier, A., Werlein, C., Kühnel, M. P., Boller, E., Walker-Samuel, S., Robertus, J. L., Long, D. A., Jacob, J., Marussi, S., Eeline Brown, Holroyd, N., Jonigk, D. D., Ackermann, M., & Lee, P. D. (2021). Complete brain from the body donor LADAF-2020-31 (Version 1) [dataset]. European Synchrotron Radiation Facility. https://doi.org/10.15151/ESRF-DC-572252655"
}
@@ -0,0 +1,36 @@
{
    "participant_id": {
        "Description": "Unique alphanumeric participant ID starting with sub-"
    },
    "participant_name": {
        "Description": "full name or pseudonym of the participant, which can contain non-alphanumeric characters"
    },
    "sex": {
        "Description": "sex of the participant",
        "Levels": {
            "M": "male",
            "F": "female"
        }
    },
    "age": {
        "Description": "age of the participant",
        "Units": "year"
    },
    "weight": {
        "Description": "weight of the participant",
        "Units": "kg"
    },
    "height": {
        "Description": "height of the participant",
        "Units": "cm"
    },
    "species": {
        "Description": "binomial species name from the NCBI Taxonomy (https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi)"
    },
    "medical_information": {
        "Description": "medical information as provided by the biobank"
    },
    "institute": {
        "Description": "name of the biobank"
    }
}
@@ -0,0 +1,2 @@
participant_id	participant_name	sex	age	species	weight	height	institute	medical_information
sub-01	LADAF-2020-31	F	69	homo sapiens	40	145	Laboratoire d'Anatomie des Alpes Françaises	type 2 diabetes, pelvic radiation to treat cancer of the uterus, right colectomy (benign lesion on histopathology), bilateral nephrostomy for acute obstructive renal failure, cystectomy, omentectomy and peritoneal carcinoma with occlusive syndrome
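
The column definitions above document participants.tsv. A sketch of reading the two together (assuming the standard BIDS file names participants.tsv and participants.json at the dataset root, and that pandas is installed, which is not listed in either requirements file):

import json
import pandas as pd

participants = pd.read_csv("micr_XPCTzarr/participants.tsv", sep="\t")
with open("micr_XPCTzarr/participants.json") as f:
    column_docs = json.load(f)

# Print each TSV column next to its documented description
for col in participants.columns:
    print(col, "->", column_docs.get(col, {}).get("Description", "no description"))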
@@ -0,0 +1,17 @@
{
    "sample_id": {
        "Description": "Sample ID"
    },
    "participant_id": {
        "Description": "Participant ID from whom tissue samples have been acquired"
    },
    "sample_type": {
        "Description": "Type of sample from ENCODE Biosample Type (https://www.encodeproject.org/profiles/biosample_type)"
    },
    "sample_info": {
        "Description": "One-line title of the sample"
    },
    "sample_preparation": {
        "Description": "Specific preparation of the sample"
    }
}
@@ -0,0 +1,2 @@
sample_id	participant_id	sample_type	sample_info	sample_preparation
sample-brain	sub-01	tissue	complete brain from the body donor program of the Laboratoire d'Anatomie des Alpes Francaise (LADAF)	formalin fixed, progressive transfer to ethanol 70% with gentle vacuum degassing at each step, mounted with mixed agar gel at 70% ethanol, n.b. some damages due to the too rapid vacuum degassing
micr_XPCTzarr/sub-01/ses-01/micr/sub-01_ses-01_sample-brain_XPCT.json (57 additions)
@@ -0,0 +1,57 @@
{
    "InstitutionName": "European Synchrotron Radiation Facility",
    "StationName": "BM05 EBS dipole wiggler 0.85T",
    "BodyPart": "BRAIN",
    "SampleEnvironment": "ex vivo",
    "SampleFixation": "formalin",
    "SampleEmbedding": "mixed agar gel at 70% ethanol",
    "PixelSize": [
        25.08,
        25.08,
        25.08
    ],
    "PixelSizeUnits": "um",
    "AcquisitionParameters": {
        "XStep": "n/a",
        "XStages": 1,
        "YStep": "n/a",
        "YStages": 1,
        "ZStep": "2.2 mm",
        "ZStages": "2x79",
        "Projections": 9990,
        "RefN": "n/a",
        "DarkN": 400,
        "RefOn": "n/a",
        "ScanningMode": "continuous",
        "ExposureTime": "0.036 s",
        "AccExposureTime": "0.006 s",
        "AccFramesCount": 6,
        "PropDistance": "3475 mm",
        "Filters": "Mo 0.1mm Al 2mm SiO2 bars 12*5mm diameter",
        "DetAvgEnergy": "93 keV",
        "ScanGeometry": "quarter-acquisition, one scan in half-acquisition plus one annular scan",
        "ScanRange": "360 deg",
        "SensorName": "sCMOS PCO edge 4.2 CLHS",
        "SensorMode": "rolling shutter",
        "SensorPixelSize": "6.5 um",
        "Magnification": 0.24,
        "XPixelNum": 2048,
        "YPixelNum": 176,
        "OpticsType": "dzoom optic from BM05 based on Hasselblad 120mm F/4 macro objective",
        "Scintillator": "LuAG:Ce 2000 um",
        "SurfDoseRate": "10.5 Gy/s",
        "VoiDoseRate": "10.5 Gy/s",
        "VoiIntegDose": "2.48 kGy",
        "ScanTime": "7.88 min",
        "SeriesTime": "22 h"
    },
    "ProcessingParameters": {
        "RefApproach": "reference jar with 70% ethanol, single reference",
        "VolumeX": 5965,
        "VolumeY": 5965,
        "VolumeZ": 6990,
        "32to16BitsMin": -0.04,
        "32to16BitsMax": 0.1,
        "Jp2ComprRatio": 10
    }
}
micr_XPCTzarr/sub-01/ses-01/micr/sub-01_ses-01_sample-brain_XPCT.ome.zarr/.zattrs (40 additions)
@@ -0,0 +1,40 @@
{
    "multiscales": [
        {
            "axes": [
                {
                    "name": "z",
                    "type": "space",
                    "units": "micrometer"
                },
                {
                    "name": "y",
                    "type": "space",
                    "units": "micrometer"
                },
                {
                    "name": "x",
                    "type": "space",
                    "units": "micrometer"
                }
            ],
            "datasets": [
                {
                    "coordinateTransformations": [
                        {
                            "scale": [
                                25.08,
                                25.08,
                                25.08
                            ],
                            "type": "scale"
                        }
                    ],
                    "path": "0"
                }
            ],
            "name": "/",
            "version": "0.4"
        }
    ]
}
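
The voxel size recorded in the BIDS sidecar JSON (PixelSize) and the scale transform in this .zattrs describe the same quantity. A minimal consistency-check sketch (paths are the ones added in this commit, assuming it is run from the repository root):

import json
from pathlib import Path

micr = Path("micr_XPCTzarr/sub-01/ses-01/micr")
sidecar = json.loads((micr / "sub-01_ses-01_sample-brain_XPCT.json").read_text())
zattrs = json.loads((micr / "sub-01_ses-01_sample-brain_XPCT.ome.zarr/.zattrs").read_text())

# Compare the OME-Zarr scale transform against the sidecar's PixelSize (both in z, y, x order)
scale = zattrs["multiscales"][0]["datasets"][0]["coordinateTransformations"][0]["scale"]
assert scale == sidecar["PixelSize"], (scale, sidecar["PixelSize"])
print("voxel size (z, y, x):", scale, sidecar["PixelSizeUnits"])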
micr_XPCTzarr/sub-01/ses-01/micr/sub-01_ses-01_sample-brain_XPCT.ome.zarr/.zgroup (3 additions)
@@ -0,0 +1,3 @@
{
    "zarr_format": 2
}