From 3147e01bfbd2a058f41af73eb55debb3dea519b2 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 13 Aug 2021 11:06:53 -0400 Subject: [PATCH] [ENH] Use different masking thresholds for denoising and classification (#736) * Use two different adaptive masks. masksum_denoise: Threshold of 1, used for denoising and optimal combination. masksum_clf: Threshold of 3, used for decomposition and component selection. * Fix long line. * Derive classifier masks directly from denoising ones. * Use denoising mask for component figures. * Swap full and limited T2*/S0 maps. Now that the full maps are used throughout the pipeline, they are the primary T2*/S0 outputs. The limited maps are not used for anything, and are only written out if the workflow is run in verbose mode. * Apply the same logic to t2smap. * Fix tests. * Fix documentation about full vs. limited maps. * Fix bad merge. * Log updated masking procedure. * Fix! --- docs/approach.rst | 5 +- docs/outputs.rst | 36 +++++----- tedana/resources/config/outputs.json | 16 ++--- tedana/tests/data/fiu_four_echo_outputs.txt | 4 +- .../data/nih_five_echo_outputs_t2smap.txt | 4 +- .../data/nih_five_echo_outputs_verbose.txt | 4 +- tedana/tests/test_t2smap.py | 20 +++--- tedana/workflows/t2smap.py | 46 +++++++------ tedana/workflows/tedana.py | 66 ++++++++++++------- 9 files changed, 113 insertions(+), 88 deletions(-) diff --git a/docs/approach.rst b/docs/approach.rst index edf9a1961..8dd846e6a 100644 --- a/docs/approach.rst +++ b/docs/approach.rst @@ -108,12 +108,9 @@ this voxel), so the line is fit to all available data. ``tedana`` actually performs and uses two sets of :math:`T_{2}^*`/:math:`S_0` model fits. In one case, ``tedana`` estimates :math:`T_{2}^*` and :math:`S_0` for voxels with good signal in at least two echoes. - The resulting "limited" :math:`T_{2}^*` and :math:`S_0` maps are used throughout - most of the pipeline. In the other case, ``tedana`` estimates :math:`T_{2}^*` and :math:`S_0` for voxels with good data in only one echo as well, but uses the first two echoes for those voxels. - The resulting "full" :math:`T_{2}^*` and :math:`S_0` maps are used to generate the - optimally combined data. + The resulting "full" :math:`T_{2}^*` and :math:`S_0` maps are used throughout the rest of the pipeline. .. image:: /_static/a05_loglinear_regression.png diff --git a/docs/outputs.rst b/docs/outputs.rst index b6bd1eda9..4eefb884e 100644 --- a/docs/outputs.rst +++ b/docs/outputs.rst @@ -13,19 +13,19 @@ Outputs of the tedana workflow Filename Content ================================================ ===================================================== dataset_description.json Top-level metadata for the workflow. -T2starmap.nii.gz Limited estimated T2* 3D map. +T2starmap.nii.gz Full estimated T2* 3D map. Values are in seconds. The difference between the limited and full maps is that, for voxels affected by dropout where - only one echo contains good data, the full map - uses the single echo's value while the limited - map has a NaN. -S0map.nii.gz Limited S0 3D map. + only one echo contains good data, the full map uses + the T2* estimate from the first two echoes, while the + limited map has a NaN. +S0map.nii.gz Full S0 3D map. The difference between the limited and full maps is that, for voxels affected by dropout where - only one echo contains good data, the full map - uses the single echo's value while the limited - map has a NaN. + only one echo contains good data, the full map uses + the S0 estimate from the first two echoes, while the + limited map has a NaN. desc-optcom_bold.nii.gz Optimally combined time series. desc-optcomDenoised_bold.nii.gz Denoised optimally combined time series. Recommended dataset for analysis. @@ -79,15 +79,19 @@ If ``verbose`` is set to True: ============================================================== ===================================================== Filename Content ============================================================== ===================================================== -desc-full_T2starmap.nii.gz Full T2* map/time series. +desc-limited_T2starmap.nii.gz Limited T2* map/time series. Values are in seconds. - The difference between the limited and full maps is - that, for voxels affected by dropout where only one - echo contains good data, the full map uses the - single echo's value while the limited map has a NaN. - Only used for optimal combination. -desc-full_S0map.nii.gz Full S0 map/time series. Only used for optimal - combination. + The difference between the limited and full maps + is that, for voxels affected by dropout where + only one echo contains good data, the full map uses + the S0 estimate from the first two echoes, while the + limited map has a NaN. +desc-limited_S0map.nii.gz Limited S0 map/time series. + The difference between the limited and full maps + is that, for voxels affected by dropout where + only one echo contains good data, the full map uses + the S0 estimate from the first two echoes, while the + limited map has a NaN. echo-[echo]_desc-[PCA|ICA]_components.nii.gz Echo-wise PCA/ICA component weight maps. echo-[echo]_desc-[PCA|ICA]R2ModelPredictions_components.nii.gz Component- and voxel-wise R2-model predictions, separated by echo. diff --git a/tedana/resources/config/outputs.json b/tedana/resources/config/outputs.json index 186115a41..b67bfd8dc 100644 --- a/tedana/resources/config/outputs.json +++ b/tedana/resources/config/outputs.json @@ -4,11 +4,11 @@ "bidsv1.5.0": "desc-adaptiveGoodSignal_mask" }, "t2star img": { - "orig": "t2sv", + "orig": "t2svG", "bidsv1.5.0": "T2starmap" }, "s0 img": { - "orig": "s0v", + "orig": "s0vG", "bidsv1.5.0": "S0map" }, "combined img": { @@ -47,13 +47,13 @@ "orig": "lowk_ts_OC", "bidsv1.5.0": "desc-optcomRejected_bold" }, - "full t2star img": { - "orig": "t2svG", - "bidsv1.5.0": "desc-full_T2starmap" + "limited t2star img": { + "orig": "t2sv", + "bidsv1.5.0": "desc-limited_T2starmap" }, - "full s0 img": { - "orig": "s0vG", - "bidsv1.5.0": "desc-full_S0map" + "limited s0 img": { + "orig": "s0v", + "bidsv1.5.0": "desc-limited_S0map" }, "whitened img": { "orig": "ts_OC_whitened", diff --git a/tedana/tests/data/fiu_four_echo_outputs.txt b/tedana/tests/data/fiu_four_echo_outputs.txt index ec79e6cce..7e9ce1169 100644 --- a/tedana/tests/data/fiu_four_echo_outputs.txt +++ b/tedana/tests/data/fiu_four_echo_outputs.txt @@ -20,10 +20,10 @@ desc-PCA_mixing.tsv desc-PCA_stat-z_components.nii.gz desc-T1likeEffect_min.nii.gz desc-adaptiveGoodSignal_mask.nii.gz -desc-full_S0map.nii.gz -desc-full_T2starmap.nii.gz desc-globalSignal_map.nii.gz desc-globalSignal_timeseries.tsv +desc-limited_S0map.nii.gz +desc-limited_T2starmap.nii.gz desc-optcomAcceptedMIRDenoised_bold.nii.gz desc-optcomAccepted_bold.nii.gz desc-optcomDenoised_bold.nii.gz diff --git a/tedana/tests/data/nih_five_echo_outputs_t2smap.txt b/tedana/tests/data/nih_five_echo_outputs_t2smap.txt index 44424c03f..ce203aebd 100644 --- a/tedana/tests/data/nih_five_echo_outputs_t2smap.txt +++ b/tedana/tests/data/nih_five_echo_outputs_t2smap.txt @@ -1,6 +1,6 @@ dataset_description.json -desc-full_S0map.nii.gz -desc-full_T2starmap.nii.gz +desc-limited_S0map.nii.gz +desc-limited_T2starmap.nii.gz desc-optcom_bold.nii.gz S0map.nii.gz T2starmap.nii.gz diff --git a/tedana/tests/data/nih_five_echo_outputs_verbose.txt b/tedana/tests/data/nih_five_echo_outputs_verbose.txt index d9d7ca1dc..907b4ec49 100644 --- a/tedana/tests/data/nih_five_echo_outputs_verbose.txt +++ b/tedana/tests/data/nih_five_echo_outputs_verbose.txt @@ -18,8 +18,8 @@ desc-PCA_metrics.tsv desc-PCA_mixing.tsv desc-PCA_stat-z_components.nii.gz desc-adaptiveGoodSignal_mask.nii.gz -desc-full_S0map.nii.gz -desc-full_T2starmap.nii.gz +desc-limited_S0map.nii.gz +desc-limited_T2starmap.nii.gz desc-optcomAccepted_bold.nii.gz desc-optcomDenoised_bold.nii.gz desc-optcomPCAReduced_bold.nii.gz diff --git a/tedana/tests/test_t2smap.py b/tedana/tests/test_t2smap.py index 2dddc5754..071d91b4d 100644 --- a/tedana/tests/test_t2smap.py +++ b/tedana/tests/test_t2smap.py @@ -31,9 +31,9 @@ def test_basic_t2smap1(self): assert len(img.shape) == 3 img = nib.load(op.join(out_dir, 'S0map.nii.gz')) assert len(img.shape) == 3 - img = nib.load(op.join(out_dir, 'desc-full_T2starmap.nii.gz')) + img = nib.load(op.join(out_dir, 'desc-limited_T2starmap.nii.gz')) assert len(img.shape) == 3 - img = nib.load(op.join(out_dir, 'desc-full_S0map.nii.gz')) + img = nib.load(op.join(out_dir, 'desc-limited_S0map.nii.gz')) assert len(img.shape) == 3 img = nib.load(op.join(out_dir, 'desc-optcom_bold.nii.gz')) assert len(img.shape) == 4 @@ -57,9 +57,9 @@ def test_basic_t2smap2(self): assert len(img.shape) == 4 img = nib.load(op.join(out_dir, 'S0map.nii.gz')) assert len(img.shape) == 4 - img = nib.load(op.join(out_dir, 'desc-full_T2starmap.nii.gz')) + img = nib.load(op.join(out_dir, 'desc-limited_T2starmap.nii.gz')) assert len(img.shape) == 4 - img = nib.load(op.join(out_dir, 'desc-full_S0map.nii.gz')) + img = nib.load(op.join(out_dir, 'desc-limited_S0map.nii.gz')) assert len(img.shape) == 4 img = nib.load(op.join(out_dir, 'desc-optcom_bold.nii.gz')) assert len(img.shape) == 4 @@ -83,9 +83,9 @@ def test_basic_t2smap3(self): assert len(img.shape) == 3 img = nib.load(op.join(out_dir, 'S0map.nii.gz')) assert len(img.shape) == 3 - img = nib.load(op.join(out_dir, 'desc-full_T2starmap.nii.gz')) + img = nib.load(op.join(out_dir, 'desc-limited_T2starmap.nii.gz')) assert len(img.shape) == 3 - img = nib.load(op.join(out_dir, 'desc-full_S0map.nii.gz')) + img = nib.load(op.join(out_dir, 'desc-limited_S0map.nii.gz')) assert len(img.shape) == 3 img = nib.load(op.join(out_dir, 'desc-optcom_bold.nii.gz')) assert len(img.shape) == 4 @@ -109,9 +109,9 @@ def test_basic_t2smap4(self): assert len(img.shape) == 4 img = nib.load(op.join(out_dir, 'S0map.nii.gz')) assert len(img.shape) == 4 - img = nib.load(op.join(out_dir, 'desc-full_T2starmap.nii.gz')) + img = nib.load(op.join(out_dir, 'desc-limited_T2starmap.nii.gz')) assert len(img.shape) == 4 - img = nib.load(op.join(out_dir, 'desc-full_S0map.nii.gz')) + img = nib.load(op.join(out_dir, 'desc-limited_S0map.nii.gz')) assert len(img.shape) == 4 img = nib.load(op.join(out_dir, 'desc-optcom_bold.nii.gz')) assert len(img.shape) == 4 @@ -135,9 +135,9 @@ def test_t2smap_cli(self): assert len(img.shape) == 3 img = nib.load(op.join(out_dir, 'S0map.nii.gz')) assert len(img.shape) == 3 - img = nib.load(op.join(out_dir, 'desc-full_T2starmap.nii.gz')) + img = nib.load(op.join(out_dir, 'desc-limited_T2starmap.nii.gz')) assert len(img.shape) == 3 - img = nib.load(op.join(out_dir, 'desc-full_S0map.nii.gz')) + img = nib.load(op.join(out_dir, 'desc-limited_S0map.nii.gz')) assert len(img.shape) == 3 img = nib.load(op.join(out_dir, 'desc-optcom_bold.nii.gz')) assert len(img.shape) == 4 diff --git a/tedana/workflows/t2smap.py b/tedana/workflows/t2smap.py index 4970a305f..58c80b729 100644 --- a/tedana/workflows/t2smap.py +++ b/tedana/workflows/t2smap.py @@ -171,21 +171,27 @@ def t2smap_workflow(data, tes, out_dir='.', mask=None, ----- This workflow writes out several files, which are described below: - ========================== ================================================= + ============================= ================================================= Filename Content - ========================== ================================================= - T2starmap.nii.gz Limited estimated T2* 3D map or 4D timeseries. + ============================= ================================================= + T2starmap.nii.gz Estimated T2* 3D map or 4D timeseries. Will be a 3D map if ``fitmode`` is 'all' and a 4D timeseries if it is 'ts'. - S0map.nii.gz Limited S0 3D map or 4D timeseries. - desc-full_T2starmap.nii.gz Full T2* map/timeseries. The difference between + S0map.nii.gz S0 3D map or 4D timeseries. + desc-limited_T2starmap.nii.gz Limited T2* map/timeseries. The difference between the limited and full maps is that, for voxels affected by dropout where only one echo contains - good data, the full map uses the single echo's - value while the limited map has a NaN. - desc-full_S0map.nii.gz Full S0 map/timeseries. + good data, the full map uses the T2* estimate + from the first two echos, while the limited map + will have a NaN. + desc-limited_S0map.nii.gz Limited S0 map/timeseries. The difference between + the limited and full maps is that, for voxels + affected by dropout where only one echo contains + good data, the full map uses the S0 estimate + from the first two echos, while the limited map + will have a NaN. desc-optcom_bold.nii.gz Optimally combined timeseries. - ========================== ================================================= + ============================= ================================================= """ out_dir = op.abspath(out_dir) if not op.isdir(out_dir): @@ -234,11 +240,11 @@ def t2smap_workflow(data, tes, out_dir='.', mask=None, # set a hard cap for the T2* map/timeseries # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile - cap_t2s = stats.scoreatpercentile(t2s_limited.flatten(), 99.5, + cap_t2s = stats.scoreatpercentile(t2s_full.flatten(), 99.5, interpolation_method='lower') cap_t2s_sec = utils.millisec2sec(cap_t2s * 10.) LGR.debug('Setting cap on T2* map at {:.5f}s'.format(cap_t2s_sec)) - t2s_limited[t2s_limited > cap_t2s * 10] = cap_t2s + t2s_full[t2s_full > cap_t2s * 10] = cap_t2s LGR.info('Computing optimal combination') # optimally combine data @@ -246,24 +252,24 @@ def t2smap_workflow(data, tes, out_dir='.', mask=None, combmode=combmode) # clean up numerical errors - for arr in (OCcatd, s0_limited, t2s_limited): + for arr in (OCcatd, s0_full, t2s_full): np.nan_to_num(arr, copy=False) - s0_limited[s0_limited < 0] = 0 - t2s_limited[t2s_limited < 0] = 0 + s0_full[s0_full < 0] = 0 + t2s_full[t2s_full < 0] = 0 io_generator.save_file( - utils.millisec2sec(t2s_limited), + utils.millisec2sec(t2s_full), 't2star img', ) - io_generator.save_file(s0_limited, 's0 img') + io_generator.save_file(s0_full, 's0 img') io_generator.save_file( - utils.millisec2sec(t2s_full), - 'full t2star img', + utils.millisec2sec(t2s_limited), + 'limited t2star img', ) io_generator.save_file( - s0_full, - 'full s0 img', + s0_limited, + 'limited s0 img', ) io_generator.save_file(OCcatd, 'combined img') diff --git a/tedana/workflows/tedana.py b/tedana/workflows/tedana.py index 2259c7520..47b765eff 100644 --- a/tedana/workflows/tedana.py +++ b/tedana/workflows/tedana.py @@ -473,32 +473,50 @@ def tedana_workflow(data, tes, out_dir='.', mask=None, RepLGR.info("An initial mask was generated from the first echo using " "nilearn's compute_epi_mask function.") - # Create an adaptive mask with at least 3 good echoes. - mask, masksum = utils.make_adaptive_mask(catd, mask=mask, getsum=True, threshold=3) - LGR.debug('Retaining {}/{} samples'.format(mask.sum(), n_samp)) - io_generator.save_file(masksum, "adaptive mask img") + # Create an adaptive mask with at least 1 good echo, for denoising + mask_denoise, masksum_denoise = utils.make_adaptive_mask( + catd, + mask=mask, + getsum=True, + threshold=1, + ) + LGR.debug('Retaining {}/{} samples for denoising'.format(mask_denoise.sum(), n_samp)) + io_generator.save_file(masksum_denoise, "adaptive mask img") + + # Create an adaptive mask with at least 3 good echoes, for classification + masksum_clf = masksum_denoise.copy() + masksum_clf[masksum_clf < 3] = 0 + mask_clf = masksum_clf.astype(bool) + RepLGR.info( + "A two-stage masking procedure was applied, in which a liberal mask " + "(including voxels with good data in at least the first echo) was used for " + "optimal combination, T2*/S0 estimation, and denoising, while a more conservative mask " + "(restricted to voxels with good data in at least the first three echoes) was used for " + "the component classification procedure." + ) + LGR.debug('Retaining {}/{} samples for classification'.format(mask_clf.sum(), n_samp)) if t2smap is None: LGR.info('Computing T2* map') t2s_limited, s0_limited, t2s_full, s0_full = decay.fit_decay( - catd, tes, mask, masksum, fittype) + catd, tes, mask_denoise, masksum_denoise, fittype) # set a hard cap for the T2* map # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile - cap_t2s = stats.scoreatpercentile(t2s_limited.flatten(), 99.5, + cap_t2s = stats.scoreatpercentile(t2s_full.flatten(), 99.5, interpolation_method='lower') LGR.debug('Setting cap on T2* map at {:.5f}s'.format( utils.millisec2sec(cap_t2s))) - t2s_limited[t2s_limited > cap_t2s * 10] = cap_t2s - io_generator.save_file(utils.millisec2sec(t2s_limited), 't2star img') - io_generator.save_file(s0_limited, 's0 img') + t2s_full[t2s_full > cap_t2s * 10] = cap_t2s + io_generator.save_file(utils.millisec2sec(t2s_full), 't2star img') + io_generator.save_file(s0_full, 's0 img') if verbose: - io_generator.save_file(utils.millisec2sec(t2s_full), 'full t2star img') - io_generator.save_file(s0_full, 'full s0 img') + io_generator.save_file(utils.millisec2sec(t2s_limited), 'limited t2star img') + io_generator.save_file(s0_limited, 'limited s0 img') # optimally combine data - data_oc = combine.make_optcom(catd, tes, masksum, t2s=t2s_full, combmode=combmode) + data_oc = combine.make_optcom(catd, tes, masksum_denoise, t2s=t2s_full, combmode=combmode) # regress out global signal unless explicitly not desired if 'gsr' in gscontrol: @@ -509,14 +527,14 @@ def tedana_workflow(data, tes, out_dir='.', mask=None, if mixm is None: # Identify and remove thermal noise from data - dd, n_components = decomposition.tedpca(catd, data_oc, combmode, mask, - masksum, t2s_full, io_generator, + dd, n_components = decomposition.tedpca(catd, data_oc, combmode, mask_clf, + masksum_clf, t2s_full, io_generator, tes=tes, algorithm=tedpca, kdaw=10., rdaw=1., verbose=verbose, low_mem=low_mem) if verbose: - io_generator.save_file(utils.unmask(dd, mask), 'whitened img') + io_generator.save_file(utils.unmask(dd, mask_clf), 'whitened img') # Perform ICA, calculate metrics, and apply decision tree # Restart when ICA fails to converge or too few BOLD components found @@ -542,7 +560,7 @@ def tedana_workflow(data, tes, out_dir='.', mask=None, 'd_table_score' ] comptable = metrics.collect.generate_metrics( - catd, data_oc, mmix, masksum, tes, + catd, data_oc, mmix, masksum_clf, tes, io_generator, 'ICA', metrics=required_metrics, ) @@ -574,7 +592,7 @@ def tedana_workflow(data, tes, out_dir='.', mask=None, 'd_table_score' ] comptable = metrics.collect.generate_metrics( - catd, data_oc, mmix, masksum, tes, + catd, data_oc, mmix, masksum_clf, tes, io_generator, 'ICA', metrics=required_metrics, ) @@ -594,7 +612,7 @@ def tedana_workflow(data, tes, out_dir='.', mask=None, comp_names = comptable["Component"].values mixing_df = pd.DataFrame(data=mmix, columns=comp_names) io_generator.save_file(mixing_df, "ICA mixing tsv") - betas_oc = utils.unmask(computefeats2(data_oc, mmix, mask), mask) + betas_oc = utils.unmask(computefeats2(data_oc, mmix, mask_denoise), mask_denoise) io_generator.save_file(betas_oc, 'z-scored ICA components img') # Save component table and associated json @@ -641,17 +659,17 @@ def tedana_workflow(data, tes, out_dir='.', mask=None, "series.") io.writeresults(data_oc, - mask=mask, + mask=mask_denoise, comptable=comptable, mmix=mmix, n_vols=n_vols, io_generator=io_generator) if 'mir' in gscontrol: - gsc.minimum_image_regression(data_oc, mmix, mask, comptable, io_generator) + gsc.minimum_image_regression(data_oc, mmix, mask_denoise, comptable, io_generator) if verbose: - io.writeresults_echoes(catd, mmix, mask, comptable, io_generator) + io.writeresults_echoes(catd, mmix, mask_denoise, comptable, io_generator) # Write out BIDS-compatible description file derivative_metadata = { @@ -715,20 +733,20 @@ def tedana_workflow(data, tes, out_dir='.', mask=None, if not no_reports: LGR.info('Making figures folder with static component maps and timecourse plots.') - dn_ts, hikts, lowkts = io.denoise_ts(data_oc, mmix, mask, comptable) + dn_ts, hikts, lowkts = io.denoise_ts(data_oc, mmix, mask_denoise, comptable) reporting.static_figures.carpet_plot( optcom_ts=data_oc, denoised_ts=dn_ts, hikts=hikts, lowkts=lowkts, - mask=mask, + mask=mask_denoise, io_generator=io_generator, gscontrol=gscontrol, ) reporting.static_figures.comp_figures( data_oc, - mask=mask, + mask=mask_denoise, comptable=comptable, mmix=mmix_orig, io_generator=io_generator,