From ad664ae45dedb4246f44283d4931db24f4cfcad5 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Wed, 5 May 2021 17:20:16 -0400 Subject: [PATCH] [ENH] Generate BIDS Derivatives-compatible outputs (#691) * BIDS Derivatives-compatible outputs. * Switches to class-based output generator for file naming Co-authored-by: Taylor Salo Co-authored-by: Joshua Teves --- .gitignore | 5 +- docs/api.rst | 1 - docs/approach.rst | 16 +- docs/outputs.rst | 218 ++--- tedana/decomposition/pca.py | 109 ++- tedana/gscontrol.py | 44 +- tedana/io.py | 773 +++++++++++------- tedana/metrics/kundu_fit.py | 228 ++++-- tedana/reporting/dynamic_figures.py | 21 +- tedana/reporting/html_report.py | 16 +- tedana/reporting/static_figures.py | 17 +- tedana/resources/config/outputs.json | 186 +++++ tedana/selection/tedica.py | 104 ++- tedana/selection/tedpca.py | 28 +- .../tests/data/cornell_three_echo_outputs.txt | 43 +- tedana/tests/data/fiu_four_echo_outputs.txt | 119 +-- .../data/nih_five_echo_outputs_t2smap.txt | 1 + .../data/nih_five_echo_outputs_verbose.txt | 115 ++- tedana/tests/test_gscontrol.py | 15 +- tedana/tests/test_integration.py | 13 +- tedana/tests/test_io.py | 64 +- .../test_model_fit_dependence_metrics.py | 18 +- tedana/tests/test_model_kundu_metrics.py | 6 +- tedana/tests/test_selection.py | 12 +- tedana/utils.py | 10 + tedana/workflows/t2smap.py | 63 +- tedana/workflows/tedana.py | 269 ++++-- 27 files changed, 1667 insertions(+), 847 deletions(-) create mode 100644 tedana/resources/config/outputs.json diff --git a/.gitignore b/.gitignore index 51be73d77..39e49b8a2 100644 --- a/.gitignore +++ b/.gitignore @@ -108,4 +108,7 @@ ENV/ # jupyter notebooks .ipynb_checkpoints/ -*.ipynb \ No newline at end of file +*.ipynb + +# vim swap files +*.swp diff --git a/docs/api.rst b/docs/api.rst index 5d12afa4c..7e94c90ad 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -161,7 +161,6 @@ API tedana.io.add_decomp_prefix tedana.io.split_ts tedana.io.write_split_ts - tedana.io.writefeats tedana.io.writeresults tedana.io.writeresults_echoes diff --git a/docs/approach.rst b/docs/approach.rst index ff76cb839..18fcae697 100644 --- a/docs/approach.rst +++ b/docs/approach.rst @@ -69,7 +69,7 @@ estimate voxel-wise :math:`T_{2}^*` and :math:`S_0`. :math:`S_0` corresponds to the total signal in each voxel before decay and can reflect coil sensivity. :math:`T_{2}^*` corresponds to the rate at which a voxel decays over time, which is related to signal dropout and BOLD sensitivity. -Estimates of the parameters are saved as **t2sv.nii.gz** and **s0v.nii.gz**. +Estimates of the parameters are saved as **T2starmap.nii.gz** and **S0map.nii.gz**. While :math:`T_{2}^*` and :math:`S_0` in fact fluctuate over time, estimating them on a volume-by-volume basis with only a small number of echoes is not @@ -153,7 +153,7 @@ between the distributions for other echoes. The time series for the optimally combined data also looks like a combination of the other echoes (which it is). -This optimally combined data is written out as **ts_OC.nii.gz** +This optimally combined data is written out as **desc-optcom_bold.nii.gz** .. image:: /_static/a10_optimal_combination_timeseries.png @@ -194,7 +194,7 @@ component analysis (PCA). The goal of this step is to make it easier for the later ICA decomposition to converge. Dimensionality reduction is a common step prior to ICA. TEDPCA applies PCA to the optimally combined data in order to decompose it into component maps and -time series (saved as **mepca_mix.1D**). +time series (saved as **desc-PCA_mixing.tsv**). 
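The mixing matrix is a plain tab-delimited file, so it can be inspected
outside of ``tedana``. A minimal sketch with pandas (the output directory
name here is hypothetical):

.. code-block:: python

    import pandas as pd

    # Each column holds one component's time series, and the column name
    # is the component number.
    mixing = pd.read_csv("tedana_out/desc-PCA_mixing.tsv", sep="\t")
    print(mixing.shape)  # (n_volumes, n_components)
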
Here we can see time series for some example components (we don't really care about the maps): .. image:: /_static/a11_pca_component_timeseries.png @@ -277,9 +277,9 @@ Next, ``tedana`` applies TE-dependent independent component analysis (ICA) in order to identify and remove TE-independent (i.e., non-BOLD noise) components. The dimensionally reduced optimally combined data are first subjected to ICA in order to fit a mixing matrix to the whitened data. -This generates a number of independent timeseries (saved as **meica_mix.1D**), -as well as beta maps which show the spatial loading of these components on the -brain (**betas_OC.nii.gz**). +This generates a number of independent timeseries (saved as **desc-ICA_mixing.tsv**), +as well as parameter estimate maps which show the spatial loading of these components on the +brain (**desc-ICA_components.nii.gz**). .. image:: /_static/a13_ica_component_timeseries.png @@ -312,13 +312,13 @@ The blue and red lines show the predicted values for the :math:`S_0` and A decision tree is applied to :math:`\kappa`, :math:`\rho`, and other metrics in order to classify ICA components as TE-dependent (BOLD signal), TE-independent (non-BOLD noise), or neither (to be ignored). -These classifications are saved in `comp_table_ica.txt`. +These classifications are saved in **desc-tedana_metrics.tsv**. The actual decision tree is dependent on the component selection algorithm employed. ``tedana`` includes the option `kundu` (which uses hardcoded thresholds applied to each of the metrics). Components that are classified as noise are projected out of the optimally combined data, -yielding a denoised timeseries, which is saved as `dn_ts_OC.nii.gz`. +yielding a denoised timeseries, which is saved as **desc-optcomDenoised_bold.nii.gz**. .. image:: /_static/a15_denoised_data_timeseries.png diff --git a/docs/outputs.rst b/docs/outputs.rst index 22c781ab0..beedf8698 100644 --- a/docs/outputs.rst +++ b/docs/outputs.rst @@ -6,118 +6,136 @@ Outputs of tedana tedana derivatives ------------------ -====================== ===================================================== -Filename Content -====================== ===================================================== -t2sv.nii.gz Limited estimated T2* 3D map. - Values are in seconds. - The difference between the limited and full maps - is that, for voxels affected by dropout where - only one echo contains good data, the full map - uses the single echo's value while the limited - map has a NaN. -s0v.nii.gz Limited S0 3D map. - The difference between the limited and full maps - is that, for voxels affected by dropout where - only one echo contains good data, the full map - uses the single echo's value while the limited - map has a NaN. -ts_OC.nii.gz Optimally combined time series. -dn_ts_OC.nii.gz Denoised optimally combined time series. Recommended - dataset for analysis. -lowk_ts_OC.nii.gz Combined time series from rejected components. -midk_ts_OC.nii.gz Combined time series from "mid-k" rejected components. -hik_ts_OC.nii.gz High-kappa time series. This dataset does not - include thermal noise or low variance components. - Not the recommended dataset for analysis. -adaptive_mask.nii.gz Integer-valued mask used in the workflow, where - each voxel's value corresponds to the number of good - echoes to be used for T2*/S0 estimation. -pca_decomposition.json TEDPCA component table. A BIDS Derivatives-compatible - json file with summary metrics and inclusion/exclusion - information for each component from the PCA - decomposition. 
To view, you may want to use - ``io.load_comptable``, which returns a pandas - DataFrame from the json file. -pca_mixing.tsv Mixing matrix (component time series) from PCA - decomposition in a tab-delimited file. Each column is - a different component, and the column name is the - component number. -pca_components.nii.gz Component weight maps from PCA decomposition. - Each map corresponds to the same component index in - the mixing matrix and component table. -ica_decomposition.json TEDICA component table. A BIDS Derivatives-compatible - json file with summary metrics and inclusion/exclusion - information for each component from the ICA - decomposition. To view, you may want to use - ``io.load_comptable``, which returns a pandas - DataFrame from the json file. -ica_mixing.tsv Mixing matrix (component time series) from ICA - decomposition in a tab-delimited file. Each column is - a different component, and the column name is the - component number. -ica_components.nii.gz Component weight maps from ICA decomposition. - Values are z-transformed standardized regression - coefficients. Each map corresponds to the same - component index in the mixing matrix and component table. - Should be the same as "feats_OC2.nii.gz". -betas_OC.nii.gz Full ICA coefficient feature set. -betas_hik_OC.nii.gz High-kappa ICA coefficient feature set -feats_OC2.nii.gz Z-normalized spatial component maps -report.txt A summary report for the workflow with relevant - citations. -====================== ===================================================== +================================================ ===================================================== +Filename Content +================================================ ===================================================== +dataset_description.json Top-level metadata for the workflow. +T2starmap.nii.gz Limited estimated T2* 3D map. + Values are in seconds. + The difference between the limited and full maps + is that, for voxels affected by dropout where + only one echo contains good data, the full map + uses the single echo's value while the limited + map has a NaN. +S0map.nii.gz Limited S0 3D map. + The difference between the limited and full maps + is that, for voxels affected by dropout where + only one echo contains good data, the full map + uses the single echo's value while the limited + map has a NaN. +desc-optcom_bold.nii.gz Optimally combined time series. +desc-optcomDenoised_bold.nii.gz Denoised optimally combined time series. Recommended + dataset for analysis. +desc-optcomRejected_bold.nii.gz Combined time series from rejected components. +desc-optcomAccepted_bold.nii.gz High-kappa time series. This dataset does not + include thermal noise or low variance components. + Not the recommended dataset for analysis. +desc-adaptiveGoodSignal_mask.nii.gz Integer-valued mask used in the workflow, where + each voxel's value corresponds to the number of good + echoes to be used for T2\*/S0 estimation. +desc-PCA_mixing.tsv Mixing matrix (component time series) from PCA + decomposition in a tab-delimited file. Each column is + a different component, and the column name is the + component number. +desc-PCA_decomposition.json Metadata for the PCA decomposition. +desc-PCA_stat-z_components.nii.gz Component weight maps from PCA decomposition. + Each map corresponds to the same component index in + the mixing matrix and component table. + Maps are in z-statistics. +desc-PCA_metrics.tsv TEDPCA component table. 
A BIDS Derivatives-compatible + TSV file with summary metrics and inclusion/exclusion + information for each component from the PCA + decomposition. +desc-PCA_metrics.json Metadata about the metrics in ``desc-PCA_metrics.tsv``. +desc-ICA_mixing.tsv Mixing matrix (component time series) from ICA + decomposition in a tab-delimited file. Each column is + a different component, and the column name is the + component number. +desc-ICA_components.nii.gz Full ICA coefficient feature set. +desc-ICA_stat-z_components.nii.gz Z-statistic component weight maps from ICA + decomposition. + Values are z-transformed standardized regression + coefficients. Each map corresponds to the same + component index in the mixing matrix and component table. +desc-ICA_decomposition.json Metadata for the ICA decomposition. +desc-tedana_metrics.tsv TEDICA component table. A BIDS Derivatives-compatible + TSV file with summary metrics and inclusion/exclusion + information for each component from the ICA + decomposition. +desc-tedana_metrics.json Metadata about the metrics in + ``desc-tedana_metrics.tsv``. +desc-ICAAccepted_components.nii.gz High-kappa ICA coefficient feature set +desc-ICAAcceptedZ_components.nii.gz Z-normalized spatial component maps +report.txt A summary report for the workflow with relevant + citations. +tedana_report.html The interactive HTML report. +================================================ ===================================================== If ``verbose`` is set to True: -====================== ===================================================== -Filename Content -====================== ===================================================== -t2svG.nii.gz Full T2* map/time series. - Values are in seconds. - The difference between the limited and full maps is - that, for voxels affected by dropout where only one - echo contains good data, the full map uses the - single echo's value while the limited map has a NaN. - Only used for optimal combination. -s0vG.nii.gz Full S0 map/time series. Only used for optimal - combination. -hik_ts_e[echo].nii.gz High-Kappa time series for echo number ``echo`` -midk_ts_e[echo].nii.gz Mid-Kappa time series for echo number ``echo`` -lowk_ts_e[echo].nii.gz Low-Kappa time series for echo number ``echo`` -dn_ts_e[echo].nii.gz Denoised time series for echo number ``echo`` -====================== ===================================================== +============================================================== ===================================================== +Filename Content +============================================================== ===================================================== +desc-full_T2starmap.nii.gz Full T2* map/time series. + Values are in seconds. + The difference between the limited and full maps is + that, for voxels affected by dropout where only one + echo contains good data, the full map uses the + single echo's value while the limited map has a NaN. + Only used for optimal combination. +desc-full_S0map.nii.gz Full S0 map/time series. Only used for optimal + combination. +echo-[echo]_desc-[PCA|ICA]_components.nii.gz Echo-wise PCA/ICA component weight maps. +echo-[echo]_desc-[PCA|ICA]R2ModelPredictions_components.nii.gz Component- and voxel-wise R2-model predictions, + separated by echo. +echo-[echo]_desc-[PCA|ICA]S0ModelPredictions_components.nii.gz Component- and voxel-wise S0-model predictions, + separated by echo. +desc-[PCA|ICA]AveragingWeights_components.nii.gz Component-wise averaging weights for metric + calculation. 
+desc-optcomPCAReduced_bold.nii.gz Optimally combined data after dimensionality + reduction with PCA. This is the input to the ICA. +echo-[echo]_desc-Accepted_bold.nii.gz High-Kappa time series for echo number ``echo`` +echo-[echo]_desc-Rejected_bold.nii.gz Low-Kappa time series for echo number ``echo`` +echo-[echo]_desc-Denoised_bold.nii.gz Denoised time series for echo number ``echo`` +============================================================== ===================================================== If ``gscontrol`` includes 'gsr': -====================== ===================================================== -Filename Content -====================== ===================================================== -T1gs.nii.gz Spatial global signal -glsig.1D Time series of global signal from optimally combined - data. -tsoc_orig.nii.gz Optimally combined time series with global signal - retained. -tsoc_nogs.nii.gz Optimally combined time series with global signal - removed. -====================== ===================================================== +================================================ ===================================================== +Filename Content +================================================ ===================================================== +desc-globalSignal_map.nii.gz Spatial global signal +desc-globalSignal_timeseries.tsv Time series of global signal from optimally combined + data. +desc-optcomWithGlobalSignal_bold.nii.gz Optimally combined time series with global signal + retained. +desc-optcomNoGlobalSignal_bold.nii.gz Optimally combined time series with global signal + removed. +================================================ ===================================================== If ``gscontrol`` includes 't1c': -======================= ===================================================== -Filename Content -======================= ===================================================== -sphis_hik.nii.gz T1-like effect -hik_ts_OC_T1c.nii.gz T1 corrected high-kappa time series by regression -dn_ts_OC_T1c.nii.gz T1 corrected denoised time series -betas_hik_OC_T1c.nii.gz T1-GS corrected high-kappa components -meica_mix_T1c.1D T1-GS corrected mixing matrix -======================= ===================================================== +================================================ ===================================================== +Filename Content +================================================ ===================================================== +desc-T1likeEffect_min.nii.gz T1-like effect +desc-optcomAcceptedT1cDenoised_bold.nii.gz T1-corrected high-kappa time series by regression +desc-optcomT1cDenoised_bold.nii.gz T1-corrected denoised time series +desc-TEDICAAcceptedT1cDenoised_components.nii.gz T1-GS corrected high-kappa components +desc-TEDICAT1cDenoised_mixing.tsv T1-GS corrected mixing matrix +================================================ ===================================================== Component tables ---------------- -TEDPCA and TEDICA use tab-delimited tables to track relevant metrics, component +TEDPCA and TEDICA use component tables to track relevant metrics, component classifications, and rationales behind classifications. +The component tables are stored as json files for BIDS-compatibility. +This format is not very conducive to manual review, which is why we have +:py:func:`tedana.io.load_comptable` to load the json file into a pandas +DataFrame. 
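For example, once the table is loaded, the rejected components and the
reasons for their rejection can be listed directly. A minimal sketch (the
filename below is hypothetical and depends on the run's configuration):

.. code-block:: python

    from tedana import io

    comptable = io.load_comptable("tedana_out/ica_decomposition.json")
    rejected = comptable[comptable["classification"] == "rejected"]
    print(rejected[["classification", "rationale"]])
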
+ +In order to make sense of the rationale codes in the component tables, +consult the tables below. TEDPCA rationale codes start with a "P", while TEDICA codes start with an "I". =============== ============================================================= @@ -173,7 +191,7 @@ The report is saved in a plain-text file, report.txt, in the output directory. An example report - TE-dependence analysis was performed on input data. An initial mask was generated from the first echo using nilearn's compute_epi_mask function. An adaptive mask was then generated, in which each voxel's value reflects the number of echoes with 'good' data. A monoexponential model was fit to the data at each voxel using log-linear regression in order to estimate T2* and S0 maps. For each voxel, the value from the adaptive mask was used to determine which echoes would be used to estimate T2* and S0. Multi-echo data were then optimally combined using the 't2s' (Posse et al., 1999) combination method. Global signal regression was applied to the multi-echo and optimally combined datasets. Principal component analysis followed by the Kundu component selection decision tree (Kundu et al., 2013) was applied to the optimally combined data for dimensionality reduction. Independent component analysis was then used to decompose the dimensionally reduced dataset. A series of TE-dependence metrics were calculated for each ICA component, including Kappa, Rho, and variance explained. Next, component selection was performed to identify BOLD (TE-dependent), non-BOLD (TE-independent), and uncertain (low-variance) components using the Kundu decision tree (v2.5; Kundu et al., 2013). T1c global signal regression was then applied to the data in order to remove spatially diffuse noise. + TE-dependence analysis was performed on input data. An initial mask was generated from the first echo using nilearn's compute_epi_mask function. An adaptive mask was then generated, in which each voxel's value reflects the number of echoes with 'good' data. A monoexponential model was fit to the data at each voxel using nonlinear model fitting in order to estimate T2* and S0 maps, using T2*/S0 estimates from a log-linear fit as initial values. For each voxel, the value from the adaptive mask was used to determine which echoes would be used to estimate T2* and S0. In cases of model fit failure, T2*/S0 estimates from the log-linear fit were retained instead. Multi-echo data were then optimally combined using the T2* combination method (Posse et al., 1999). Principal component analysis in which the number of components was determined based on a variance explained threshold was applied to the optimally combined data for dimensionality reduction. A series of TE-dependence metrics were calculated for each component, including Kappa, Rho, and variance explained. Independent component analysis was then used to decompose the dimensionally reduced dataset. A series of TE-dependence metrics were calculated for each component, including Kappa, Rho, and variance explained. Next, component selection was performed to identify BOLD (TE-dependent), non-BOLD (TE-independent), and uncertain (low-variance) components using the Kundu decision tree (v2.5; Kundu et al., 2013). Rejected components' time series were then orthogonalized with respect to accepted components' time series. 
This workflow used numpy (Van Der Walt, Colbert, & Varoquaux, 2011), scipy (Jones et al., 2001), pandas (McKinney, 2010), scikit-learn (Pedregosa et al., 2011), nilearn, and nibabel (Brett et al., 2019). diff --git a/tedana/decomposition/pca.py b/tedana/decomposition/pca.py index 67f27faec..de3e886bc 100644 --- a/tedana/decomposition/pca.py +++ b/tedana/decomposition/pca.py @@ -2,7 +2,6 @@ PCA and related signal decomposition methods for tedana """ import logging -import os.path as op from numbers import Number import numpy as np @@ -48,8 +47,8 @@ def low_mem_pca(data): def tedpca(data_cat, data_oc, combmode, mask, adaptive_mask, t2sG, - ref_img, tes, algorithm='mdl', kdaw=10., rdaw=1., - out_dir='.', verbose=False, low_mem=False): + io_generator, tes, algorithm='mdl', kdaw=10., rdaw=1., + verbose=False, low_mem=False): """ Use principal components analysis (PCA) to identify and remove thermal noise from multi-echo data. @@ -73,8 +72,8 @@ def tedpca(data_cat, data_oc, combmode, mask, adaptive_mask, t2sG, For more information on thresholding, see `make_adaptive_mask`. t2sG : (S,) array_like Map of voxel-wise T2* estimates. - ref_img : :obj:`str` or img_like - Reference image to dictate how outputs are saved to disk + io_generator : :obj:`tedana.io.OutputGenerator` + The output generation object for this workflow tes : :obj:`list` List of echo times associated with `data_cat`, in milliseconds algorithm : {'kundu', 'kundu-stabilize', 'mdl', 'aic', 'kic', float}, optional @@ -91,8 +90,6 @@ def tedpca(data_cat, data_oc, combmode, mask, adaptive_mask, t2sG, rdaw : :obj:`float`, optional Dimensionality augmentation weight for Rho calculations. Must be a non-negative float, or -1 (a special value). Default is 1. - out_dir : :obj:`str`, optional - Output directory. verbose : :obj:`bool`, optional Whether to output files from fitmodels_direct or not. Default: False low_mem : :obj:`bool`, optional @@ -147,19 +144,20 @@ def tedpca(data_cat, data_oc, combmode, mask, adaptive_mask, t2sG, Outputs: This function writes out several files: - - ====================== ================================================= - Filename Content - ====================== ================================================= - pca_decomposition.json PCA component table. - pca_mixing.tsv PCA mixing matrix. - pca_components.nii.gz Component weight maps. - ====================== ================================================= + =========================== ============================================= + Default Filename Content + =========================== ============================================= + desc-PCA_decomposition.json PCA component table + desc-PCA_mixing.tsv PCA mixing matrix + desc-PCA_components.nii.gz Component weight maps + =========================== ============================================= See Also -------- - :func:`tedana.utils.make_adaptive_mask` : The function used to create the ``adaptive_mask`` - parameter. + :func:`tedana.utils.make_adaptive_mask` : The function used to create + the ``adaptive_mask` parameter. 
+ :module:`tedana.constants` : The module describing the filenames for + various naming conventions """ if algorithm == 'kundu': alg_str = ("followed by the Kundu component selection decision " @@ -204,8 +202,8 @@ def tedpca(data_cat, data_oc, combmode, mask, adaptive_mask, t2sG, data_z = (data_z - data_z.mean()) / data_z.std() # var normalize everything if algorithm in ['mdl', 'aic', 'kic']: - data_img = io.new_nii_like(ref_img, utils.unmask(data, mask)) - mask_img = io.new_nii_like(ref_img, mask.astype(int)) + data_img = io.new_nii_like(io_generator.reference_img, utils.unmask(data, mask)) + mask_img = io.new_nii_like(io_generator.reference_img, mask.astype(int)) voxel_comp_weights, varex, varex_norm, comp_ts = ma_pca( data_img, mask_img, algorithm, normalize=True) elif isinstance(algorithm, Number): @@ -231,10 +229,11 @@ def tedpca(data_cat, data_oc, combmode, mask, adaptive_mask, t2sG, # Compute Kappa and Rho for PCA comps # Normalize each component's time series vTmixN = stats.zscore(comp_ts, axis=0) - comptable, _, _, _ = metrics.dependence_metrics( - data_cat, data_oc, comp_ts, adaptive_mask, tes, ref_img, - reindex=False, mmixN=vTmixN, algorithm=None, - label='mepca_', out_dir=out_dir, verbose=verbose) + comptable, _, metric_metadata, _, _ = metrics.dependence_metrics( + data_cat, data_oc, comp_ts, adaptive_mask, tes, io_generator, + reindex=False, mmixN=vTmixN, algorithm=None, + label='PCA', verbose=verbose + ) # varex_norm from PCA overrides varex_norm from dependence_metrics, # but we retain the original @@ -243,15 +242,28 @@ def tedpca(data_cat, data_oc, combmode, mask, adaptive_mask, t2sG, comptable['normalized variance explained'] = varex_norm # write component maps to 4D image - comp_ts_z = stats.zscore(comp_ts, axis=0) - comp_maps = utils.unmask(computefeats2(data_oc, comp_ts_z, mask), mask) - io.filewrite(comp_maps, op.join(out_dir, 'pca_components.nii.gz'), ref_img) + comp_maps = utils.unmask(computefeats2(data_oc, comp_ts, mask), mask) + io_generator.save_file(comp_maps, 'z-scored PCA components img') # Select components using decision tree if algorithm == 'kundu': - comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw, stabilize=False) + comptable, metric_metadata = kundu_tedpca( + comptable, + metric_metadata, + n_echos, + kdaw, + rdaw, + stabilize=False, + ) elif algorithm == 'kundu-stabilize': - comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw, stabilize=True) + comptable, metric_metadata = kundu_tedpca( + comptable, + metric_metadata, + n_echos, + kdaw, + rdaw, + stabilize=True, + ) else: alg_str = "variance explained-based" if isinstance(algorithm, Number) else algorithm LGR.info('Selected {0} components with {1} dimensionality ' @@ -259,22 +271,39 @@ def tedpca(data_cat, data_oc, combmode, mask, adaptive_mask, t2sG, comptable['classification'] = 'accepted' comptable['rationale'] = '' - # Save decomposition + # Save decomposition files comp_names = [io.add_decomp_prefix(comp, prefix='pca', max_value=comptable.index.max()) for comp in comptable.index.values] mixing_df = pd.DataFrame(data=comp_ts, columns=comp_names) - mixing_df.to_csv(op.join(out_dir, 'pca_mixing.tsv'), sep='\t', index=False) - - comptable['Description'] = 'PCA fit to optimally combined data.' - mmix_dict = {} - mmix_dict['Method'] = ('Principal components analysis implemented by ' - 'sklearn. Components are sorted by variance ' - 'explained in descending order. 
' - 'Component signs are flipped to best match the ' - 'data.') - io.save_comptable(comptable, op.join(out_dir, 'pca_decomposition.json'), - label='pca', metadata=mmix_dict) + io_generator.save_file(mixing_df, "PCA mixing tsv") + + # Save component table and associated json + temp_comptable = comptable.set_index("Component", inplace=False) + io_generator.save_file(temp_comptable, "PCA metrics tsv") + + metric_metadata["Component"] = { + "LongName": "Component identifier", + "Description": ( + "The unique identifier of each component. " + "This identifier matches column names in the mixing matrix TSV file." + ), + } + io_generator.save_file(metric_metadata, "PCA metrics json") + + decomp_metadata = { + "Method": ( + "Principal components analysis implemented by sklearn. " + "Components are sorted by variance explained in descending order. " + "Component signs are flipped to best match the data." + ), + } + for comp_name in comp_names: + decomp_metadata[comp_name] = { + "Description": "PCA fit to optimally combined data.", + "Method": "tedana", + } + io_generator.save_file(decomp_metadata, "PCA decomposition json") acc = comptable[comptable.classification == 'accepted'].index.values n_components = acc.size diff --git a/tedana/gscontrol.py b/tedana/gscontrol.py index c72ae5b0b..a49ad3422 100644 --- a/tedana/gscontrol.py +++ b/tedana/gscontrol.py @@ -2,13 +2,13 @@ Global signal control methods """ import logging -import os.path as op import numpy as np +import pandas as pd from scipy import stats from scipy.special import lpmv -from tedana import io, utils +from tedana import utils from tedana.due import due, Doi LGR = logging.getLogger(__name__) @@ -16,7 +16,7 @@ RefLGR = logging.getLogger("REFERENCES") -def gscontrol_raw(catd, optcom, n_echos, ref_img, out_dir='.', dtrank=4): +def gscontrol_raw(catd, optcom, n_echos, io_generator, dtrank=4): """ Removes global signal from individual echo `catd` and `optcom` time series @@ -34,10 +34,8 @@ def gscontrol_raw(catd, optcom, n_echos, ref_img, out_dir='.', dtrank=4): Optimally combined functional data (i.e., the output of `make_optcom`) n_echos : :obj:`int` Number of echos in data. Should be the same as `E` dimension of `catd` - ref_img : :obj:`str` or img_like - Reference image to dictate how outputs are saved to disk - out_dir : :obj:`str`, optional - Output directory. + io_generator : :obj:`tedana.io.OutputGenerator` + The output generator for this workflow dtrank : :obj:`int`, optional Specifies degree of Legendre polynomial basis function for estimating spatial global signal. 
Default: 4 @@ -78,13 +76,15 @@ def gscontrol_raw(catd, optcom, n_echos, ref_img, out_dir='.', dtrank=4): detr = dat - np.dot(sol.T, Lmix.T)[0] sphis = (detr).min(axis=1) sphis -= sphis.mean() - io.filewrite(utils.unmask(sphis, Gmask), op.join(out_dir, 'T1gs'), ref_img) + io_generator.save_file(utils.unmask(sphis, Gmask), "gs img") # find time course ofc the spatial global signal # make basis with the Legendre basis glsig = np.linalg.lstsq(np.atleast_2d(sphis).T, dat, rcond=None)[0] glsig = stats.zscore(glsig, axis=None) - np.savetxt(op.join(out_dir, 'glsig.1D'), glsig) + + glsig_df = pd.DataFrame(data=glsig.T, columns=['global_signal']) + io_generator.save_file(glsig_df, "global signal time series tsv") glbase = np.hstack([Lmix, glsig.T]) # Project global signal out of optimally combined data @@ -92,9 +92,9 @@ def gscontrol_raw(catd, optcom, n_echos, ref_img, out_dir='.', dtrank=4): tsoc_nogs = dat - np.dot(np.atleast_2d(sol[dtrank]).T, np.atleast_2d(glbase.T[dtrank])) + Gmu[Gmask][:, np.newaxis] - io.filewrite(optcom, op.join(out_dir, 'tsoc_orig'), ref_img) + io_generator.save_file(optcom, "has gs combined img") dm_optcom = utils.unmask(tsoc_nogs, Gmask) - io.filewrite(dm_optcom, op.join(out_dir, 'tsoc_nogs'), ref_img) + io_generator.save_file(dm_optcom, "removed gs combined img") # Project glbase out of each echo dm_catd = catd.copy() # don't overwrite catd @@ -111,7 +111,7 @@ def gscontrol_raw(catd, optcom, n_echos, ref_img, out_dir='.', dtrank=4): @due.dcite(Doi("10.1073/pnas.1301725110"), description="Minimum image regression to remove T1-like effects " "from the denoised data.") -def minimum_image_regression(optcom_ts, mmix, mask, comptable, ref_img, out_dir="."): +def minimum_image_regression(optcom_ts, mmix, mask, comptable, io_generator): """ Perform minimum image regression (MIR) to remove T1-like effects from BOLD-like components. @@ -131,10 +131,8 @@ def minimum_image_regression(optcom_ts, mmix, mask, comptable, ref_img, out_dir= comptable : (C x X) :obj:`pandas.DataFrame` Component metric table. One row for each component, with a column for each metric. The index should be the component number. - ref_img : :obj:`str` or img_like - Reference image to dictate how outputs are saved to disk - out_dir : :obj:`str`, optional - Output directory. 
+ io_generator : :obj:`tedana.io.OutputGenerator` + The output generating object for this workflow Notes ----- @@ -202,7 +200,7 @@ def minimum_image_regression(optcom_ts, mmix, mask, comptable, ref_img, out_dir= mehk_ts = np.dot(comp_pes[:, acc], mmix[:, acc].T) t1_map = mehk_ts.min(axis=-1) # map of T1-like effect t1_map -= t1_map.mean() - io.filewrite(utils.unmask(t1_map, mask), op.join(out_dir, "sphis_hik"), ref_img) + io_generator.save_file(utils.unmask(t1_map, mask), "t1 like img") t1_map = t1_map[:, np.newaxis] # Find the global signal based on the T1-like effect @@ -213,11 +211,11 @@ def minimum_image_regression(optcom_ts, mmix, mask, comptable, ref_img, out_dir= np.linalg.lstsq(glob_sig.T, mehk_ts.T, rcond=None)[0].T, glob_sig ) hik_ts = mehk_noT1gs * optcom_std # rescale - io.filewrite(utils.unmask(hik_ts, mask), op.join(out_dir, "hik_ts_OC_MIR"), ref_img) + io_generator.save_file(utils.unmask(hik_ts, mask), "ICA accepted mir denoised img") # Make denoised version of T1-corrected time series medn_ts = optcom_mean + ((mehk_noT1gs + resid) * optcom_std) - io.filewrite(utils.unmask(medn_ts, mask), op.join(out_dir, "dn_ts_OC_MIR"), ref_img) + io_generator.save_file(utils.unmask(medn_ts, mask), "mir denoised img") # Orthogonalize mixing matrix w.r.t. T1-GS mmix_noT1gs = mmix.T - np.dot( @@ -230,9 +228,9 @@ def minimum_image_regression(optcom_ts, mmix, mask, comptable, ref_img, out_dir= # Write T1-corrected components and mixing matrix comp_pes_norm = np.linalg.lstsq(mmix_noT1gs_z.T, optcom_z.T, rcond=None)[0].T - io.filewrite( + io_generator.save_file( utils.unmask(comp_pes_norm[:, 2:], mask), - op.join(out_dir, "betas_hik_OC_MIR"), - ref_img, + "ICA accepted mir component weights img", ) - np.savetxt(op.join(out_dir, "meica_mix_MIR.1D"), mmix_noT1gs) + mixing_df = pd.DataFrame(data=mmix_noT1gs.T, columns=comptable["Component"].values) + io_generator.save_file(mixing_df, "ICA MIR mixing tsv") diff --git a/tedana/io.py b/tedana/io.py index 8e5e70c5b..e32a5d803 100644 --- a/tedana/io.py +++ b/tedana/io.py @@ -1,66 +1,331 @@ +"""The io module handles most file input and output in the `tedana` workflow. + +Other functions in the module help write outputs which require multiple +data sources, assist in writing per-echo verbose outputs, or act as helper +functions for any of the above. """ -Functions to handle file input/output -""" -import json import logging +import os import os.path as op +import json +from string import Formatter import numpy as np -import pandas as pd import nibabel as nib -from nibabel.filename_parser import splitext_addext +import pandas as pd from nilearn._utils import check_niimg from nilearn.image import new_img_like from tedana import utils from tedana.stats import computefeats2, get_coeffs + LGR = logging.getLogger(__name__) RepLGR = logging.getLogger('REPORT') RefLGR = logging.getLogger('REFERENCES') -def split_ts(data, mmix, mask, comptable): +class OutputGenerator(): + """A class for managing tedana outputs. + + Parameters + ---------- + reference_img : img_like + The reference image which defines affine, shape, etc. of output images. + convention : {"bidsv1.5.0", "orig", or other str}, optional + Default is "bidsv1.5.0". Must correspond to a key in ``config``. + out_dir : str, optional + Output directory. Default is current working directory ("."). + prefix : None or str, optional + Prefix to prepend to output filenames. Default is None, which means no prefix will be used. 
+ config : str, optional + Path to configuration json file, which determines appropriate filenames based on file + descriptions. Default is "auto", which uses tedana's default configuration file. + make_figures : bool, optional + Whether or not to actually make a figures directory + + Attributes + ---------- + config : dict + File naming configuration information. + reference_img : img_like + The reference image which defines affine, shape, etc. of output images. + convention : str + The naming convention for output files. + out_dir : str + Directory in which outputs will be saved. + figures_dir : str + Directory in which figures will be saved. + This will correspond to a "figures" subfolder of ``out_dir``. + prefix : str + Prefix to prepend to output filenames. """ - Splits `data` time series into accepted component time series and remainder + + def __init__( + self, + reference_img, + convention="bidsv1.5.0", + out_dir=".", + prefix="", + config="auto", + make_figures=True + ): + + if config == "auto": + config = op.join(utils.get_resource_path(), "config", "outputs.json") + + if convention == "bids": + # modify to update default bids convention number + convention = "bidsv1.5.0" + + config = load_json(config) + + cfg = {} + for k, v in config.items(): + if convention not in v.keys(): + raise ValueError( + f"Convention {convention} is not one of the supported conventions " + f"({', '.join(v.keys())})" + ) + cfg[k] = v[convention] + self.config = cfg + self.reference_img = check_niimg(reference_img) + self.convention = convention + self.out_dir = op.abspath(out_dir) + self.figures_dir = op.join(out_dir, "figures") + self.prefix = prefix + "_" if prefix != "" else "" + + if not op.isdir(self.out_dir): + LGR.info(f"Generating output directory: {self.out_dir}") + os.mkdir(self.out_dir) + + if not op.isdir(self.figures_dir) and make_figures: + LGR.info(f"Generating figures directory: {self.figures_dir}") + os.mkdir(self.figures_dir) + + def _determine_extension(self, description, name): + """Infer the extension for a file based on its description. + + Parameters + ---------- + description : str + The description of the file. Corresponds to a key in ``self.config``. + name : str + Filename corresponding to the description within ``self.config``. + + Returns + ------- + extension : str + File extension for the filename. + """ + if description.endswith("img"): + allowed_extensions = [".nii", ".nii.gz"] + preferred_extension = ".nii.gz" + elif description.endswith("json"): + allowed_extensions = [".json"] + preferred_extension = ".json" + elif description.endswith("tsv"): + allowed_extensions = [".tsv"] + preferred_extension = ".tsv" + + if not any(name.endswith(ext) for ext in allowed_extensions): + extension = preferred_extension + else: + extension = "" + + return extension + + def get_name(self, description, **kwargs): + """Generate a file full path to simplify file output. + + Parameters + ---------- + description : str + The description of the file. Must be a key in ``self.config``. + kwargs : keyword arguments + Additional arguments used to format the base filename string. + The most common is ``echo``. + + Returns + ------- + name : str + The full path for the filename. + + Notes + ----- + This function uses kwargs to allow us to match named format + specifiers in a configuration with a variable passed to this + function. 
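        For example, a hypothetical call for an echo-specific output
        might look like::

            name = io_generator.get_name("denoised ts split img", echo=2)

        where ``echo`` fills the matching format field in the configured
        filename pattern.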
get_fields simplifies this process by creating a set of + name variables based on the configuration which we expect to match + a passed variable name, and then we fill in the value. + """ + name = self.config[description] + extension = self._determine_extension(description, name) + + name_variables = get_fields(name) + for key, value in kwargs.items(): + if key not in name_variables: + raise ValueError( + f'Argument {key} passed but has no match in format ' + f'string. Available format variables: ' + f'{name_variables} from {kwargs} and {name}.' + ) + + name = name.format(**kwargs) + name = op.join(self.out_dir, self.prefix + name + extension) + return name + + def save_file(self, data, description, **kwargs): + """Save data to a filename determined by the file's description and config info. + + Parameters + ---------- + data : dict or img_like or pandas.DataFrame + Data to save to file. + description : str + Description of the data, used to determine the appropriate filename from + ``self.config``. + + Returns + ------- + name : str + The full file path of the saved file. + """ + name = self.get_name(description, **kwargs) + if description.endswith("img"): + self.save_img(data, name) + elif description.endswith("json"): + self.save_json(data, name) + elif description.endswith("tsv"): + self.save_tsv(data, name) + + return name + + def save_img(self, data, name): + """Save image data to a nifti file. + + Parameters + ---------- + data : img_like + Data to save to a file. + name : str + Full file path for output file. + """ + data_type = type(data) + if not isinstance(data, np.ndarray): + raise TypeError( + f"Data supplied must of type np.ndarray, not {data_type}." + ) + if data.ndim not in (1, 2): + raise TypeError( + "Data must have number of dimensions in (1, 2), not " + f"{data.ndim}" + ) + img = new_nii_like(self.reference_img, data) + img.to_filename(name) + + def save_json(self, data, name): + """Save dictionary data to a json file. + + Parameters + ---------- + data : dict + Data to save to a file. + name : str + Full file path for output file. + """ + data_type = type(data) + if not isinstance(data, dict): + raise TypeError(f"data must be a dict, not type {data_type}.") + with open(name, "w") as fo: + json.dump(data, fo, indent=4, sort_keys=True) + + def save_tsv(self, data, name): + """Save DataFrame to a tsv file. + + Parameters + ---------- + data : pandas.DataFrame + Data to save to a file. + name : str + Full file path for output file. + """ + data_type = type(data) + if not isinstance(data, pd.DataFrame): + raise TypeError(f"data must be pd.Data, not type {data_type}.") + data.to_csv(name, sep="\t", line_terminator="\n", na_rep="n/a", index=False) + + +def get_fields(name): + """Identify all fields in an unformatted string. + + Examples + -------- + >>> string = "{field1}{field2}{field3}" + >>> fields = get_fields(string) + >>> fields + ["field1", "field2", "field3"] + """ + formatter = Formatter() + fields = [temp[1] for temp in formatter.parse(name) if temp[1] is not None] + return fields + + +def load_json(path: str) -> dict: + """Load a json file from path. Parameters ---------- - data : (S x T) array_like - Input data, where `S` is samples and `T` is time - mmix : (T x C) array_like - Mixing matrix for converting input data to component space, where `C` - is components and `T` is the same as in `data` - mask : (S,) array_like - Boolean mask array - comptable : (C x X) :obj:`pandas.DataFrame` - Component metric table. 
One row for each component, with a column for - each metric. Requires at least two columns: "component" and - "classification". + path: str + The path to the json file to load Returns ------- - hikts : (S x T) :obj:`numpy.ndarray` - Time series reconstructed using only components in `acc` - rest : (S x T) :obj:`numpy.ndarray` - Original data with `hikts` removed + data : dict + A dictionary representation of the JSON data. + + Raises + ------ + FileNotFoundError if the file does not exist + IsADirectoryError if the path is a directory instead of a file """ - acc = comptable[comptable.classification == 'accepted'].index.values + with open(path, 'r') as f: + try: + data = json.load(f) + except json.decoder.JSONDecodeError: + raise ValueError(f"File {path} is not a valid JSON.") + return data - cbetas = get_coeffs(data - data.mean(axis=-1, keepdims=True), - mmix, mask) - betas = cbetas[mask] - if len(acc) != 0: - hikts = utils.unmask(betas[:, acc].dot(mmix.T[acc, :]), mask) - else: - hikts = None - resid = data - hikts +def add_decomp_prefix(comp_num, prefix, max_value): + """ + Create component name with leading zeros matching number of components - return hikts, resid + Parameters + ---------- + comp_num : :obj:`int` + Component number + prefix : :obj:`str` + A prefix to prepend to the component name. An underscore is + automatically added between the prefix and the component number. + max_value : :obj:`int` + The maximum component number in the whole decomposition. Used to + determine the appropriate number of leading zeros in the component + name. + + Returns + ------- + comp_name : :obj:`str` + Component name in the form _ + """ + n_digits = int(np.log10(max_value)) + 1 + comp_name = '{0:08d}'.format(int(comp_num)) + comp_name = '{0}_{1}'.format(prefix, comp_name[8 - n_digits:]) + return comp_name -def write_split_ts(data, mmix, mask, comptable, ref_img, out_dir='.', suffix=''): +# File Writing Functions +def write_split_ts(data, mmix, mask, comptable, io_generator, echo=0): """ Splits `data` into denoised / noise / ignored time series and saves to disk @@ -73,12 +338,13 @@ def write_split_ts(data, mmix, mask, comptable, ref_img, out_dir='.', suffix='') is components and `T` is the same as in `data` mask : (S,) array_like Boolean mask array - ref_img : :obj:`str` or img_like + io_generator : :obj:`tedana.io.OutputGenerator` Reference image to dictate how outputs are saved to disk out_dir : :obj:`str`, optional Output directory. - suffix : :obj:`str`, optional - Appended to name of saved files (before extension). Default: '' + echo: :obj: `int`, optional + Echo number to generate filenames, used by some verbose + functions. Default 0. Returns ------- @@ -89,14 +355,13 @@ def write_split_ts(data, mmix, mask, comptable, ref_img, out_dir='.', suffix='') ----- This function writes out several files: - ====================== ================================================= - Filename Content - ====================== ================================================= - hik_ts_[suffix].nii High-Kappa time series. - midk_ts_[suffix].nii Mid-Kappa time series. - low_ts_[suffix].nii Low-Kappa time series. - dn_ts_[suffix].nii Denoised time series. - ====================== ================================================= + ============================ ============================================ + Filename Content + ============================ ============================================ + [prefix]Accepted_bold.nii.gz High-Kappa time series. + [prefix]Rejected_bold.nii.gz Low-Kappa time series. 
+ [prefix]Denoised_bold.nii.gz Denoised time series. + ============================ ============================================ """ acc = comptable[comptable.classification == 'accepted'].index.values rej = comptable[comptable.classification == 'rejected'].index.values @@ -117,64 +382,51 @@ def write_split_ts(data, mmix, mask, comptable, ref_img, out_dir='.', suffix='') dnts = data[mask] - lowkts if len(acc) != 0: - fout = filewrite(utils.unmask(hikts, mask), - op.join(out_dir, 'hik_ts_{0}'.format(suffix)), ref_img) - LGR.info('Writing high-Kappa time series: {}'.format(op.abspath(fout))) + if echo != 0: + fout = io_generator.save_file( + utils.unmask(hikts, mask), + 'high kappa ts split img', + echo=echo + ) + + else: + fout = io_generator.save_file( + utils.unmask(hikts, mask), + 'high kappa ts img', + ) + LGR.info('Writing high-Kappa time series: {}'.format(fout)) if len(rej) != 0: - fout = filewrite(utils.unmask(lowkts, mask), - op.join(out_dir, 'lowk_ts_{0}'.format(suffix)), ref_img) - LGR.info('Writing low-Kappa time series: {}'.format(op.abspath(fout))) + if echo != 0: + fout = io_generator.save_file( + utils.unmask(lowkts, mask), + 'low kappa ts split img', + echo=echo + ) + else: + fout = io_generator.save_file( + utils.unmask(lowkts, mask), + 'low kappa ts img', + ) + LGR.info('Writing low-Kappa time series: {}'.format(fout)) + + if echo != 0: + fout = io_generator.save_file( + utils.unmask(dnts, mask), + 'denoised ts split img', + echo=echo + ) + else: + fout = io_generator.save_file( + utils.unmask(dnts, mask), + 'denoised ts img', + ) - fout = filewrite(utils.unmask(dnts, mask), - op.join(out_dir, 'dn_ts_{0}'.format(suffix)), ref_img) - LGR.info('Writing denoised time series: {}'.format(op.abspath(fout))) + LGR.info('Writing denoised time series: {}'.format(fout)) return varexpl -def writefeats(data, mmix, mask, ref_img, out_dir='.', suffix=''): - """ - Converts `data` to component space with `mmix` and saves to disk - - Parameters - ---------- - data : (S x T) array_like - Input time series - mmix : (C x T) array_like - Mixing matrix for converting input data to component space, where `C` - is components and `T` is the same as in `data` - mask : (S,) array_like - Boolean mask array - ref_img : :obj:`str` or img_like - Reference image to dictate how outputs are saved to disk - out_dir : :obj:`str`, optional - Output directory. - suffix : :obj:`str`, optional - Appended to name of saved files (before extension). Default: '' - - Returns - ------- - fname : :obj:`str` - Filepath to saved file - - Notes - ----- - This function writes out a file: - - ====================== ================================================= - Filename Content - ====================== ================================================= - feats_[suffix].nii Z-normalized spatial component maps. 
- ====================== ================================================= - """ - - # write feature versions of components - feats = utils.unmask(computefeats2(data, mmix, mask), mask) - fname = filewrite(feats, op.join(out_dir, 'feats_{0}'.format(suffix)), ref_img) - return fname - - -def writeresults(ts, mask, comptable, mmix, n_vols, ref_img, out_dir='.'): +def writeresults(ts, mask, comptable, mmix, n_vols, io_generator): """ Denoises `ts` and saves all resulting files to disk @@ -195,52 +447,54 @@ def writeresults(ts, mask, comptable, mmix, n_vols, ref_img, out_dir='.'): Number of volumes in original time series ref_img : :obj:`str` or img_like Reference image to dictate how outputs are saved to disk - out_dir : :obj:`str`, optional - Output directory. Notes ----- This function writes out several files: - ====================== ================================================= - Filename Content - ====================== ================================================= - hik_ts_OC.nii High-Kappa time series. Generated by - :py:func:`tedana.utils.io.write_split_ts`. - midk_ts_OC.nii Mid-Kappa time series. Generated by - :py:func:`tedana.utils.io.write_split_ts`. - low_ts_OC.nii Low-Kappa time series. Generated by - :py:func:`tedana.utils.io.write_split_ts`. - dn_ts_OC.nii Denoised time series. Generated by - :py:func:`tedana.utils.io.write_split_ts`. - betas_OC.nii Full ICA coefficient feature set. - betas_hik_OC.nii Denoised ICA coefficient feature set. - feats_OC2.nii Z-normalized spatial component maps. Generated - by :py:func:`tedana.utils.io.writefeats`. - ts_OC.nii Optimally combined 4D time series. - ====================== ================================================= + ========================================= ===================================== + Filename Content + ========================================= ===================================== + desc-optcomAccepted_bold.nii.gz High-Kappa time series. + desc-optcomRejected_bold.nii.gz Low-Kappa time series. + desc-optcomDenoised_bold.nii.gz Denoised time series. + desc-ICA_components.nii.gz Spatial component maps for all + components. + desc-ICAAccepted_components.nii.gz Spatial component maps for accepted + components. + desc-ICAAccepted_stat-z_components.nii.gz Z-normalized spatial component maps + for accepted components. 
+ ========================================= ===================================== + + See Also + -------- + tedana.io.write_split_ts: Writes out time series files """ acc = comptable[comptable.classification == 'accepted'].index.values - - fout = filewrite(ts, op.join(out_dir, 'ts_OC'), ref_img) - LGR.info('Writing optimally combined time series: {}'.format(op.abspath(fout))) - - write_split_ts(ts, mmix, mask, comptable, ref_img, out_dir=out_dir, suffix='OC') + write_split_ts(ts, mmix, mask, comptable, io_generator) ts_B = get_coeffs(ts, mmix, mask) - fout = filewrite(ts_B, op.join(out_dir, 'betas_OC'), ref_img) - LGR.info('Writing full ICA coefficient feature set: {}'.format(op.abspath(fout))) + fout = io_generator.save_file(ts_B, 'ICA components img') + LGR.info('Writing full ICA coefficient feature set: {}'.format(fout)) if len(acc) != 0: - fout = filewrite(ts_B[:, acc], op.join(out_dir, 'betas_hik_OC'), ref_img) - LGR.info('Writing denoised ICA coefficient feature set: {}'.format(op.abspath(fout))) - fout = writefeats(split_ts(ts, mmix, mask, comptable)[0], - mmix[:, acc], mask, ref_img, out_dir=out_dir, - suffix='OC2') - LGR.info('Writing Z-normalized spatial component maps: {}'.format(op.abspath(fout))) - - -def writeresults_echoes(catd, mmix, mask, comptable, ref_img, out_dir='.'): + fout = io_generator.save_file( + ts_B[:, acc], + 'ICA accepted components img' + ) + LGR.info('Writing denoised ICA coefficient feature set: {}'.format(fout)) + + # write feature versions of components + feats = computefeats2(split_ts(ts, mmix, mask, comptable)[0], mmix[:, acc], mask) + feats = utils.unmask(feats, mask) + fname = io_generator.save_file( + feats, + 'z-scored ICA accepted components img' + ) + LGR.info('Writing Z-normalized spatial component maps: {}'.format(fname)) + + +def writeresults_echoes(catd, mmix, mask, comptable, io_generator): """ Saves individually denoised echos to disk @@ -258,116 +512,33 @@ def writeresults_echoes(catd, mmix, mask, comptable, ref_img, out_dir='.'): each metric. The index should be the component number. ref_img : :obj:`str` or img_like Reference image to dictate how outputs are saved to disk - out_dir : :obj:`str`, optional - Output directory. Notes ----- This function writes out several files: - ====================== ================================================= - Filename Content - ====================== ================================================= - hik_ts_e[echo].nii High-Kappa timeseries for echo number ``echo``. - Generated by - :py:func:`tedana.utils.io.write_split_ts`. - midk_ts_e[echo].nii Mid-Kappa timeseries for echo number ``echo``. - Generated by - :py:func:`tedana.utils.io.write_split_ts`. - lowk_ts_e[echo].nii Low-Kappa timeseries for echo number ``echo``. - Generated by - :py:func:`tedana.utils.io.write_split_ts`. - dn_ts_e[echo].nii Denoised timeseries for echo number ``echo``. - Generated by - :py:func:`tedana.utils.io.write_split_ts`. - ====================== ================================================= + ===================================== =================================== + Filename Content + ===================================== =================================== + echo-[echo]_desc-Accepted_bold.nii.gz High-Kappa timeseries for echo + number ``echo``. + echo-[echo]_desc-Rejected_bold.nii.gz Low-Kappa timeseries for echo + number ``echo``. + echo-[echo]_desc-Denoised_bold.nii.gz Denoised timeseries for echo + number ``echo``. 
+ ===================================== =================================== + + See Also + -------- + tedana.io.write_split_ts: Writes out the files. """ for i_echo in range(catd.shape[1]): LGR.info('Writing Kappa-filtered echo #{:01d} timeseries'.format(i_echo + 1)) - write_split_ts(catd[:, i_echo, :], mmix, mask, comptable, ref_img, - out_dir=out_dir, suffix='e%i' % (i_echo + 1)) - - -def new_nii_like(ref_img, data, affine=None, copy_header=True): - """ - Coerces `data` into NiftiImage format like `ref_img` - - Parameters - ---------- - ref_img : :obj:`str` or img_like - Reference image - data : (S [x T]) array_like - Data to be saved - affine : (4 x 4) array_like, optional - Transformation matrix to be used. Default: `ref_img.affine` - copy_header : :obj:`bool`, optional - Whether to copy header from `ref_img` to new image. Default: True - - Returns - ------- - nii : :obj:`nibabel.nifti1.Nifti1Image` - NiftiImage - """ - - ref_img = check_niimg(ref_img) - newdata = data.reshape(ref_img.shape[:3] + data.shape[1:]) - if '.nii' not in ref_img.valid_exts: - # this is rather ugly and may lose some information... - nii = nib.Nifti1Image(newdata, affine=ref_img.affine, - header=ref_img.header) - else: - # nilearn's `new_img_like` is a very nice function - nii = new_img_like(ref_img, newdata, affine=affine, - copy_header=copy_header) - nii.set_data_dtype(data.dtype) - - return nii - - -def filewrite(data, filename, ref_img, gzip=True, copy_header=True): - """ - Writes `data` to `filename` in format of `ref_img` - - Parameters - ---------- - data : (S [x T]) array_like - Data to be saved - filename : :obj:`str` - Filepath where data should be saved to - ref_img : :obj:`str` or img_like - Reference image - gzip : :obj:`bool`, optional - Whether to gzip output (if not specified in `filename`). Only applies - if output dtype is NIFTI. Default: True - copy_header : :obj:`bool`, optional - Whether to copy header from `ref_img` to new image. Default: True - - Returns - ------- - name : :obj:`str` - Path of saved image (with added extensions, as appropriate) - """ - - # get reference image for comparison - if isinstance(ref_img, list): - ref_img = ref_img[0] - - # generate out file for saving - out = new_nii_like(ref_img, data, copy_header=copy_header) - - # FIXME: we only handle writing to nifti right now - # get root of desired output file and save as nifti image - root = op.dirname(filename) - base = op.basename(filename) - base, ext, add = splitext_addext(base) - root = op.join(root, base) - name = '{}.{}'.format(root, 'nii.gz' if gzip else 'nii') - out.to_filename(name) - - return name + write_split_ts(catd[:, i_echo, :], mmix, mask, comptable, io_generator, echo=(i_echo + 1)) +# File Loading Functions def load_data(data, n_echos=None): """ Coerces input `data` files to required 3D array output @@ -418,110 +589,78 @@ def load_data(data, n_echos=None): return fdata, ref_img -def add_decomp_prefix(comp_num, prefix, max_value): +# Helper Functions +def new_nii_like(ref_img, data, affine=None, copy_header=True): """ - Create component name with leading zeros matching number of components + Coerces `data` into NiftiImage format like `ref_img` Parameters ---------- - comp_num : :obj:`int` - Component number - prefix : :obj:`str` - A prefix to prepend to the component name. An underscore is - automatically added between the prefix and the component number. - max_value : :obj:`int` - The maximum component number in the whole decomposition. 
Used to
- determine the appropriate number of leading zeros in the component
- name.
+ ref_img : :obj:`str` or img_like
+ Reference image
+ data : (S [x T]) array_like
+ Data to be saved
+ affine : (4 x 4) array_like, optional
+ Transformation matrix to be used. Default: `ref_img.affine`
+ copy_header : :obj:`bool`, optional
+ Whether to copy header from `ref_img` to new image. Default: True

 Returns
 -------
- comp_name : :obj:`str`
- Component name in the form <prefix>_<comp_num>
- """
- n_digits = int(np.log10(max_value)) + 1
- comp_name = '{0:08d}'.format(int(comp_num))
- comp_name = '{0}_{1}'.format(prefix, comp_name[8 - n_digits:])
- return comp_name
-
-
-def _rem_column_prefix(name):
- """
- Remove column prefix
- """
- return int(name.split('_')[-1])
-
-
-def _find_comp_rows(name):
- """
- Find component rows
- """
- is_valid = False
- temp = name.split('_')
- if len(temp) == 2 and temp[-1].isdigit():
- is_valid = True
- return is_valid
-
-
-def save_comptable(df, filename, label='ica', metadata=None):
- """
- Save pandas DataFrame as a BIDS Derivatives-compatible json file.
-
- Parameters
- ----------
- df : :obj:`pandas.DataFrame`
- DataFrame to save to file.
- filename : :obj:`str`
- File to which to output DataFrame.
- label : :obj:`str`, optional
- Prefix to add to component names in json file. Generally either "ica"
- or "pca".
- metadata : :obj:`dict` or None, optional
- Additional top-level metadata (e.g., decomposition description) to add
- to json file. Default is None.
+ nii : :obj:`nibabel.nifti1.Nifti1Image`
+ NiftiImage
 """
- save_df = df.copy()
-
- if 'component' not in save_df.columns:
- save_df['component'] = save_df.index
-
- # Rename components
- max_value = save_df['component'].max()
- save_df['component'] = save_df['component'].apply(
- add_decomp_prefix, prefix=label, max_value=max_value)
- save_df = save_df.set_index('component')
- save_df = save_df.fillna('n/a')
-
- data = save_df.to_dict(orient='index')
- if metadata is not None:
- data = {**data, **metadata}
+ ref_img = check_niimg(ref_img)
+ newdata = data.reshape(ref_img.shape[:3] + data.shape[1:])
+ if '.nii' not in ref_img.valid_exts:
+ # this is rather ugly and may lose some information...
+ nii = nib.Nifti1Image(newdata, affine=ref_img.affine,
+ header=ref_img.header)
+ else:
+ # nilearn's `new_img_like` is a very nice function
+ nii = new_img_like(ref_img, newdata, affine=affine,
+ copy_header=copy_header)
+ nii.set_data_dtype(data.dtype)

- with open(filename, 'w') as fo:
- json.dump(data, fo, sort_keys=True, indent=4)
+ return nii


-def load_comptable(filename):
+def split_ts(data, mmix, mask, comptable):
 """
- Load a BIDS Derivatives decomposition json file into a pandas DataFrame.
+ Splits `data` time series into accepted component time series and remainder

 Parameters
 ----------
- filename : :obj:`str`
- File from which to load DataFrame.
+ data : (S x T) array_like
+ Input data, where `S` is samples and `T` is time
+ mmix : (T x C) array_like
+ Mixing matrix for converting input data to component space, where `C`
+ is components and `T` is the same as in `data`
+ mask : (S,) array_like
+ Boolean mask array
+ comptable : (C x X) :obj:`pandas.DataFrame`
+ Component metric table. One row for each component, with a column for
+ each metric. Requires at least two columns: "component" and
+ "classification".

 Returns
 -------
- df : :obj:`pandas.DataFrame`
- DataFrame with contents from filename.
+ hikts : (S x T) :obj:`numpy.ndarray` + Time series reconstructed using only components in `acc` + resid : (S x T) :obj:`numpy.ndarray` + Original data with `hikts` removed """ - with open(filename, 'r') as fo: - data = json.load(fo) - data = {d: data[d] for d in data.keys() if _find_comp_rows(d)} - df = pd.DataFrame.from_dict(data, orient='index') - df = df.replace('n/a', np.nan) # our jsons store nans as 'n/a' - df['component'] = df.index - df['component'] = df['component'].apply(_rem_column_prefix) - df = df.set_index('component', drop=True) - df.index.name = 'component' - return df + acc = comptable[comptable.classification == 'accepted'].index.values + + cbetas = get_coeffs(data - data.mean(axis=-1, keepdims=True), + mmix, mask) + betas = cbetas[mask] + if len(acc) != 0: + hikts = utils.unmask(betas[:, acc].dot(mmix.T[acc, :]), mask) + else: + hikts = None + + resid = data - hikts + + return hikts, resid diff --git a/tedana/metrics/kundu_fit.py b/tedana/metrics/kundu_fit.py index f0cb00591..f606fa6a1 100644 --- a/tedana/metrics/kundu_fit.py +++ b/tedana/metrics/kundu_fit.py @@ -2,7 +2,6 @@ Fit models. """ import logging -import os.path as op import numpy as np import pandas as pd @@ -20,9 +19,9 @@ Z_MAX = 8 -def dependence_metrics(catd, tsoc, mmix, adaptive_mask, tes, ref_img, +def dependence_metrics(catd, tsoc, mmix, adaptive_mask, tes, io_generator, reindex=False, mmixN=None, algorithm=None, label=None, - out_dir='.', verbose=False): + verbose=False): """ Fit TE-dependence and -independence models to components. @@ -42,20 +41,15 @@ def dependence_metrics(catd, tsoc, mmix, adaptive_mask, tes, ref_img, For more information on thresholding, see `make_adaptive_mask`. tes : list List of echo times associated with `catd`, in milliseconds - ref_img : str or img_like - Reference image to dictate how outputs are saved to disk + io_generator : tedana.io.OutputGenerator + The output generation object for this workflow reindex : bool, optional Whether to sort components in descending order by Kappa. Default: False mmixN : (T x C) array_like, optional Z-scored mixing matrix. Default: None algorithm : {'kundu_v2', 'kundu_v3', None}, optional Decision tree to be applied to metrics. Determines which maps will be - generated and stored in seldict. Default: None - label : :obj:`str` or None, optional - Prefix to apply to generated files. Default is None. - out_dir : :obj:`str`, optional - Output directory for generated files. Default is current working - directory. + generated and stored in ``metric_maps``. Default: None verbose : :obj:`bool`, optional Whether or not to generate additional files. Default is False. @@ -64,10 +58,13 @@ def dependence_metrics(catd, tsoc, mmix, adaptive_mask, tes, ref_img, comptable : (C x X) :obj:`pandas.DataFrame` Component metric table. One row for each component, with a column for each metric. The index is the component number. - seldict : :obj:`dict` or None + metric_maps : :obj:`dict` or None Dictionary containing component-specific metric maps to be used for - component selection. If `algorithm` is None, then seldict will be None as + component selection. If `algorithm` is None, then metric_maps will be None as well. + metric_metadata : :obj:`dict` + Dictionary with metadata about calculated metrics. + Each entry corresponds to a column in ``comptable``. betas : :obj:`numpy.ndarray` mmix_corrected : :obj:`numpy.ndarray` Mixing matrix after sign correction and resorting (if reindex is True). 
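A minimal sketch of the updated interface described above, showing the new
``io_generator`` argument and the extra ``metric_metadata`` return value. The
signature is the one defined in this diff; ``catd``, ``tsoc``, ``mmix``,
``adaptive_mask``, ``tes``, and ``ref_img`` are assumed to have been prepared
earlier in the workflow, and ``label='ICA'`` is one of the two label values
('PCA' or 'ICA') used by the registered output keys:

    from tedana.io import OutputGenerator
    from tedana.metrics import kundu_fit

    io_generator = OutputGenerator(ref_img)
    comptable, metric_maps, metric_metadata, betas, mmix_corrected = \
        kundu_fit.dependence_metrics(
            catd, tsoc, mmix, adaptive_mask, tes, io_generator,
            algorithm='kundu_v2', label='ICA', verbose=False)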
@@ -228,28 +225,83 @@ def dependence_metrics(catd, tsoc, mmix, adaptive_mask, tes, ref_img, tsoc_B = tsoc_B[:, sort_idx] if verbose: - # Echo-specific weight maps for each of the ICA components. - io.filewrite(utils.unmask(betas, mask), - op.join(out_dir, '{0}betas_catd.nii'.format(label)), - ref_img) - - # Echo-specific maps of predicted values for R2 and S0 models for each - # component. - io.filewrite(utils.unmask(pred_R2_maps, mask), - op.join(out_dir, '{0}R2_pred.nii'.format(label)), ref_img) - io.filewrite(utils.unmask(pred_S0_maps, mask), - op.join(out_dir, '{0}S0_pred.nii'.format(label)), ref_img) + for i_echo in range(n_echos): + # Echo-specific weight maps for each of the ICA components. + echo_betas = betas[:, i_echo, :] + io_generator.save_file( + utils.unmask(echo_betas, mask), + 'echo weight ' + label + ' map split img', + echo=(i_echo + 1) + ) + + # Echo-specific maps of predicted values for R2 and S0 models for each + # component. + echo_pred_R2_maps = pred_R2_maps[:, i_echo, :] + io_generator.save_file( + utils.unmask(echo_pred_R2_maps, mask), + 'echo R2 ' + label + ' split img', + echo=(i_echo + 1) + ) + echo_pred_S0_maps = pred_S0_maps[:, i_echo, :] + io_generator.save_file( + utils.unmask(echo_pred_S0_maps, mask), + 'echo S0 ' + label + ' split img', + echo=(i_echo + 1) + ) + # Weight maps used to average metrics across voxels - io.filewrite(utils.unmask(Z_maps ** 2., mask), - op.join(out_dir, '{0}metric_weights.nii'.format(label)), - ref_img) + io_generator.save_file( + utils.unmask(Z_maps ** 2., mask), + label + ' component weights img', + ) del pred_R2_maps, pred_S0_maps comptable = pd.DataFrame(comptable, columns=['kappa', 'rho', 'variance explained', 'normalized variance explained']) - comptable.index.name = 'component' + comptable["Component"] = [ + io.add_decomp_prefix(comp, prefix='ica', max_value=comptable.shape[0]) + for comp in comptable.index.values + ] + metric_metadata = { + "kappa": { + "LongName": "Kappa", + "Description": ( + "A pseudo-F-statistic indicating TE-dependence of the component. " + "This metric is calculated by computing fit to the TE-dependence model " + "at each voxel, and then performing a weighted average based on the " + "voxel-wise weights of the component." + ), + "Units": "arbitrary", + }, + "rho": { + "LongName": "Rho", + "Description": ( + "A pseudo-F-statistic indicating TE-independence of the component. " + "This metric is calculated by computing fit to the TE-independence model " + "at each voxel, and then performing a weighted average based on the " + "voxel-wise weights of the component." + ), + "Units": "arbitrary", + }, + "variance explained": { + "LongName": "Variance explained", + "Description": ( + "Variance explained in the optimally combined data of each component. " + "On a scale from 0 to 100." + ), + "Units": "arbitrary", + }, + "normalized variance explained": { + "LongName": "Normalized variance explained", + "Description": ( + "Normalized variance explained in the optimally combined data of each component. " + "On a scale from 0 to 1." 
+ ), + "Units": "arbitrary", + }, + } # Generate clustering criteria for component selection if algorithm in ['kundu_v2', 'kundu_v3']: @@ -265,7 +317,7 @@ def dependence_metrics(catd, tsoc, mmix, adaptive_mask, tes, ref_img, for i_comp in range(n_components): # Cluster-extent threshold and binarize F-maps ccimg = io.new_nii_like( - ref_img, + io_generator.reference_img, np.squeeze(utils.unmask(F_R2_maps[:, i_comp], mask))) F_R2_clmaps[:, i_comp] = utils.threshold_map( ccimg, min_cluster_size=csize, threshold=fmin, mask=mask, @@ -273,7 +325,7 @@ def dependence_metrics(catd, tsoc, mmix, adaptive_mask, tes, ref_img, countsigFR2 = F_R2_clmaps[:, i_comp].sum() ccimg = io.new_nii_like( - ref_img, + io_generator.reference_img, np.squeeze(utils.unmask(F_S0_maps[:, i_comp], mask))) F_S0_clmaps[:, i_comp] = utils.threshold_map( ccimg, min_cluster_size=csize, threshold=fmin, mask=mask, @@ -282,7 +334,7 @@ def dependence_metrics(catd, tsoc, mmix, adaptive_mask, tes, ref_img, # Cluster-extent threshold and binarize Z-maps with CDT of p < 0.05 ccimg = io.new_nii_like( - ref_img, + io_generator.reference_img, np.squeeze(utils.unmask(Z_maps[:, i_comp], mask))) Z_clmaps[:, i_comp] = utils.threshold_map( ccimg, min_cluster_size=csize, threshold=1.95, mask=mask, @@ -290,7 +342,7 @@ def dependence_metrics(catd, tsoc, mmix, adaptive_mask, tes, ref_img, # Cluster-extent threshold and binarize ranked signal-change map ccimg = io.new_nii_like( - ref_img, + io_generator.reference_img, utils.unmask(stats.rankdata(tsoc_Babs[:, i_comp]), mask)) Br_R2_clmaps[:, i_comp] = utils.threshold_map( ccimg, min_cluster_size=csize, @@ -304,29 +356,33 @@ def dependence_metrics(catd, tsoc, mmix, adaptive_mask, tes, ref_img, if algorithm == 'kundu_v2': # WTS, tsoc_B, PSC, and F_S0_maps are not used by Kundu v2.5 - selvars = ['Z_maps', 'F_R2_maps', - 'Z_clmaps', 'F_R2_clmaps', 'F_S0_clmaps', - 'Br_R2_clmaps', 'Br_S0_clmaps'] + metric_maps_to_retain = [ + 'Z_maps', 'F_R2_maps', + 'Z_clmaps', 'F_R2_clmaps', 'F_S0_clmaps', + 'Br_R2_clmaps', 'Br_S0_clmaps' + ] elif algorithm == 'kundu_v3': - selvars = ['WTS', 'tsoc_B', 'PSC', - 'Z_maps', 'F_R2_maps', 'F_S0_maps', - 'Z_clmaps', 'F_R2_clmaps', 'F_S0_clmaps', - 'Br_R2_clmaps', 'Br_S0_clmaps'] + metric_maps_to_retain = [ + 'WTS', 'tsoc_B', 'PSC', + 'Z_maps', 'F_R2_maps', 'F_S0_maps', + 'Z_clmaps', 'F_R2_clmaps', 'F_S0_clmaps', + 'Br_R2_clmaps', 'Br_S0_clmaps' + ] elif algorithm is None: - selvars = [] + metric_maps_to_retain = [] else: raise ValueError('Algorithm "{0}" not recognized.'.format(algorithm)) - seldict = {} - for vv in selvars: - seldict[vv] = eval(vv) + metric_maps = {} + for vv in metric_maps_to_retain: + metric_maps[vv] = eval(vv) else: - seldict = None + metric_maps = None - return comptable, seldict, betas, mmix_corrected + return comptable, metric_maps, metric_metadata, betas, mmix_corrected -def kundu_metrics(comptable, metric_maps): +def kundu_metrics(comptable, metric_maps, metric_metadata): """ Compute metrics used by Kundu v2.5 and v3.2 decision trees. @@ -337,13 +393,20 @@ def kundu_metrics(comptable, metric_maps): metric_maps : :obj:`dict` A dictionary with component-specific feature maps used for classification. The value for each key is a (S x C) array, where `S` is - voxels and `C` is components. Generated by `dependence_metrics` + voxels and `C` is components. Generated by + :py:func:`tedana.metrics.dependence_metrics`. + metric_metadata : :obj:`dict` + Dictionary with metadata about calculated metrics. + Each entry corresponds to a column in ``comptable``. 
Returns ------- comptable : (C x M) :obj:`pandas.DataFrame` Component metrics to be used for component selection, with new metrics added. + metric_metadata : :obj:`dict` + Dictionary with metadata about calculated metrics. + Each entry corresponds to a column in ``comptable``. """ Z_maps = metric_maps['Z_maps'] Z_clmaps = metric_maps['Z_clmaps'] @@ -358,7 +421,23 @@ def kundu_metrics(comptable, metric_maps): model F-statistic maps. """ comptable['countsigFR2'] = F_R2_clmaps.sum(axis=0) + metric_metadata["countsigFR2"] = { + "LongName": "R2 model F-statistic map significant voxel count", + "Description": ( + "Number of significant voxels from the cluster-extent " + "thresholded R2 model F-statistic map for each component." + ), + "Units": "voxel", + } comptable['countsigFS0'] = F_S0_clmaps.sum(axis=0) + metric_metadata["countsigFS0"] = { + "LongName": "S0 model F-statistic map significant voxel count", + "Description": ( + "Number of significant voxels from the cluster-extent " + "thresholded S0 model F-statistic map for each component." + ), + "Units": "voxel", + } """ Generate Dice values for R2 and S0 models @@ -369,6 +448,7 @@ def kundu_metrics(comptable, metric_maps): """ comptable['dice_FR2'] = np.zeros(comptable.shape[0]) comptable['dice_FS0'] = np.zeros(comptable.shape[0]) + for i_comp in comptable.index: comptable.loc[i_comp, 'dice_FR2'] = utils.dice(Br_R2_clmaps[:, i_comp], F_R2_clmaps[:, i_comp]) @@ -377,6 +457,22 @@ def kundu_metrics(comptable, metric_maps): comptable.loc[np.isnan(comptable['dice_FR2']), 'dice_FR2'] = 0 comptable.loc[np.isnan(comptable['dice_FS0']), 'dice_FS0'] = 0 + metric_metadata["dice_FR2"] = { + "LongName": "R2 model beta map-F-statistic map Dice similarity index", + "Description": ( + "Dice value of cluster-extent thresholded maps of R2-model betas " + "and F-statistics." + ), + "Units": "arbitrary", + } + metric_metadata["dice_FS0"] = { + "LongName": "S0 model beta map-F-statistic map Dice similarity index", + "Description": ( + "Dice value of cluster-extent thresholded maps of S0-model betas " + "and F-statistics." + ), + "Units": "arbitrary", + } """ Generate three metrics of component noise: @@ -406,6 +502,32 @@ def kundu_metrics(comptable, metric_maps): comptable.loc[np.isnan(comptable['signal-noise_t']), 'signal-noise_t'] = 0 comptable.loc[np.isnan(comptable['signal-noise_p']), 'signal-noise_p'] = 0 + metric_metadata["countnoise"] = { + "LongName": "Noise voxel count", + "Description": ( + "Number of 'noise' voxels (voxels highly weighted for " + "component, but not from clusters) from each component." + ), + "Units": "voxel", + } + metric_metadata["signal-noise_t"] = { + "LongName": "Signal > noise t-statistic", + "Description": ( + "T-statistic for two-sample t-test of F-statistics from " + "'signal' voxels (voxels in clusters) against 'noise' voxels (voxels not " + "in clusters) for R2 model." + ), + "Units": "arbitrary", + } + metric_metadata["signal-noise_p"] = { + "LongName": "Signal > noise p-value", + "Description": ( + "P-value for two-sample t-test of F-statistics from " + "'signal' voxels (voxels in clusters) against 'noise' voxels (voxels not " + "in clusters) for R2 model." 
+ ),
+ "Units": "arbitrary",
+ }

 """
 Assemble decision table with five metrics:
@@ -427,5 +549,13 @@ def kundu_metrics(comptable, metric_maps):
 stats.rankdata(comptable['countnoise']),
 comptable.shape[0] - stats.rankdata(comptable['countsigFR2'])]).T
 comptable['d_table_score'] = d_table_rank.mean(axis=1)
-
- return comptable
+ metric_metadata["d_table_score"] = {
+ "LongName": "Decision table score",
+ "Description": (
+ "Summary score compiled from five metrics, with smaller values "
+ "(i.e., higher ranks) indicating more BOLD dependence and less noise."
+ ),
+ "Units": "arbitrary",
+ }
+
+ return comptable, metric_metadata
diff --git a/tedana/reporting/dynamic_figures.py b/tedana/reporting/dynamic_figures.py
index 2be9257fc..006a546ea 100644
--- a/tedana/reporting/dynamic_figures.py
+++ b/tedana/reporting/dynamic_figures.py
@@ -75,10 +75,7 @@ def _create_data_struct(comptable_path, color_mapping=color_mapping):
 'd_table_score', 'kappa ratio', 'rationale',
 'd_table_score_scrub']

- df = pd.read_json(comptable_path)
- df.drop('Description', axis=0, inplace=True)
- df.drop('Method', axis=1, inplace=True)
- df = df.T
+ df = pd.read_table(comptable_path)
 n_comps = df.shape[0]

 # remove space from column name
@@ -232,7 +229,7 @@ def _create_varexp_pie_plt(comptable_cds, n_comps):
 return fig


-def _tap_callback(comptable_cds, div_content, out_dir):
+def _tap_callback(comptable_cds, div_content, io_generator):
 """
 JavaScript function to animate tap events and show component info on the right

@@ -243,6 +240,9 @@
 div: bokeh.models.Div
 Target Div element where component images will be loaded

+ io_generator: tedana.io.OutputGenerator
+ Output generating object for this workflow
+
 Returns
 -------
 CustomJS: bokeh.models.CustomJS
@@ -250,10 +250,11 @@
 """
 return models.CustomJS(args=dict(source_comp_table=comptable_cds,
 div=div_content,
- outdir=out_dir), code=tap_callback_jscode)
+ outdir=io_generator.out_dir),
+ code=tap_callback_jscode)


-def _link_figures(fig, comptable_ds, div_content, out_dir):
+def _link_figures(fig, comptable_ds, div_content, io_generator):
 """
 Links figures and adds interaction on mouse-click.

@@ -269,8 +270,8 @@
 div_content : bokeh.models.Div
 Div element for additional HTML content.

- out_dir : str
- Output directory of tedana results.
+ io_generator: `tedana.io.OutputGenerator`
+ Output generating object for this workflow

 Returns
 -------
@@ -282,5 +283,5 @@
 fig.js_on_event(events.Tap,
 _tap_callback(comptable_ds,
 div_content,
- out_dir))
+ io_generator))
 return fig
diff --git a/tedana/reporting/html_report.py b/tedana/reporting/html_report.py
index 264352a05..e3eeca6a4 100644
--- a/tedana/reporting/html_report.py
+++ b/tedana/reporting/html_report.py
@@ -55,12 +55,12 @@ def _save_as_html(body):
 return html


-def generate_report(out_dir, tr):
+def generate_report(io_generator, tr):
 """
 Parameters
 ----------
- out_dir : str
- File path to a completed tedana output directory
+ io_generator : tedana.io.OutputGenerator
+ io_generator object for this workflow's output
 tr : float
 The repetition time (TR) for the collected multi-echo sequence

@@ -71,12 +71,12 @@
 A generated HTML report
 """
 # Load the component time series
- comp_ts_path = opj(out_dir, 'ica_mixing.tsv')
+ comp_ts_path = io_generator.get_name("ICA mixing tsv")
 comp_ts_df = pd.read_csv(comp_ts_path, sep='\t', encoding='utf-8')
 n_vols, n_comps = comp_ts_df.shape

 # Load the component table
- comptable_path = opj(out_dir, 'ica_decomposition.json')
+ comptable_path = io_generator.get_name("ICA metrics tsv")
 comptable_cds = df._create_data_struct(comptable_path)

 # Create kappa rho plot
@@ -98,7 +98,7 @@
 div_content = models.Div(width=500, height=750, height_policy='fixed')

 for fig in figs:
- df._link_figures(fig, comptable_cds, div_content, out_dir=out_dir)
+ df._link_figures(fig, comptable_cds, div_content, io_generator)

 # Create a layout
 app = layouts.gridplot([[
@@ -111,10 +111,10 @@
 kr_script, kr_div = embed.components(app)

 # Read in relevant methods
- with open(opj(out_dir, 'report.txt'), 'r+') as f:
+ with open(opj(io_generator.out_dir, 'report.txt'), 'r+') as f:
 about = f.read()

 body = _update_template_bokeh(kr_div, about, kr_script)
 html = _save_as_html(body)
- with open(opj(out_dir, 'tedana_report.html'), 'wb') as f:
+ with open(opj(io_generator.out_dir, 'tedana_report.html'), 'wb') as f:
 f.write(html.encode('utf-8'))
diff --git a/tedana/reporting/static_figures.py b/tedana/reporting/static_figures.py
index fb6a0e44b..e170b0a2c 100644
--- a/tedana/reporting/static_figures.py
+++ b/tedana/reporting/static_figures.py
@@ -43,7 +43,7 @@ def _trim_edge_zeros(arr):
 return arr[bounding_box]


-def comp_figures(ts, mask, comptable, mmix, ref_img, out_dir, png_cmap):
+def comp_figures(ts, mask, comptable, mmix, io_generator, png_cmap):
 """
 Creates static figures that highlight certain aspects of tedana processing.
 This includes a figure for each component showing the component time course,
@@ -61,26 +61,23 @@
 mmix : (C x T) array_like
 Mixing matrix for converting input data to component space, where `C`
 is components and `T` is the same as in `data`
- ref_img : :obj:`str` or img_like
- Reference image to dictate how outputs are saved to disk
- out_dir : :obj:`str`
- Figures folder within output directory
-
+ io_generator : :obj:`tedana.io.OutputGenerator`
+ Output Generator object to use for this workflow
 """
 # Get the length of the timeseries
 n_vols = len(mmix)

 # regenerate the beta images
 ts_B = stats.get_coeffs(ts, mmix, mask)
- ts_B = ts_B.reshape(ref_img.shape[:3] + ts_B.shape[1:])
+ ts_B = ts_B.reshape(io_generator.reference_img.shape[:3] + ts_B.shape[1:])

 # trim edges from ts_B array
 ts_B = _trim_edge_zeros(ts_B)

 # Mask out remaining zeros
 ts_B = np.ma.masked_where(ts_B == 0, ts_B)

- # Get repetition time from ref_img
- tr = ref_img.header.get_zooms()[-1]
+ # Get repetition time from reference image
+ tr = io_generator.reference_img.header.get_zooms()[-1]

 # Create indices for 6 cuts, based on dimensions
 cuts = [ts_B.shape[dim] // 6 for dim in range(3)]
@@ -182,6 +179,6 @@
 # Fix spacing so TR label does not overlap with other plots
 allplot.subplots_adjust(hspace=0.4)
 plot_name = 'comp_{}.png'.format(str(compnum).zfill(3))
- compplot_name = os.path.join(out_dir, plot_name)
+ compplot_name = os.path.join(io_generator.out_dir, 'figures', plot_name)
 plt.savefig(compplot_name)
 plt.close()
diff --git a/tedana/resources/config/outputs.json b/tedana/resources/config/outputs.json
new file mode 100644
index 000000000..674984c81
--- /dev/null
+++ b/tedana/resources/config/outputs.json
@@ -0,0 +1,186 @@
+{
+ "adaptive mask img": {
+ "orig": "adaptive_mask",
+ "bidsv1.5.0": "desc-adaptiveGoodSignal_mask"
+ },
+ "t2star img": {
+ "orig": "t2sv",
+ "bidsv1.5.0": "T2starmap"
+ },
+ "s0 img": {
+ "orig": "s0v",
+ "bidsv1.5.0": "S0map"
+ },
+ "combined img": {
+ "orig": "ts_OC",
+ "bidsv1.5.0": "desc-optcom_bold"
+ },
+ "ICA components img": {
+ "orig": "ica_components",
+ "bidsv1.5.0": "desc-ICA_components"
+ },
+ "z-scored PCA components img": {
+ "orig": "pca_components",
+ "bidsv1.5.0": "desc-PCA_stat-z_components"
+ },
+ "z-scored ICA components img": {
+ "orig": "betas_OC",
+ "bidsv1.5.0": "desc-ICA_stat-z_components"
+ },
+ "ICA accepted components img": {
+ "orig": "betas_hik_OC",
+ "bidsv1.5.0": "desc-ICAAccepted_components"
+ },
+ "z-scored ICA accepted components img": {
+ "orig": "feats_OC2",
+ "bidsv1.5.0": "desc-ICAAccepted_stat-z_components"
+ },
+ "denoised ts img": {
+ "orig": "dn_ts_OC",
+ "bidsv1.5.0": "desc-optcomDenoised_bold"
+ },
+ "high kappa ts img": {
+ "orig": "hik_ts_OC",
+ "bidsv1.5.0": "desc-optcomAccepted_bold"
+ },
+ "low kappa ts img": {
+ "orig": "lowk_ts_OC",
+ "bidsv1.5.0": "desc-optcomRejected_bold"
+ },
+ "full t2star img": {
+ "orig": "t2svG",
+ "bidsv1.5.0": "desc-full_T2starmap"
+ },
+ "full s0 img": {
+ "orig": "s0vG",
+ "bidsv1.5.0": "desc-full_S0map"
+ },
+ "whitened img": {
+ "orig": "ts_OC_whitened",
+ "bidsv1.5.0": "desc-optcomPCAReduced_bold"
+ },
+ "echo weight PCA map split img": {
+ "orig": "e{echo}_PCA_comp",
+ "bidsv1.5.0": "echo-{echo}_desc-PCA_components"
+ },
+ "echo R2 PCA split img": {
+ "orig": "e{echo}_PCA_R2",
+ "bidsv1.5.0": "echo-{echo}_desc-PCAR2ModelPredictions_components"
+ },
+ "echo S0 PCA split img": {
+ "orig": "e{echo}_PCA_S0",
+ "bidsv1.5.0": "echo-{echo}_desc-PCAS0ModelPredictions_components"
+ },
+ "PCA component weights img": {
+ "orig": "pca_weights",
+ "bidsv1.5.0": "desc-PCAAveragingWeights_components"
+ },
+ "PCA reduced img": {
+ "orig": "oc_reduced",
+ "bidsv1.5.0": "desc-optcomPCAReduced_bold"
+ },
+ "echo weight ICA map split img": {
+ "orig": "e{echo}_ICA_comp",
+ "bidsv1.5.0": "echo-{echo}_desc-ICA_components"
+ },
+ "echo R2 ICA split img": {
+ "orig": "e{echo}_ICA_R2",
+ "bidsv1.5.0": "echo-{echo}_desc-ICAR2ModelPredictions_components"
+ },
+ "echo S0 ICA split img": {
+ "orig": "e{echo}_ICA_S0",
+ "bidsv1.5.0": "echo-{echo}_desc-ICAS0ModelPredictions_components"
+ },
+ "ICA component weights img": {
+ "orig": "ica_weights",
"bidsv1.5.0": "desc-ICAAveragingWeights_components" + }, + "high kappa ts split img": { + "orig": "hik_ts_e{echo}", + "bidsv1.5.0": "echo-{echo}_desc-Accepted_bold" + }, + "low kappa ts split img": { + "orig": "lowk_ts_e{echo}", + "bidsv1.5.0": "echo-{echo}_desc-Rejected_bold" + }, + "denoised ts split img": { + "orig": "dn_ts_e{echo}", + "bidsv1.5.0": "echo-{echo}_desc-Denoised_bold" + }, + "gs img": { + "orig": "T1gs", + "bidsv1.5.0": "desc-globalSignal_map" + }, + "has gs combined img": { + "orig": "tsoc_orig", + "bidsv1.5.0": "desc-optcomWithGlobalSignal_bold" + }, + "removed gs combined img": { + "orig": "tsoc_nogs", + "bidsv1.5.0": "desc-optcomNoGlobalSignal_bold" + }, + "t1 like img": { + "orig": "sphis_hik", + "bidsv1.5.0": "desc-T1likeEffect_min" + }, + "ICA accepted mir denoised img": { + "orig": "hik_ts_OC_MIR", + "bidsv1.5.0": "desc-optcomAcceptedMIRDenoised_bold" + }, + "mir denoised img": { + "orig": "dn_ts_OC_MIR", + "bidsv1.5.0": "desc-optcomMIRDenoised_bold" + }, + "ICA accepted mir component weights img": { + "orig": "betas_hik_OC_MIR", + "bidsv1.5.0": "desc-ICAAcceptedMIRDenoised_components" + }, + "data description json": { + "orig": "dataset_description", + "bidsv1.5.0": "dataset_description" + }, + "PCA decomposition json": { + "orig": "pca_decomposition", + "bidsv1.5.0": "desc-PCA_decomposition" + }, + "PCA metrics json": { + "orig": "pca_metrics", + "bidsv1.5.0": "desc-PCA_metrics" + }, + "ICA decomposition json": { + "orig": "ica_decomposition", + "bidsv1.5.0": "desc-ICA_decomposition" + }, + "ICA metrics json": { + "orig": "ica_metrics", + "bidsv1.5.0": "desc-tedana_metrics" + }, + "PCA mixing tsv": { + "orig": "pca_mixing", + "bidsv1.5.0": "desc-PCA_mixing" + }, + "PCA metrics tsv": { + "orig": "pca_metrics", + "bidsv1.5.0": "desc-PCA_metrics" + }, + "ICA mixing tsv": { + "orig": "ica_mixing", + "bidsv1.5.0": "desc-ICA_mixing" + }, + "ICA metrics tsv": { + "orig": "ica_metrics", + "bidsv1.5.0": "desc-tedana_metrics" + }, + "global signal time series tsv": { + "orig": "global_signal_ts", + "bidsv1.5.0": "desc-globalSignal_timeseries" + }, + "ICA MIR mixing tsv": { + "orig": "ica_mir_mixing", + "bidsv1.5.0": "desc-ICAMIRDenoised_mixing" + }, + "ICA orthogonalized mixing tsv": { + "orig": "ica_orth_mixing", + "bidsv1.5.0": "desc-ICAOrth_mixing" + } +} diff --git a/tedana/selection/tedica.py b/tedana/selection/tedica.py index 8bd233340..74cf95730 100644 --- a/tedana/selection/tedica.py +++ b/tedana/selection/tedica.py @@ -13,7 +13,7 @@ RefLGR = logging.getLogger('REFERENCES') -def manual_selection(comptable, acc=None, rej=None): +def manual_selection(comptable, metric_metadata, acc=None, rej=None): """ Perform manual selection of components. @@ -21,6 +21,9 @@ def manual_selection(comptable, acc=None, rej=None): ---------- comptable : (C x M) :obj:`pandas.DataFrame` Component metric table, where `C` is components and `M` is metrics + metric_metadata : :obj:`dict` + Dictionary with metadata about calculated metrics. + Each entry corresponds to a column in ``comptable``. acc : :obj:`list`, optional List of accepted components. Default is None. rej : :obj:`list`, optional @@ -30,6 +33,9 @@ def manual_selection(comptable, acc=None, rej=None): ------- comptable : (C x M) :obj:`pandas.DataFrame` Component metric table with classification. + metric_metadata : :obj:`dict` + Dictionary with metadata about calculated metrics. + Each entry corresponds to a column in ``comptable``. 
""" LGR.info('Performing manual ICA component selection') RepLGR.info("Next, components were manually classified as " @@ -39,6 +45,26 @@ def manual_selection(comptable, acc=None, rej=None): 'original_classification' not in comptable.columns): comptable['original_classification'] = comptable['classification'] comptable['original_rationale'] = comptable['rationale'] + metric_metadata["original_classification"] = { + "LongName": "Original classification", + "Description": ( + "Classification from the original decision tree." + ), + "Levels": { + "accepted": "A BOLD-like component included in denoised and high-Kappa data.", + "rejected": "A non-BOLD component excluded from denoised and high-Kappa data.", + "ignored": ( + "A low-variance component included in denoised, " + "but excluded from high-Kappa data."), + }, + } + metric_metadata["original_rationale"] = { + "LongName": "Original rationale", + "Description": ( + "The reason for the original classification. " + "Please see tedana's documentation for information about possible rationales." + ), + } comptable['classification'] = 'accepted' comptable['rationale'] = '' @@ -68,12 +94,33 @@ def manual_selection(comptable, acc=None, rej=None): comptable.loc[ign, 'classification'] = 'ignored' comptable.loc[ign, 'rationale'] += 'I001;' + metric_metadata["classification"] = { + "LongName": "Component classification", + "Description": ( + "Classification from the manual classification procedure." + ), + "Levels": { + "accepted": "A BOLD-like component included in denoised and high-Kappa data.", + "rejected": "A non-BOLD component excluded from denoised and high-Kappa data.", + "ignored": ( + "A low-variance component included in denoised, " + "but excluded from high-Kappa data."), + }, + } + metric_metadata["rationale"] = { + "LongName": "Rationale for component classification", + "Description": ( + "The reason for the original classification. " + "Please see tedana's documentation for information about possible rationales." + ), + } + # Move decision columns to end comptable = clean_dataframe(comptable) - return comptable + return comptable, metric_metadata -def kundu_selection_v2(comptable, n_echos, n_vols): +def kundu_selection_v2(comptable, metric_metadata, n_echos, n_vols): """ Classify components as "accepted," "rejected," or "ignored" based on relevant metrics. @@ -89,6 +136,9 @@ def kundu_selection_v2(comptable, n_echos, n_vols): comptable : (C x M) :obj:`pandas.DataFrame` Component metric table. One row for each component, with a column for each metric. The index should be the component number. + metric_metadata : :obj:`dict` + Dictionary with metadata about calculated metrics. + Each entry corresponds to a column in ``comptable``. n_echos : :obj:`int` Number of echos in original data n_vols : :obj:`int` @@ -99,6 +149,9 @@ def kundu_selection_v2(comptable, n_echos, n_vols): comptable : :obj:`pandas.DataFrame` Updated component table with additional metrics and with classification (accepted, rejected, or ignored) + metric_metadata : :obj:`dict` + Dictionary with metadata about calculated metrics. + Each entry corresponds to a column in ``comptable``. 
Notes ----- @@ -199,7 +252,7 @@ def kundu_selection_v2(comptable, n_echos, n_vols): # Move decision columns to end comptable = clean_dataframe(comptable) - return comptable + return comptable, metric_metadata """ Step 2: Make a guess for what the good components are, in order to @@ -258,7 +311,7 @@ def kundu_selection_v2(comptable, n_echos, n_vols): # Move decision columns to end comptable = clean_dataframe(comptable) - return comptable + return comptable, metric_metadata # Calculate "rate" for kappa: kappa range divided by variance explained # range, for potentially accepted components @@ -268,6 +321,14 @@ def kundu_selection_v2(comptable, n_echos, n_vols): (np.max(comptable.loc[acc_prov, 'variance explained']) - np.min(comptable.loc[acc_prov, 'variance explained']))) comptable['kappa ratio'] = kappa_rate * comptable['variance explained'] / comptable['kappa'] + metric_metadata["kappa ratio"] = { + "LongName": "Kappa ratio", + "Description": ( + "Ratio score calculated by dividing range of kappa values by range of " + "variance explained values." + ), + "Units": "arbitrary", + } # Calculate bounds for variance explained varex_lower = stats.scoreatpercentile( @@ -325,6 +386,16 @@ def kundu_selection_v2(comptable, n_echos, n_vols): (comptable.loc[unclf, 'rho'] < rho_elbow)), np.sum(comptable.loc[unclf, 'kappa'] > kappa_elbow)])) + metric_metadata["d_table_score_scrub"] = { + "LongName": "Updated decision table score", + "Description": ( + "Summary score compiled from five metrics and computed from a " + "subset of components, with smaller values " + "(i.e., higher ranks) indicating more BOLD dependence and less noise." + ), + "Units": "arbitrary", + } + # Rejection candidate based on artifact type A: candartA conservative_guess = num_acc_guess / RESTRICT_FACTOR candartA = np.intersect1d( @@ -370,6 +441,27 @@ def kundu_selection_v2(comptable, n_echos, n_vols): # at this point, unclf is equivalent to accepted + metric_metadata["classification"] = { + "LongName": "Component classification", + "Description": ( + "Classification from the classification procedure." + ), + "Levels": { + "accepted": "A BOLD-like component included in denoised and high-Kappa data.", + "rejected": "A non-BOLD component excluded from denoised and high-Kappa data.", + "ignored": ( + "A low-variance component included in denoised, " + "but excluded from high-Kappa data."), + }, + } + metric_metadata["rationale"] = { + "LongName": "Rationale for component classification", + "Description": ( + "The reason for the original classification. " + "Please see tedana's documentation for information about possible rationales." + ), + } + # Move decision columns to end comptable = clean_dataframe(comptable) - return comptable + return comptable, metric_metadata diff --git a/tedana/selection/tedpca.py b/tedana/selection/tedpca.py index 255929c35..7d50dc46b 100644 --- a/tedana/selection/tedpca.py +++ b/tedana/selection/tedpca.py @@ -15,7 +15,7 @@ F_MAX = 500 -def kundu_tedpca(comptable, n_echos, kdaw=10., rdaw=1., stabilize=False): +def kundu_tedpca(comptable, metric_metadata, n_echos, kdaw=10., rdaw=1., stabilize=False): """ Select PCA components using Kundu's decision tree approach. @@ -24,6 +24,9 @@ def kundu_tedpca(comptable, n_echos, kdaw=10., rdaw=1., stabilize=False): comptable : :obj:`pandas.DataFrame` Component table with relevant metrics: kappa, rho, and normalized variance explained. Component number should be the index. + metric_metadata : :obj:`dict` + Dictionary with metadata about calculated metrics. 
+ Each entry corresponds to a column in ``comptable``. n_echos : :obj:`int` Number of echoes in dataset. kdaw : :obj:`float`, optional @@ -41,6 +44,9 @@ def kundu_tedpca(comptable, n_echos, kdaw=10., rdaw=1., stabilize=False): comptable : :obj:`pandas.DataFrame` Component table with components classified as 'accepted', 'rejected', or 'ignored'. + metric_metadata : :obj:`dict` + Dictionary with metadata about calculated metrics. + Each entry corresponds to a column in ``comptable``. """ LGR.info('Performing PCA component selection with Kundu decision tree') comptable['classification'] = 'accepted' @@ -117,10 +123,28 @@ def kundu_tedpca(comptable, n_echos, kdaw=10., rdaw=1., stabilize=False): comptable.loc[under_fmin2, 'classification'] = 'rejected' comptable.loc[under_fmin2, 'rationale'] += 'P007;' + metric_metadata["classification"] = { + "LongName": "Component classification", + "Description": ( + "Classification from the classification procedure." + ), + "Levels": { + "accepted": "A BOLD-like component included in dimensionally-reduced data.", + "rejected": "A non-BOLD component removed from dimensionally-reduced data.", + }, + } + metric_metadata["rationale"] = { + "LongName": "Rationale for component classification", + "Description": ( + "The reason for the original classification. " + "Please see tedana's documentation for information about possible rationales." + ), + } + n_components = comptable.loc[comptable['classification'] == 'accepted'].shape[0] LGR.info('Selected {0} components with Kappa threshold: {1:.02f}, Rho ' 'threshold: {2:.02f}'.format(n_components, kappa_thr, rho_thr)) # Move decision columns to end comptable = clean_dataframe(comptable) - return comptable + return comptable, metric_metadata diff --git a/tedana/tests/data/cornell_three_echo_outputs.txt b/tedana/tests/data/cornell_three_echo_outputs.txt index efdf7a13d..9a3854ca4 100644 --- a/tedana/tests/data/cornell_three_echo_outputs.txt +++ b/tedana/tests/data/cornell_three_echo_outputs.txt @@ -1,6 +1,27 @@ -adaptive_mask.nii.gz -betas_OC.nii.gz -betas_hik_OC.nii.gz +S0map.nii.gz +T2starmap.nii.gz +dataset_description.json +desc-ICAAccepted_components.nii.gz +desc-ICAAccepted_stat-z_components.nii.gz +desc-ICA_components.nii.gz +desc-ICA_decomposition.json +desc-tedana_metrics.json +desc-tedana_metrics.tsv +desc-ICA_mixing.tsv +desc-ICA_stat-z_components.nii.gz +desc-PCA_decomposition.json +desc-PCA_metrics.json +desc-PCA_metrics.tsv +desc-PCA_mixing.tsv +desc-PCA_stat-z_components.nii.gz +desc-adaptiveGoodSignal_mask.nii.gz +desc-optcomAccepted_bold.nii.gz +desc-optcomDenoised_bold.nii.gz +desc-optcomRejected_bold.nii.gz +desc-optcom_bold.nii.gz +report.txt +tedana_report.html +figures figures/comp_000.png figures/comp_001.png figures/comp_002.png @@ -66,19 +87,3 @@ figures/comp_061.png figures/comp_062.png figures/comp_063.png figures/comp_064.png -dn_ts_OC.nii.gz -feats_OC2.nii.gz -figures -hik_ts_OC.nii.gz -ica_components.nii.gz -ica_decomposition.json -ica_mixing.tsv -lowk_ts_OC.nii.gz -pca_components.nii.gz -pca_decomposition.json -pca_mixing.tsv -report.txt -s0v.nii.gz -t2sv.nii.gz -tedana_report.html -ts_OC.nii.gz diff --git a/tedana/tests/data/fiu_four_echo_outputs.txt b/tedana/tests/data/fiu_four_echo_outputs.txt index 97ed46159..9c1e4b3bc 100644 --- a/tedana/tests/data/fiu_four_echo_outputs.txt +++ b/tedana/tests/data/fiu_four_echo_outputs.txt @@ -1,53 +1,76 @@ -T1gs.nii.gz -adaptive_mask.nii.gz -betas_OC.nii.gz -betas_hik_OC.nii.gz -betas_hik_OC_MIR.nii.gz -dn_ts_OC.nii.gz -dn_ts_OC_MIR.nii.gz 
-dn_ts_e1.nii.gz -dn_ts_e2.nii.gz -dn_ts_e3.nii.gz -dn_ts_e4.nii.gz -feats_OC2.nii.gz -glsig.1D -hik_ts_OC.nii.gz -hik_ts_OC_MIR.nii.gz -hik_ts_e1.nii.gz -hik_ts_e2.nii.gz -hik_ts_e3.nii.gz -hik_ts_e4.nii.gz -ica_components.nii.gz -ica_decomposition.json -ica_mixing.tsv -lowk_ts_OC.nii.gz -lowk_ts_e1.nii.gz -lowk_ts_e2.nii.gz -lowk_ts_e3.nii.gz -lowk_ts_e4.nii.gz -meica_R2_pred.nii.gz -meica_S0_pred.nii.gz -meica_betas_catd.nii.gz -meica_metric_weights.nii.gz -meica_mix_MIR.1D -mepca_R2_pred.nii.gz -mepca_S0_pred.nii.gz -mepca_betas_catd.nii.gz -mepca_metric_weights.nii.gz -pca_components.nii.gz -pca_decomposition.json -pca_mixing.tsv +S0map.nii.gz +T2starmap.nii.gz +dataset_description.json +desc-ICAAcceptedMIRDenoised_components.nii.gz +desc-ICAAccepted_components.nii.gz +desc-ICAAccepted_stat-z_components.nii.gz +desc-ICAAveragingWeights_components.nii.gz +desc-ICAMIRDenoised_mixing.tsv +desc-ICA_components.nii.gz +desc-ICA_decomposition.json +desc-tedana_metrics.json +desc-tedana_metrics.tsv +desc-ICA_mixing.tsv +desc-ICA_stat-z_components.nii.gz +desc-PCAAveragingWeights_components.nii.gz +desc-PCA_decomposition.json +desc-PCA_metrics.json +desc-PCA_metrics.tsv +desc-PCA_mixing.tsv +desc-PCA_stat-z_components.nii.gz +desc-T1likeEffect_min.nii.gz +desc-adaptiveGoodSignal_mask.nii.gz +desc-full_S0map.nii.gz +desc-full_T2starmap.nii.gz +desc-globalSignal_map.nii.gz +desc-globalSignal_timeseries.tsv +desc-optcomAcceptedMIRDenoised_bold.nii.gz +desc-optcomAccepted_bold.nii.gz +desc-optcomDenoised_bold.nii.gz +desc-optcomMIRDenoised_bold.nii.gz +desc-optcomNoGlobalSignal_bold.nii.gz +desc-optcomPCAReduced_bold.nii.gz +desc-optcomRejected_bold.nii.gz +desc-optcomWithGlobalSignal_bold.nii.gz +desc-optcom_bold.nii.gz +echo-1_desc-Accepted_bold.nii.gz +echo-1_desc-Denoised_bold.nii.gz +echo-1_desc-ICAR2ModelPredictions_components.nii.gz +echo-1_desc-ICAS0ModelPredictions_components.nii.gz +echo-1_desc-ICA_components.nii.gz +echo-1_desc-PCAR2ModelPredictions_components.nii.gz +echo-1_desc-PCAS0ModelPredictions_components.nii.gz +echo-1_desc-PCA_components.nii.gz +echo-1_desc-Rejected_bold.nii.gz +echo-2_desc-Accepted_bold.nii.gz +echo-2_desc-Denoised_bold.nii.gz +echo-2_desc-ICAR2ModelPredictions_components.nii.gz +echo-2_desc-ICAS0ModelPredictions_components.nii.gz +echo-2_desc-ICA_components.nii.gz +echo-2_desc-PCAR2ModelPredictions_components.nii.gz +echo-2_desc-PCAS0ModelPredictions_components.nii.gz +echo-2_desc-PCA_components.nii.gz +echo-2_desc-Rejected_bold.nii.gz +echo-3_desc-Accepted_bold.nii.gz +echo-3_desc-Denoised_bold.nii.gz +echo-3_desc-ICAR2ModelPredictions_components.nii.gz +echo-3_desc-ICAS0ModelPredictions_components.nii.gz +echo-3_desc-ICA_components.nii.gz +echo-3_desc-PCAR2ModelPredictions_components.nii.gz +echo-3_desc-PCAS0ModelPredictions_components.nii.gz +echo-3_desc-PCA_components.nii.gz +echo-3_desc-Rejected_bold.nii.gz +echo-4_desc-Accepted_bold.nii.gz +echo-4_desc-Denoised_bold.nii.gz +echo-4_desc-ICAR2ModelPredictions_components.nii.gz +echo-4_desc-ICAS0ModelPredictions_components.nii.gz +echo-4_desc-ICA_components.nii.gz +echo-4_desc-PCAR2ModelPredictions_components.nii.gz +echo-4_desc-PCAS0ModelPredictions_components.nii.gz +echo-4_desc-PCA_components.nii.gz +echo-4_desc-Rejected_bold.nii.gz report.txt -s0v.nii.gz -s0vG.nii.gz -sphis_hik.nii.gz -t2sv.nii.gz -t2svG.nii.gz tedana_report.html -ts_OC.nii.gz -ts_OC_whitened.nii.gz -tsoc_nogs.nii.gz -tsoc_orig.nii.gz figures figures/comp_000.png figures/comp_001.png diff --git 
a/tedana/tests/data/nih_five_echo_outputs_t2smap.txt b/tedana/tests/data/nih_five_echo_outputs_t2smap.txt index 1d2c6c46b..44424c03f 100644 --- a/tedana/tests/data/nih_five_echo_outputs_t2smap.txt +++ b/tedana/tests/data/nih_five_echo_outputs_t2smap.txt @@ -1,3 +1,4 @@ +dataset_description.json desc-full_S0map.nii.gz desc-full_T2starmap.nii.gz desc-optcom_bold.nii.gz diff --git a/tedana/tests/data/nih_five_echo_outputs_verbose.txt b/tedana/tests/data/nih_five_echo_outputs_verbose.txt index b76eedc46..5de724477 100644 --- a/tedana/tests/data/nih_five_echo_outputs_verbose.txt +++ b/tedana/tests/data/nih_five_echo_outputs_verbose.txt @@ -1,48 +1,77 @@ -adaptive_mask.nii.gz -betas_OC.nii.gz -betas_hik_OC.nii.gz -dn_ts_OC.nii.gz -dn_ts_e1.nii.gz -dn_ts_e2.nii.gz -dn_ts_e3.nii.gz -dn_ts_e4.nii.gz -dn_ts_e5.nii.gz -feats_OC2.nii.gz -hik_ts_OC.nii.gz -hik_ts_e1.nii.gz -hik_ts_e2.nii.gz -hik_ts_e3.nii.gz -hik_ts_e4.nii.gz -hik_ts_e5.nii.gz -ica_components.nii.gz -ica_decomposition.json -ica_mixing.tsv -ica_orth_mixing.tsv -lowk_ts_OC.nii.gz -lowk_ts_e1.nii.gz -lowk_ts_e2.nii.gz -lowk_ts_e3.nii.gz -lowk_ts_e4.nii.gz -lowk_ts_e5.nii.gz -meica_R2_pred.nii.gz -meica_S0_pred.nii.gz -meica_betas_catd.nii.gz -meica_metric_weights.nii.gz -mepca_R2_pred.nii.gz -mepca_S0_pred.nii.gz -mepca_betas_catd.nii.gz -mepca_metric_weights.nii.gz -pca_components.nii.gz -pca_decomposition.json -pca_mixing.tsv +S0map.nii.gz +T2starmap.nii.gz +dataset_description.json +desc-ICAAccepted_components.nii.gz +desc-ICAAccepted_stat-z_components.nii.gz +desc-ICAAveragingWeights_components.nii.gz +desc-ICAOrth_mixing.tsv +desc-ICA_components.nii.gz +desc-ICA_decomposition.json +desc-tedana_metrics.json +desc-tedana_metrics.tsv +desc-ICA_mixing.tsv +desc-ICA_stat-z_components.nii.gz +desc-PCAAveragingWeights_components.nii.gz +desc-PCA_decomposition.json +desc-PCA_metrics.json +desc-PCA_metrics.tsv +desc-PCA_mixing.tsv +desc-PCA_stat-z_components.nii.gz +desc-adaptiveGoodSignal_mask.nii.gz +desc-full_S0map.nii.gz +desc-full_T2starmap.nii.gz +desc-optcomAccepted_bold.nii.gz +desc-optcomDenoised_bold.nii.gz +desc-optcomPCAReduced_bold.nii.gz +desc-optcomRejected_bold.nii.gz +desc-optcom_bold.nii.gz +echo-1_desc-Accepted_bold.nii.gz +echo-1_desc-Denoised_bold.nii.gz +echo-1_desc-ICAR2ModelPredictions_components.nii.gz +echo-1_desc-ICAS0ModelPredictions_components.nii.gz +echo-1_desc-ICA_components.nii.gz +echo-1_desc-PCAR2ModelPredictions_components.nii.gz +echo-1_desc-PCAS0ModelPredictions_components.nii.gz +echo-1_desc-PCA_components.nii.gz +echo-1_desc-Rejected_bold.nii.gz +echo-2_desc-Accepted_bold.nii.gz +echo-2_desc-Denoised_bold.nii.gz +echo-2_desc-ICAR2ModelPredictions_components.nii.gz +echo-2_desc-ICAS0ModelPredictions_components.nii.gz +echo-2_desc-ICA_components.nii.gz +echo-2_desc-PCAR2ModelPredictions_components.nii.gz +echo-2_desc-PCAS0ModelPredictions_components.nii.gz +echo-2_desc-PCA_components.nii.gz +echo-2_desc-Rejected_bold.nii.gz +echo-3_desc-Accepted_bold.nii.gz +echo-3_desc-Denoised_bold.nii.gz +echo-3_desc-ICAR2ModelPredictions_components.nii.gz +echo-3_desc-ICAS0ModelPredictions_components.nii.gz +echo-3_desc-ICA_components.nii.gz +echo-3_desc-PCAR2ModelPredictions_components.nii.gz +echo-3_desc-PCAS0ModelPredictions_components.nii.gz +echo-3_desc-PCA_components.nii.gz +echo-3_desc-Rejected_bold.nii.gz +echo-4_desc-Accepted_bold.nii.gz +echo-4_desc-Denoised_bold.nii.gz +echo-4_desc-ICAR2ModelPredictions_components.nii.gz +echo-4_desc-ICAS0ModelPredictions_components.nii.gz +echo-4_desc-ICA_components.nii.gz 
+echo-4_desc-PCAR2ModelPredictions_components.nii.gz +echo-4_desc-PCAS0ModelPredictions_components.nii.gz +echo-4_desc-PCA_components.nii.gz +echo-4_desc-Rejected_bold.nii.gz +echo-5_desc-Accepted_bold.nii.gz +echo-5_desc-Denoised_bold.nii.gz +echo-5_desc-ICAR2ModelPredictions_components.nii.gz +echo-5_desc-ICAS0ModelPredictions_components.nii.gz +echo-5_desc-ICA_components.nii.gz +echo-5_desc-PCAR2ModelPredictions_components.nii.gz +echo-5_desc-PCAS0ModelPredictions_components.nii.gz +echo-5_desc-PCA_components.nii.gz +echo-5_desc-Rejected_bold.nii.gz report.txt -s0v.nii.gz -s0vG.nii.gz -t2sv.nii.gz -t2svG.nii.gz tedana_report.html -ts_OC.nii.gz -ts_OC_whitened.nii.gz figures figures/comp_000.png figures/comp_001.png diff --git a/tedana/tests/test_gscontrol.py b/tedana/tests/test_gscontrol.py index 45342c57c..07636aab1 100644 --- a/tedana/tests/test_gscontrol.py +++ b/tedana/tests/test_gscontrol.py @@ -2,10 +2,17 @@ Tests for tedana.model.fit """ +import os + import numpy as np import pytest import tedana.gscontrol as gsc +from tedana.io import OutputGenerator +from tedana.tests.utils import get_test_data_path + +data_dir = get_test_data_path() +ref_img = os.path.join(data_dir, 'mask.nii.gz') def test_break_gscontrol_raw(): @@ -16,19 +23,19 @@ def test_break_gscontrol_raw(): n_samples, n_echos, n_vols = 10000, 4, 100 catd = np.empty((n_samples, n_echos, n_vols)) optcom = np.empty((n_samples, n_vols)) - ref_img = '' + io_generator = OutputGenerator(ref_img) catd = np.empty((n_samples + 1, n_echos, n_vols)) with pytest.raises(ValueError) as e_info: gsc.gscontrol_raw(catd=catd, optcom=optcom, n_echos=n_echos, - ref_img=ref_img, dtrank=4) + io_generator=io_generator, dtrank=4) assert str(e_info.value) == ('First dimensions of catd ({0}) and optcom ({1}) do not ' 'match'.format(catd.shape[0], optcom.shape[0])) catd = np.empty((n_samples, n_echos + 1, n_vols)) with pytest.raises(ValueError) as e_info: gsc.gscontrol_raw(catd=catd, optcom=optcom, n_echos=n_echos, - ref_img=ref_img, dtrank=4) + io_generator=io_generator, dtrank=4) assert str(e_info.value) == ('Second dimension of catd ({0}) does not match ' 'n_echos ({1})'.format(catd.shape[1], n_echos)) @@ -36,7 +43,7 @@ def test_break_gscontrol_raw(): optcom = np.empty((n_samples, n_vols + 1)) with pytest.raises(ValueError) as e_info: gsc.gscontrol_raw(catd=catd, optcom=optcom, n_echos=n_echos, - ref_img=ref_img, dtrank=4) + io_generator=io_generator, dtrank=4) assert str(e_info.value) == ('Third dimension of catd ({0}) does not match ' 'second dimension of optcom ' '({1})'.format(catd.shape[2], optcom.shape[1])) diff --git a/tedana/tests/test_integration.py b/tedana/tests/test_integration.py index 43f94d94a..05ee18b96 100644 --- a/tedana/tests/test_integration.py +++ b/tedana/tests/test_integration.py @@ -17,7 +17,6 @@ from tedana.workflows import tedana as tedana_cli from tedana.workflows import t2smap as t2smap_cli -from tedana import io def check_integration_outputs(fname, outpath): @@ -99,16 +98,16 @@ def test_integration_five_echo(skip_integration): verbose=True) # Just a check on the component table pending a unit test of load_comptable - comptable = os.path.join(out_dir, 'ica_decomposition.json') - df = io.load_comptable(comptable) + comptable = os.path.join(out_dir, 'desc-tedana_metrics.tsv') + df = pd.read_table(comptable) assert isinstance(df, pd.DataFrame) # Test re-running, but use the CLI out_dir2 = '/tmp/data/five-echo/TED.five-echo-manual' acc_comps = df.loc[df['classification'] == 'accepted'].index.values acc_comps = [str(c) for 
c in acc_comps] - mixing = os.path.join(out_dir, 'ica_mixing.tsv') - t2smap = os.path.join(out_dir, 't2sv.nii.gz') + mixing = os.path.join(out_dir, 'desc-ICA_mixing.tsv') + t2smap = os.path.join(out_dir, 'T2starmap.nii.gz') args = (['-d'] + datalist + ['-e'] + [str(te) for te in echo_times] + ['--out-dir', out_dir2, '--debug', '--verbose', '--manacc', *acc_comps, @@ -178,8 +177,8 @@ def test_integration_three_echo(skip_integration): args = (['-d', '/tmp/data/three-echo/three_echo_Cornell_zcat.nii.gz', '-e', '14.5', '38.5', '62.5', '--out-dir', out_dir2, '--debug', '--verbose', - '--ctab', os.path.join(out_dir, 'ica_decomposition.json'), - '--mix', os.path.join(out_dir, 'ica_mixing.tsv')]) + '--ctab', os.path.join(out_dir, 'desc-tedana_metrics.tsv'), + '--mix', os.path.join(out_dir, 'desc-ICA_mixing.tsv')]) tedana_cli._main(args) # compare the generated output files diff --git a/tedana/tests/test_io.py b/tedana/tests/test_io.py index b19d5ab04..82c0006dc 100644 --- a/tedana/tests/test_io.py +++ b/tedana/tests/test_io.py @@ -25,10 +25,6 @@ def test_new_nii_like(): assert nimg.shape == (39, 50, 33, 3, 5) -def test_filewrite(): - pass - - def test_load_data(): fimg = [nib.load(f) for f in fnames] exp_shape = (64350, 3, 5) @@ -97,63 +93,45 @@ def test_smoke_write_split_ts(): # ref_img has shape of (39, 50, 33) so data is 64350 (39*33*50) x 10 # creating the component table with component as random floats, # a "metric," and random classification + io_generator = me.OutputGenerator(ref_img) component = np.random.random((n_components)) metric = np.random.random((n_components)) classification = np.random.choice(["accepted", "rejected", "ignored"], n_components) df_data = np.column_stack((component, metric, classification)) comptable = pd.DataFrame(df_data, columns=['component', 'metric', 'classification']) - assert me.write_split_ts(data, mmix, mask, comptable, ref_img) is not None + assert me.write_split_ts(data, mmix, mask, comptable, io_generator) is not None # TODO: midk_ts.nii is never generated? 
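+ # A hypothetical illustration of name resolution: under the default BIDS
+ # convention, io_generator.get_name('high kappa ts img') resolves through
+ # tedana/resources/config/outputs.json to
+ # os.path.join(io_generator.out_dir, 'desc-optcomAccepted_bold.nii.gz')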
- for filename in ["hik_ts_.nii.gz", "lowk_ts_.nii.gz", "dn_ts_.nii.gz"]: + fn = io_generator.get_name + split = ('high kappa ts img', 'low kappa ts img', 'denoised ts img') + fnames = [fn(f) for f in split] + for filename in fnames: # remove all files generated - try: - os.remove(filename) - except OSError: - print(filename + " not generated") - pass - - -def test_smoke_writefeats(): - """ - Ensures that writefeats writes out the expected feature with random - input, since there is no suffix, remove feats_.nii - """ - n_samples, n_times, n_components = 64350, 10, 6 - data = np.random.random((n_samples, n_times)) - mmix = np.random.random((n_times, n_components)) - mask = np.random.randint(2, size=n_samples) - ref_img = os.path.join(data_dir, 'mask.nii.gz') - - assert me.writefeats(data, mmix, mask, ref_img) is not None - - # this only generates feats_.nii, so delete that - try: - os.remove("feats_.nii.gz") - except OSError: - print("feats_.nii not generated") - pass + os.remove(filename) def test_smoke_filewrite(): """ - Ensures that filewrite writes out a neuroimage with random input, - since there is no name, remove the image named .nii + Ensures that filewrite fails for no known image type, write a known key + in both bids and orig formats """ - n_samples, n_times, _ = 64350, 10, 6 + n_samples, _, _ = 64350, 10, 6 data_1d = np.random.random((n_samples)) - data_2d = np.random.random((n_samples, n_times)) - filename = "" ref_img = os.path.join(data_dir, 'mask.nii.gz') + io_generator = me.OutputGenerator(ref_img) - assert me.filewrite(data_1d, filename, ref_img) is not None - assert me.filewrite(data_2d, filename, ref_img) is not None + with pytest.raises(KeyError): + io_generator.save_file(data_1d, '') - try: - os.remove(".nii.gz") - except OSError: - print(".nii not generated") + for convention in ('bidsv1.5.0', 'orig'): + io_generator.convention = convention + fname = io_generator.save_file(data_1d, 't2star img') + assert fname is not None + try: + os.remove(fname) + except OSError: + print('File not generated!') def test_smoke_load_data(): diff --git a/tedana/tests/test_model_fit_dependence_metrics.py b/tedana/tests/test_model_fit_dependence_metrics.py index f243bacc3..f50eecfc8 100644 --- a/tedana/tests/test_model_fit_dependence_metrics.py +++ b/tedana/tests/test_model_fit_dependence_metrics.py @@ -1,11 +1,17 @@ """ Tests for tedana.metrics.fit """ +import os import numpy as np import pytest from tedana.metrics import kundu_fit +from tedana.io import OutputGenerator +from tedana.tests.utils import get_test_data_path + +data_dir = get_test_data_path() +ref_img = os.path.join(data_dir, 'mask.nii.gz') def test_break_dependence_metrics(): @@ -19,14 +25,14 @@ def test_break_dependence_metrics(): mmix = np.empty((n_vols, n_comps)) adaptive_mask = np.empty((n_samples)) tes = np.empty((n_echos)) - ref_img = '' + io_generator = OutputGenerator(ref_img) # Shape of catd is wrong catd = np.empty((n_samples + 1, n_echos, n_vols)) with pytest.raises(ValueError): kundu_fit.dependence_metrics( catd=catd, tsoc=tsoc, mmix=mmix, - adaptive_mask=adaptive_mask, tes=tes, ref_img=ref_img, + adaptive_mask=adaptive_mask, tes=tes, io_generator=io_generator, reindex=False, mmixN=None, algorithm='kundu_v3') # Shape of adaptive_mask is wrong @@ -35,7 +41,7 @@ def test_break_dependence_metrics(): with pytest.raises(ValueError): kundu_fit.dependence_metrics( catd=catd, tsoc=tsoc, mmix=mmix, - adaptive_mask=adaptive_mask, tes=tes, ref_img=ref_img, + adaptive_mask=adaptive_mask, tes=tes, io_generator=io_generator, 
reindex=False, mmixN=None, algorithm='kundu_v3') # Shape of tsoc is wrong @@ -44,7 +50,7 @@ def test_break_dependence_metrics(): with pytest.raises(ValueError): kundu_fit.dependence_metrics( catd=catd, tsoc=tsoc, mmix=mmix, - adaptive_mask=adaptive_mask, tes=tes, ref_img=ref_img, + adaptive_mask=adaptive_mask, tes=tes, io_generator=io_generator, reindex=False, mmixN=None, algorithm='kundu_v3') # Shape of catd is wrong @@ -53,7 +59,7 @@ def test_break_dependence_metrics(): with pytest.raises(ValueError): kundu_fit.dependence_metrics( catd=catd, tsoc=tsoc, mmix=mmix, - adaptive_mask=adaptive_mask, tes=tes, ref_img=ref_img, + adaptive_mask=adaptive_mask, tes=tes, io_generator=io_generator, reindex=False, mmixN=None, algorithm='kundu_v3') # Shape of catd is wrong @@ -61,5 +67,5 @@ def test_break_dependence_metrics(): with pytest.raises(ValueError): kundu_fit.dependence_metrics( catd=catd, tsoc=tsoc, mmix=mmix, - adaptive_mask=adaptive_mask, tes=tes, ref_img=ref_img, + adaptive_mask=adaptive_mask, tes=tes, io_generator=io_generator, reindex=False, mmixN=None, algorithm='kundu_v3') diff --git a/tedana/tests/test_model_kundu_metrics.py b/tedana/tests/test_model_kundu_metrics.py index 2e4ad75b5..0654c4fc5 100644 --- a/tedana/tests/test_model_kundu_metrics.py +++ b/tedana/tests/test_model_kundu_metrics.py @@ -33,5 +33,9 @@ def test_smoke_kundu_metrics(): metric_maps['Br_R2_clmaps'] = np.random.randint(low=0, high=2, size=(n_voxels, n_comps)) - comptable = kundu_fit.kundu_metrics(comptable, metric_maps) + comptable, metric_metadata = kundu_fit.kundu_metrics( + comptable, + metric_maps, + metric_metadata={}, + ) assert comptable is not None diff --git a/tedana/tests/test_selection.py b/tedana/tests/test_selection.py index e064fc6df..999303dd4 100644 --- a/tedana/tests/test_selection.py +++ b/tedana/tests/test_selection.py @@ -14,18 +14,22 @@ def test_manual_selection(): accepted and rejected components. """ comptable = pd.DataFrame(index=np.arange(100)) - comptable = selection.manual_selection(comptable, acc=[1, 3, 5]) + comptable, metric_metadata = selection.manual_selection(comptable, {}, acc=[1, 3, 5]) assert comptable.loc[comptable.classification == 'accepted'].shape[0] == 3 assert (comptable.loc[comptable.classification == 'rejected'].shape[0] == (comptable.shape[0] - 3)) - comptable = selection.manual_selection(comptable, rej=[1, 3, 5]) + comptable, metric_metadata = selection.manual_selection(comptable, {}, rej=[1, 3, 5]) assert comptable.loc[comptable.classification == 'rejected'].shape[0] == 3 assert (comptable.loc[comptable.classification == 'accepted'].shape[0] == (comptable.shape[0] - 3)) - comptable = selection.manual_selection(comptable, acc=[0, 2, 4], - rej=[1, 3, 5]) + comptable, metric_metadata = selection.manual_selection( + comptable, + {}, + acc=[0, 2, 4], + rej=[1, 3, 5] + ) assert comptable.loc[comptable.classification == 'accepted'].shape[0] == 3 assert comptable.loc[comptable.classification == 'rejected'].shape[0] == 3 assert (comptable.loc[comptable.classification == 'ignored'].shape[0] == diff --git a/tedana/utils.py b/tedana/utils.py index 06588400b..1235a1238 100644 --- a/tedana/utils.py +++ b/tedana/utils.py @@ -2,6 +2,7 @@ Utilities for tedana package """ import logging +import os.path as op import numpy as np import nibabel as nib @@ -368,3 +369,12 @@ def millisec2sec(arr): Values in seconds. """ return arr / 1000. + + +def get_resource_path(): + """Return the path to general resources, terminated with separator. 
+
+    Resources are kept in the "resources" directory inside the package.
+    Based on a function by Yaroslav Halchenko used in the Neurosynth Python package.
+    """
+    return op.abspath(op.join(op.dirname(__file__), "resources") + op.sep)
diff --git a/tedana/workflows/t2smap.py b/tedana/workflows/t2smap.py
index 94eacade3..c17fc5d0a 100644
--- a/tedana/workflows/t2smap.py
+++ b/tedana/workflows/t2smap.py
@@ -10,7 +10,7 @@
 from scipy import stats
 from threadpoolctl import threadpool_limits

-from tedana import (combine, decay, io, utils)
+from tedana import (combine, decay, io, utils, __version__)
 from tedana.workflows.parser_utils import is_valid_file

 LGR = logging.getLogger(__name__)
@@ -63,6 +63,18 @@ def _get_parser():
                                'Dependent ANAlysis. Must be in the same '
                                'space as `data`.'),
                          default=None)
+    optional.add_argument('--prefix',
+                          dest='prefix',
+                          type=str,
+                          help='Prefix for filenames generated.',
+                          default='')
+    optional.add_argument('--convention',
+                          dest='convention',
+                          action='store',
+                          choices=['orig', 'bids'],
+                          help=('Filenaming convention. bids will use '
+                                'the latest BIDS derivatives version.'),
+                          default='bids')
     optional.add_argument('--fittype',
                           dest='fittype',
                           action='store',
@@ -116,6 +128,7 @@ def _get_parser():


 def t2smap_workflow(data, tes, out_dir='.', mask=None,
+                    prefix='', convention='bids',
                     fittype='loglin', fitmode='all', combmode='t2s',
                     debug=False, quiet=False):
     """
@@ -197,6 +210,14 @@ def t2smap_workflow(data, tes, out_dir='.', mask=None,
     LGR.info('Loading input data: {}'.format([f for f in data]))
     catd, ref_img = io.load_data(data, n_echos=n_echos)

+    io_generator = io.OutputGenerator(
+        ref_img,
+        convention=convention,
+        out_dir=out_dir,
+        prefix=prefix,
+        config="auto",
+        make_figures=False,
+    )
     n_samp, n_echos, n_vols = catd.shape
     LGR.debug('Resulting data shape: {}'.format(catd.shape))

@@ -236,13 +257,39 @@ def t2smap_workflow(data, tes, out_dir='.', mask=None,
     s0_limited[s0_limited < 0] = 0
     t2s_limited[t2s_limited < 0] = 0

-    io.filewrite(utils.millisec2sec(t2s_limited),
-                 op.join(out_dir, 'T2starmap.nii.gz'), ref_img)
-    io.filewrite(s0_limited, op.join(out_dir, 'S0map.nii.gz'), ref_img)
-    io.filewrite(utils.millisec2sec(t2s_full),
-                 op.join(out_dir, 'desc-full_T2starmap.nii.gz'), ref_img)
-    io.filewrite(s0_full, op.join(out_dir, 'desc-full_S0map.nii.gz'), ref_img)
-    io.filewrite(OCcatd, op.join(out_dir, 'desc-optcom_bold.nii.gz'), ref_img)
+    io_generator.save_file(
+        utils.millisec2sec(t2s_limited),
+        't2star img',
+    )
+    io_generator.save_file(s0_limited, 's0 img')
+    io_generator.save_file(
+        utils.millisec2sec(t2s_full),
+        'full t2star img',
+    )
+    io_generator.save_file(
+        s0_full,
+        'full s0 img',
+    )
+    io_generator.save_file(OCcatd, 'combined img')
+
+    # Write out BIDS-compatible description file
+    derivative_metadata = {
+        "Name": "t2smap Outputs",
+        "BIDSVersion": "1.5.0",
+        "DatasetType": "derivative",
+        "GeneratedBy": [
+            {
+                "Name": "tedana",
+                "Version": __version__,
+                "Description": (
+                    "A pipeline estimating T2* from multi-echo fMRI data and "
+                    "combining data across echoes."
+                ),
+                "CodeURL": "https://github.com/ME-ICA/tedana"
+            }
+        ]
+    }
+    io_generator.save_file(derivative_metadata, 'data description json')


 def _main(argv=None):
diff --git a/tedana/workflows/tedana.py b/tedana/workflows/tedana.py
index 35f218100..7cf42395b 100644
--- a/tedana/workflows/tedana.py
+++ b/tedana/workflows/tedana.py
@@ -1,6 +1,7 @@
 """
 Run the "canonical" TE-Dependent ANAlysis workflow.
""" +import json import os import sys import os.path as op @@ -17,7 +18,7 @@ from nilearn.masking import compute_epi_mask from tedana import (decay, combine, decomposition, io, metrics, - reporting, selection, utils) + reporting, selection, utils, __version__) import tedana.gscontrol as gsc from tedana.stats import computefeats2 from tedana.workflows.parser_utils import is_valid_file, check_tedpca_value, ContextFilter @@ -77,6 +78,18 @@ def _get_parser(): "function will be used to derive a mask " "from the first echo's data."), default=None) + optional.add_argument('--prefix', + dest='prefix', + type=str, + help="Prefix for filenames generated.", + default='') + optional.add_argument('--convention', + dest='convention', + action='store', + choices=['orig', 'bids'], + help=("Filenaming convention. bids will use " + "the latest BIDS derivatives version."), + default='bids') optional.add_argument('--fittype', dest='fittype', action='store', @@ -235,6 +248,7 @@ def _get_parser(): def tedana_workflow(data, tes, out_dir='.', mask=None, + convention='bids', prefix='', fittype='loglin', combmode='t2s', tedpca='mdl', fixed_seed=42, maxit=500, maxrestart=10, tedort=False, gscontrol=None, @@ -398,11 +412,19 @@ def tedana_workflow(data, tes, out_dir='.', mask=None, LGR.info('Loading input data: {}'.format([f for f in data])) catd, ref_img = io.load_data(data, n_echos=n_echos) + io_generator = io.OutputGenerator( + ref_img, + convention=convention, + out_dir=out_dir, + prefix=prefix, + config="auto", + ) + n_samp, n_echos, n_vols = catd.shape LGR.debug('Resulting data shape: {}'.format(catd.shape)) # check if TR is 0 - img_t_r = ref_img.header.get_zooms()[-1] + img_t_r = io_generator.reference_img.header.get_zooms()[-1] if img_t_r == 0: raise IOError('Dataset has a TR of 0. This indicates incorrect' ' header information. 
To correct this, we recommend' @@ -415,18 +437,20 @@ def tedana_workflow(data, tes, out_dir='.', mask=None, if mixm is not None and op.isfile(mixm): mixm = op.abspath(mixm) # Allow users to re-run on same folder - if mixm != op.join(out_dir, 'ica_mixing.tsv'): - shutil.copyfile(mixm, op.join(out_dir, 'ica_mixing.tsv')) - shutil.copyfile(mixm, op.join(out_dir, op.basename(mixm))) + mixing_name = io_generator.get_name("ICA mixing tsv") + if mixm != mixing_name: + shutil.copyfile(mixm, mixing_name) + shutil.copyfile(mixm, op.join(io_generator.out_dir, op.basename(mixm))) elif mixm is not None: raise IOError('Argument "mixm" must be an existing file.') if ctab is not None and op.isfile(ctab): ctab = op.abspath(ctab) # Allow users to re-run on same folder - if ctab != op.join(out_dir, 'ica_decomposition.json'): - shutil.copyfile(ctab, op.join(out_dir, 'ica_decomposition.json')) - shutil.copyfile(ctab, op.join(out_dir, op.basename(ctab))) + metrics_name = io_generator.get_name("ICA metrics tsv") + if ctab != metrics_name: + shutil.copyfile(ctab, metrics_name) + shutil.copyfile(ctab, op.join(io_generator.out_dir, op.basename(ctab))) elif ctab is not None: raise IOError('Argument "ctab" must be an existing file.') @@ -441,11 +465,11 @@ def tedana_workflow(data, tes, out_dir='.', mask=None, manacc = [int(m) for m in manacc] if t2smap is not None and op.isfile(t2smap): + t2smap_file = io_generator.get_name('t2star img') t2smap = op.abspath(t2smap) # Allow users to re-run on same folder - if t2smap != op.join(out_dir, 't2sv.nii.gz'): - shutil.copyfile(t2smap, op.join(out_dir, 't2sv.nii.gz')) - shutil.copyfile(t2smap, op.join(out_dir, op.basename(t2smap))) + if t2smap != t2smap_file: + shutil.copyfile(t2smap, t2smap_file) elif t2smap is not None: raise IOError('Argument "t2smap" must be an existing file.') @@ -469,7 +493,7 @@ def tedana_workflow(data, tes, out_dir='.', mask=None, mask[t2s_limited == 0] = 0 # reduce mask based on T2* map else: LGR.info('Computing EPI mask from first echo') - first_echo_img = io.new_nii_like(ref_img, catd[:, 0, :]) + first_echo_img = io.new_nii_like(io_generator.reference_img, catd[:, 0, :]) mask = compute_epi_mask(first_echo_img) RepLGR.info("An initial mask was generated from the first echo using " "nilearn's compute_epi_mask function.") @@ -477,7 +501,7 @@ def tedana_workflow(data, tes, out_dir='.', mask=None, # Create an adaptive mask with at least 3 good echoes. 
mask, masksum = utils.make_adaptive_mask(catd, mask=mask, getsum=True, threshold=3) LGR.debug('Retaining {}/{} samples'.format(mask.sum(), n_samp)) - io.filewrite(masksum, op.join(out_dir, 'adaptive_mask.nii'), ref_img) + io_generator.save_file(masksum, "adaptive mask img") if t2smap is None: LGR.info('Computing T2* map') @@ -491,33 +515,33 @@ def tedana_workflow(data, tes, out_dir='.', mask=None, LGR.debug('Setting cap on T2* map at {:.5f}s'.format( utils.millisec2sec(cap_t2s))) t2s_limited[t2s_limited > cap_t2s * 10] = cap_t2s - io.filewrite(utils.millisec2sec(t2s_limited), op.join(out_dir, 't2sv.nii'), ref_img) - io.filewrite(s0_limited, op.join(out_dir, 's0v.nii'), ref_img) + io_generator.save_file(utils.millisec2sec(t2s_limited), 't2star img') + io_generator.save_file(s0_limited, 's0 img') if verbose: - io.filewrite(utils.millisec2sec(t2s_full), op.join(out_dir, 't2svG.nii'), ref_img) - io.filewrite(s0_full, op.join(out_dir, 's0vG.nii'), ref_img) + io_generator.save_file(utils.millisec2sec(t2s_full), 'full t2star img') + io_generator.save_file(s0_full, 'full s0 img') # optimally combine data data_oc = combine.make_optcom(catd, tes, masksum, t2s=t2s_full, combmode=combmode) # regress out global signal unless explicitly not desired if 'gsr' in gscontrol: - catd, data_oc = gsc.gscontrol_raw(catd, data_oc, n_echos, ref_img, - out_dir=out_dir) + catd, data_oc = gsc.gscontrol_raw(catd, data_oc, n_echos, io_generator) + + fout = io_generator.save_file(data_oc, 'combined img') + LGR.info('Writing optimally combined data set: {}'.format(fout)) if mixm is None: # Identify and remove thermal noise from data dd, n_components = decomposition.tedpca(catd, data_oc, combmode, mask, - masksum, t2s_full, ref_img, + masksum, t2s_full, io_generator, tes=tes, algorithm=tedpca, kdaw=10., rdaw=1., - out_dir=out_dir, verbose=verbose, low_mem=low_mem) if verbose: - io.filewrite(utils.unmask(dd, mask), - op.join(out_dir, 'ts_OC_whitened.nii.gz'), ref_img) + io_generator.save_file(utils.unmask(dd, mask), 'whitened img') # Perform ICA, calculate metrics, and apply decision tree # Restart when ICA fails to converge or too few BOLD components found @@ -536,13 +560,22 @@ def tedana_workflow(data, tes, out_dir='.', mask=None, # generated from dimensionally reduced data using full data (i.e., data # with thermal noise) LGR.info('Making second component selection guess from ICA results') - comptable, metric_maps, betas, mmix = metrics.dependence_metrics( + comptable, metric_maps, metric_metadata, betas, mmix = metrics.dependence_metrics( catd, data_oc, mmix_orig, masksum, tes, - ref_img, reindex=True, label='meica_', out_dir=out_dir, - algorithm='kundu_v2', verbose=verbose + io_generator, reindex=True, label='ICA', + algorithm='kundu_v2', verbose=verbose, + ) + comptable, metric_metadata = metrics.kundu_metrics( + comptable, + metric_maps, + metric_metadata, + ) + comptable, metric_metadata = selection.kundu_selection_v2( + comptable, + metric_metadata, + n_echos, + n_vols, ) - comptable = metrics.kundu_metrics(comptable, metric_maps) - comptable = selection.kundu_selection_v2(comptable, n_echos, n_vols) n_bold_comps = comptable[comptable.classification == 'accepted'].shape[0] if (n_restarts < maxrestart) and (n_bold_comps == 0): @@ -552,47 +585,86 @@ def tedana_workflow(data, tes, out_dir='.', mask=None, keep_restarting = False else: keep_restarting = False - - # Write out ICA files. 
- comp_names = [io.add_decomp_prefix(comp, prefix='ica', max_value=comptable.index.max()) - for comp in comptable.index.values] - mixing_df = pd.DataFrame(data=mmix, columns=comp_names) - mixing_df.to_csv(op.join(out_dir, 'ica_mixing.tsv'), sep='\t', index=False) - betas_oc = utils.unmask(computefeats2(data_oc, mmix, mask), mask) - io.filewrite(betas_oc, - op.join(out_dir, 'ica_components.nii.gz'), - ref_img) else: LGR.info('Using supplied mixing matrix from ICA') - mmix_orig = pd.read_table(op.join(out_dir, 'ica_mixing.tsv')).values + mixing_file = io_generator.get_name("ICA mixing tsv") + mmix_orig = pd.read_table(mixing_file).values if ctab is None: - comptable, metric_maps, betas, mmix = metrics.dependence_metrics( + comptable, metric_maps, metric_metadata, betas, mmix = metrics.dependence_metrics( catd, data_oc, mmix_orig, masksum, tes, - ref_img, label='meica_', out_dir=out_dir, - algorithm='kundu_v2', verbose=verbose) - comptable = metrics.kundu_metrics(comptable, metric_maps) - comptable = selection.kundu_selection_v2(comptable, n_echos, n_vols) + io_generator, label='ICA', algorithm='kundu_v2', + verbose=verbose) + comptable, metric_metadata = metrics.kundu_metrics( + comptable, + metric_maps, + metric_metadata, + ) + comptable, metric_metadata = selection.kundu_selection_v2( + comptable, + metric_metadata, + n_echos, + n_vols, + ) else: mmix = mmix_orig.copy() - comptable = io.load_comptable(ctab) + comptable = pd.read_table(ctab) + # Try to find and load the metric metadata file + metadata_file = io_generator.get_name('ICA metrics json') + if op.isfile(metadata_file): + with open(metadata_file, "r") as fo: + metric_metadata = json.load(fo) + else: + metric_metadata = {} + if manacc is not None: - comptable = selection.manual_selection(comptable, acc=manacc) - betas_oc = utils.unmask(computefeats2(data_oc, mmix, mask), mask) - io.filewrite(betas_oc, - op.join(out_dir, 'ica_components.nii.gz'), - ref_img) - - # Save component table - comptable['Description'] = 'ICA fit to dimensionally-reduced optimally combined data.' - mmix_dict = {} - mmix_dict['Method'] = ('Independent components analysis with FastICA ' - 'algorithm implemented by sklearn. Components ' - 'are sorted by Kappa in descending order. ' - 'Component signs are flipped to best match the ' - 'data.') - io.save_comptable(comptable, op.join(out_dir, 'ica_decomposition.json'), - label='ica', metadata=mmix_dict) + comptable, metric_metadata = selection.manual_selection( + comptable, + metric_metadata, + acc=manacc + ) + + # Write out ICA files. + comp_names = comptable["Component"].values + mixing_df = pd.DataFrame(data=mmix, columns=comp_names) + mixing_df.to_csv(io_generator.get_name("ICA mixing tsv"), sep="\t", index=False) + betas_oc = utils.unmask(computefeats2(data_oc, mmix, mask), mask) + io_generator.save_file(betas_oc, 'z-scored ICA components img') + + # Save component table and associated json + temp_comptable = comptable.set_index("Component", inplace=False) + temp_comptable.to_csv( + io_generator.get_name("ICA metrics tsv"), + index=True, + index_label="Component", + sep='\t', + ) + metric_metadata["Component"] = { + "LongName": "Component identifier", + "Description": ( + "The unique identifier of each component. " + "This identifier matches column names in the mixing matrix TSV file." 
+ ), + } + with open(io_generator.get_name("ICA metrics json"), "w") as fo: + json.dump(metric_metadata, fo, sort_keys=True, indent=4) + + decomp_metadata = { + "Method": ( + "Independent components analysis with FastICA " + "algorithm implemented by sklearn. Components " + "are sorted by Kappa in descending order. " + "Component signs are flipped to best match the " + "data." + ), + } + for comp_name in comp_names: + decomp_metadata[comp_name] = { + "Description": "ICA fit to dimensionally-reduced optimally combined data.", + "Method": "tedana", + } + with open(io_generator.get_name("ICA decomposition json"), "w") as fo: + json.dump(decomp_metadata, fo, sort_keys=True, indent=4) if comptable[comptable.classification == 'accepted'].shape[0] == 0: LGR.warning('No BOLD components detected! Please check data and ' @@ -613,7 +685,11 @@ def tedana_workflow(data, tes, out_dir='.', mask=None, comp_names = [io.add_decomp_prefix(comp, prefix='ica', max_value=comptable.index.max()) for comp in comptable.index.values] mixing_df = pd.DataFrame(data=mmix, columns=comp_names) - mixing_df.to_csv(op.join(out_dir, 'ica_orth_mixing.tsv'), sep='\t', index=False) + mixing_df.to_csv( + io_generator.get_name("ICA orthogonalized mixing tsv"), + sep='\t', + index=False + ) RepLGR.info("Rejected components' time series were then " "orthogonalized with respect to accepted components' time " "series.") @@ -623,14 +699,52 @@ def tedana_workflow(data, tes, out_dir='.', mask=None, comptable=comptable, mmix=mmix, n_vols=n_vols, - ref_img=ref_img, - out_dir=out_dir) + io_generator=io_generator) if 'mir' in gscontrol: - gsc.minimum_image_regression(data_oc, mmix, mask, comptable, ref_img, out_dir=out_dir) + gsc.minimum_image_regression(data_oc, mmix, mask, comptable, io_generator) if verbose: - io.writeresults_echoes(catd, mmix, mask, comptable, ref_img, out_dir=out_dir) + io.writeresults_echoes(catd, mmix, mask, comptable, io_generator) + + if not no_reports: + LGR.info('Making figures folder with static component maps and ' + 'timecourse plots.') + reporting.static_figures.comp_figures(data_oc, mask=mask, + comptable=comptable, + mmix=mmix_orig, + io_generator=io_generator, + png_cmap=png_cmap) + + if sys.version_info.major == 3 and sys.version_info.minor < 6: + warn_msg = ("Reports requested but Python version is less than " + "3.6.0. Dynamic reports will not be generated.") + LGR.warn(warn_msg) + else: + LGR.info('Generating dynamic report') + reporting.generate_report(io_generator, tr=img_t_r) + + # Write out BIDS-compatible description file + derivative_metadata = { + "Name": "tedana Outputs", + "BIDSVersion": "1.5.0", + "DatasetType": "derivative", + "GeneratedBy": [ + { + "Name": "tedana", + "Version": __version__, + "Description": ( + "A denoising pipeline for the identification and removal " + "of non-BOLD noise from multi-echo fMRI data." 
+ ), + "CodeURL": "https://github.com/ME-ICA/tedana" + } + ] + } + with open(io_generator.get_name("data description json"), "w") as fo: + json.dump(derivative_metadata, fo, sort_keys=True, indent=4) + + LGR.info('Workflow completed') RepLGR.info("This workflow used numpy (Van Der Walt, Colbert, & " "Varoquaux, 2011), scipy (Jones et al., 2001), pandas " @@ -671,29 +785,6 @@ def tedana_workflow(data, tes, out_dir='.', mask=None, with open(repname, 'w') as fo: fo.write(report) - if not no_reports: - LGR.info('Making figures folder with static component maps and ' - 'timecourse plots.') - # make figure folder first - if not op.isdir(op.join(out_dir, 'figures')): - os.mkdir(op.join(out_dir, 'figures')) - - reporting.static_figures.comp_figures(data_oc, mask=mask, - comptable=comptable, - mmix=mmix_orig, - ref_img=ref_img, - out_dir=op.join(out_dir, - 'figures'), - png_cmap=png_cmap) - - if sys.version_info.major == 3 and sys.version_info.minor < 6: - warn_msg = ("Reports requested but Python version is less than " - "3.6.0. Dynamic reports will not be generated.") - LGR.warn(warn_msg) - else: - LGR.info('Generating dynamic report') - reporting.generate_report(out_dir=out_dir, tr=img_t_r) - log_handler.close() logging.root.removeHandler(log_handler) sh.close()