[ENH] Generate BIDS Derivatives-compatible outputs (#691)

* BIDS Derivatives-compatible outputs. * Switches to class-based output generator for file naming Co-authored-by: Taylor Salo <tsalo006@fiu.edu> Co-authored-by: Joshua Teves <joshua.teves@nih.gov>
ME-ICA · May 5, 2021 · ad664ae · ad664ae
1 parent 8477aab
commit ad664ae
Show file tree

Hide file tree

Showing 27 changed files with 1,667 additions and 847 deletions.
diff --git a/.gitignore b/.gitignore
@@ -108,4 +108,7 @@ ENV/
 
 # jupyter notebooks
 .ipynb_checkpoints/
-*.ipynb
+*.ipynb
+
+# vim swap files
+*.swp
diff --git a/docs/api.rst b/docs/api.rst
@@ -161,7 +161,6 @@ API
    tedana.io.add_decomp_prefix
    tedana.io.split_ts
    tedana.io.write_split_ts
-   tedana.io.writefeats
    tedana.io.writeresults
    tedana.io.writeresults_echoes
 

diff --git a/docs/approach.rst b/docs/approach.rst
@@ -69,7 +69,7 @@ estimate voxel-wise :math:`T_{2}^*` and :math:`S_0`.
 :math:`S_0` corresponds to the total signal in each voxel before decay and can reflect coil sensivity.
 :math:`T_{2}^*` corresponds to the rate at which a voxel decays over time, which
 is related to signal dropout and BOLD sensitivity.
-Estimates of the parameters are saved as **t2sv.nii.gz** and **s0v.nii.gz**.
+Estimates of the parameters are saved as **T2starmap.nii.gz** and **S0map.nii.gz**.
 
 While :math:`T_{2}^*` and :math:`S_0` in fact fluctuate over time, estimating
 them on a volume-by-volume basis with only a small number of echoes is not
@@ -153,7 +153,7 @@ between the distributions for other echoes.
 
 The time series for the optimally combined data also looks like a combination
 of the other echoes (which it is).
-This optimally combined data is written out as **ts_OC.nii.gz**
+This optimally combined data is written out as **desc-optcom_bold.nii.gz**
 
 .. image:: /_static/a10_optimal_combination_timeseries.png
 
@@ -194,7 +194,7 @@ component analysis (PCA).
 The goal of this step is to make it easier for the later ICA decomposition to converge.
 Dimensionality reduction is a common step prior to ICA.
 TEDPCA applies PCA to the optimally combined data in order to decompose it into component maps and
-time series (saved as **mepca_mix.1D**).
+time series (saved as **desc-PCA_mixing.tsv**).
 Here we can see time series for some example components (we don't really care about the maps):
 
 .. image:: /_static/a11_pca_component_timeseries.png
@@ -277,9 +277,9 @@ Next, ``tedana`` applies TE-dependent independent component analysis (ICA) in
 order to identify and remove TE-independent (i.e., non-BOLD noise) components.
 The dimensionally reduced optimally combined data are first subjected to ICA in
 order to fit a mixing matrix to the whitened data.
-This generates a number of independent timeseries (saved as **meica_mix.1D**),
-as well as beta maps which show the spatial loading of these components on the
-brain (**betas_OC.nii.gz**).
+This generates a number of independent timeseries (saved as **desc-ICA_mixing.tsv**),
+as well as parameter estimate maps which show the spatial loading of these components on the
+brain (**desc-ICA_components.nii.gz**).
 
 .. image:: /_static/a13_ica_component_timeseries.png
 
@@ -312,13 +312,13 @@ The blue and red lines show the predicted values for the :math:`S_0` and
 A decision tree is applied to :math:`\kappa`, :math:`\rho`, and other metrics in order to
 classify ICA components as TE-dependent (BOLD signal), TE-independent
 (non-BOLD noise), or neither (to be ignored).
-These classifications are saved in `comp_table_ica.txt`.
+These classifications are saved in **desc-tedana_metrics.tsv**.
 The actual decision tree is dependent on the component selection algorithm employed.
 ``tedana`` includes the option `kundu` (which uses hardcoded thresholds
 applied to each of the metrics).
 
 Components that are classified as noise are projected out of the optimally combined data,
-yielding a denoised timeseries, which is saved as `dn_ts_OC.nii.gz`.
+yielding a denoised timeseries, which is saved as **desc-optcomDenoised_bold.nii.gz**.
 
 .. image:: /_static/a15_denoised_data_timeseries.png
 

diff --git a/docs/outputs.rst b/docs/outputs.rst
diff --git a/tedana/decomposition/pca.py b/tedana/decomposition/pca.py
@@ -2,7 +2,6 @@
 PCA and related signal decomposition methods for tedana
 """
 import logging
-import os.path as op
 from numbers import Number
 
 import numpy as np
@@ -48,8 +47,8 @@ def low_mem_pca(data):
 
 
 def tedpca(data_cat, data_oc, combmode, mask, adaptive_mask, t2sG,
-           ref_img, tes, algorithm='mdl', kdaw=10., rdaw=1.,
-           out_dir='.', verbose=False, low_mem=False):
+           io_generator, tes, algorithm='mdl', kdaw=10., rdaw=1.,
+           verbose=False, low_mem=False):
     """
     Use principal components analysis (PCA) to identify and remove thermal
     noise from multi-echo data.
@@ -73,8 +72,8 @@ def tedpca(data_cat, data_oc, combmode, mask, adaptive_mask, t2sG,
         For more information on thresholding, see `make_adaptive_mask`.
     t2sG : (S,) array_like
         Map of voxel-wise T2* estimates.
-    ref_img : :obj:`str` or img_like
-        Reference image to dictate how outputs are saved to disk
+    io_generator : :obj:`tedana.io.OutputGenerator`
+        The output generation object for this workflow
     tes : :obj:`list`
         List of echo times associated with `data_cat`, in milliseconds
     algorithm : {'kundu', 'kundu-stabilize', 'mdl', 'aic', 'kic', float}, optional
@@ -91,8 +90,6 @@ def tedpca(data_cat, data_oc, combmode, mask, adaptive_mask, t2sG,
     rdaw : :obj:`float`, optional
         Dimensionality augmentation weight for Rho calculations. Must be a
         non-negative float, or -1 (a special value). Default is 1.
-    out_dir : :obj:`str`, optional
-        Output directory.
     verbose : :obj:`bool`, optional
         Whether to output files from fitmodels_direct or not. Default: False
     low_mem : :obj:`bool`, optional
@@ -147,19 +144,20 @@ def tedpca(data_cat, data_oc, combmode, mask, adaptive_mask, t2sG,
     Outputs:
 
     This function writes out several files:
-
-    ======================    =================================================
-    Filename                  Content
-    ======================    =================================================
-    pca_decomposition.json    PCA component table.
-    pca_mixing.tsv            PCA mixing matrix.
-    pca_components.nii.gz     Component weight maps.
-    ======================    =================================================
+    ===========================    =============================================
+    Default Filename               Content
+    ===========================    =============================================
+    desc-PCA_decomposition.json    PCA component table
+    desc-PCA_mixing.tsv            PCA mixing matrix
+    desc-PCA_components.nii.gz     Component weight maps
+    ===========================    =============================================
 
     See Also
     --------
-    :func:`tedana.utils.make_adaptive_mask` : The function used to create the ``adaptive_mask``
-                                              parameter.
+    :func:`tedana.utils.make_adaptive_mask` : The function used to create
+        the ``adaptive_mask` parameter.
+    :module:`tedana.constants` : The module describing the filenames for
+        various naming conventions
     """
     if algorithm == 'kundu':
         alg_str = ("followed by the Kundu component selection decision "
@@ -204,8 +202,8 @@ def tedpca(data_cat, data_oc, combmode, mask, adaptive_mask, t2sG,
     data_z = (data_z - data_z.mean()) / data_z.std()  # var normalize everything
 
     if algorithm in ['mdl', 'aic', 'kic']:
-        data_img = io.new_nii_like(ref_img, utils.unmask(data, mask))
-        mask_img = io.new_nii_like(ref_img, mask.astype(int))
+        data_img = io.new_nii_like(io_generator.reference_img, utils.unmask(data, mask))
+        mask_img = io.new_nii_like(io_generator.reference_img, mask.astype(int))
         voxel_comp_weights, varex, varex_norm, comp_ts = ma_pca(
             data_img, mask_img, algorithm, normalize=True)
     elif isinstance(algorithm, Number):
@@ -231,10 +229,11 @@ def tedpca(data_cat, data_oc, combmode, mask, adaptive_mask, t2sG,
     # Compute Kappa and Rho for PCA comps
     # Normalize each component's time series
     vTmixN = stats.zscore(comp_ts, axis=0)
-    comptable, _, _, _ = metrics.dependence_metrics(
-                data_cat, data_oc, comp_ts, adaptive_mask, tes, ref_img,
-                reindex=False, mmixN=vTmixN, algorithm=None,
-                label='mepca_', out_dir=out_dir, verbose=verbose)
+    comptable, _, metric_metadata, _, _ = metrics.dependence_metrics(
+        data_cat, data_oc, comp_ts, adaptive_mask, tes, io_generator,
+        reindex=False, mmixN=vTmixN, algorithm=None,
+        label='PCA', verbose=verbose
+    )
 
     # varex_norm from PCA overrides varex_norm from dependence_metrics,
     # but we retain the original
@@ -243,38 +242,68 @@ def tedpca(data_cat, data_oc, combmode, mask, adaptive_mask, t2sG,
     comptable['normalized variance explained'] = varex_norm
 
     # write component maps to 4D image
-    comp_ts_z = stats.zscore(comp_ts, axis=0)
-    comp_maps = utils.unmask(computefeats2(data_oc, comp_ts_z, mask), mask)
-    io.filewrite(comp_maps, op.join(out_dir, 'pca_components.nii.gz'), ref_img)
+    comp_maps = utils.unmask(computefeats2(data_oc, comp_ts, mask), mask)
+    io_generator.save_file(comp_maps, 'z-scored PCA components img')
 
     # Select components using decision tree
     if algorithm == 'kundu':
-        comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw, stabilize=False)
+        comptable, metric_metadata = kundu_tedpca(
+            comptable,
+            metric_metadata,
+            n_echos,
+            kdaw,
+            rdaw,
+            stabilize=False,
+        )
     elif algorithm == 'kundu-stabilize':
-        comptable = kundu_tedpca(comptable, n_echos, kdaw, rdaw, stabilize=True)
+        comptable, metric_metadata = kundu_tedpca(
+            comptable,
+            metric_metadata,
+            n_echos,
+            kdaw,
+            rdaw,
+            stabilize=True,
+        )
     else:
         alg_str = "variance explained-based" if isinstance(algorithm, Number) else algorithm
         LGR.info('Selected {0} components with {1} dimensionality '
                  'detection'.format(comptable.shape[0], alg_str))
         comptable['classification'] = 'accepted'
         comptable['rationale'] = ''
 
-    # Save decomposition
+    # Save decomposition files
     comp_names = [io.add_decomp_prefix(comp, prefix='pca', max_value=comptable.index.max())
                   for comp in comptable.index.values]
 
     mixing_df = pd.DataFrame(data=comp_ts, columns=comp_names)
-    mixing_df.to_csv(op.join(out_dir, 'pca_mixing.tsv'), sep='\t', index=False)
-
-    comptable['Description'] = 'PCA fit to optimally combined data.'
-    mmix_dict = {}
-    mmix_dict['Method'] = ('Principal components analysis implemented by '
-                           'sklearn. Components are sorted by variance '
-                           'explained in descending order. '
-                           'Component signs are flipped to best match the '
-                           'data.')
-    io.save_comptable(comptable, op.join(out_dir, 'pca_decomposition.json'),
-                      label='pca', metadata=mmix_dict)
+    io_generator.save_file(mixing_df, "PCA mixing tsv")
+
+    # Save component table and associated json
+    temp_comptable = comptable.set_index("Component", inplace=False)
+    io_generator.save_file(temp_comptable, "PCA metrics tsv")
+
+    metric_metadata["Component"] = {
+        "LongName": "Component identifier",
+        "Description": (
+            "The unique identifier of each component. "
+            "This identifier matches column names in the mixing matrix TSV file."
+        ),
+    }
+    io_generator.save_file(metric_metadata, "PCA metrics json")
+
+    decomp_metadata = {
+        "Method": (
+            "Principal components analysis implemented by sklearn. "
+            "Components are sorted by variance explained in descending order. "
+            "Component signs are flipped to best match the data."
+        ),
+    }
+    for comp_name in comp_names:
+        decomp_metadata[comp_name] = {
+            "Description": "PCA fit to optimally combined data.",
+            "Method": "tedana",
+        }
+    io_generator.save_file(decomp_metadata, "PCA decomposition json")
 
     acc = comptable[comptable.classification == 'accepted'].index.values
     n_components = acc.size

diff --git a/tedana/gscontrol.py b/tedana/gscontrol.py
@@ -2,21 +2,21 @@
 Global signal control methods
 """
 import logging
-import os.path as op
 
 import numpy as np
+import pandas as pd
 from scipy import stats
 from scipy.special import lpmv
 
-from tedana import io, utils
+from tedana import utils
 from tedana.due import due, Doi
 
 LGR = logging.getLogger(__name__)
 RepLGR = logging.getLogger("REPORT")
 RefLGR = logging.getLogger("REFERENCES")
 
 
-def gscontrol_raw(catd, optcom, n_echos, ref_img, out_dir='.', dtrank=4):
+def gscontrol_raw(catd, optcom, n_echos, io_generator, dtrank=4):
     """
     Removes global signal from individual echo `catd` and `optcom` time series
 
@@ -34,10 +34,8 @@ def gscontrol_raw(catd, optcom, n_echos, ref_img, out_dir='.', dtrank=4):
         Optimally combined functional data (i.e., the output of `make_optcom`)
     n_echos : :obj:`int`
         Number of echos in data. Should be the same as `E` dimension of `catd`
-    ref_img : :obj:`str` or img_like
-        Reference image to dictate how outputs are saved to disk
-    out_dir : :obj:`str`, optional
-        Output directory.
+    io_generator : :obj:`tedana.io.OutputGenerator`
+        The output generator for this workflow
     dtrank : :obj:`int`, optional
         Specifies degree of Legendre polynomial basis function for estimating
         spatial global signal. Default: 4
@@ -78,23 +76,25 @@ def gscontrol_raw(catd, optcom, n_echos, ref_img, out_dir='.', dtrank=4):
     detr = dat - np.dot(sol.T, Lmix.T)[0]
     sphis = (detr).min(axis=1)
     sphis -= sphis.mean()
-    io.filewrite(utils.unmask(sphis, Gmask), op.join(out_dir, 'T1gs'), ref_img)
+    io_generator.save_file(utils.unmask(sphis, Gmask), "gs img")
 
     # find time course ofc the spatial global signal
     # make basis with the Legendre basis
     glsig = np.linalg.lstsq(np.atleast_2d(sphis).T, dat, rcond=None)[0]
     glsig = stats.zscore(glsig, axis=None)
-    np.savetxt(op.join(out_dir, 'glsig.1D'), glsig)
+
+    glsig_df = pd.DataFrame(data=glsig.T, columns=['global_signal'])
+    io_generator.save_file(glsig_df, "global signal time series tsv")
     glbase = np.hstack([Lmix, glsig.T])
 
     # Project global signal out of optimally combined data
     sol = np.linalg.lstsq(np.atleast_2d(glbase), dat.T, rcond=None)[0]
     tsoc_nogs = dat - np.dot(np.atleast_2d(sol[dtrank]).T,
                              np.atleast_2d(glbase.T[dtrank])) + Gmu[Gmask][:, np.newaxis]
 
-    io.filewrite(optcom, op.join(out_dir, 'tsoc_orig'), ref_img)
+    io_generator.save_file(optcom, "has gs combined img")
     dm_optcom = utils.unmask(tsoc_nogs, Gmask)
-    io.filewrite(dm_optcom, op.join(out_dir, 'tsoc_nogs'), ref_img)
+    io_generator.save_file(dm_optcom, "removed gs combined img")
 
     # Project glbase out of each echo
     dm_catd = catd.copy()  # don't overwrite catd
@@ -111,7 +111,7 @@ def gscontrol_raw(catd, optcom, n_echos, ref_img, out_dir='.', dtrank=4):
 @due.dcite(Doi("10.1073/pnas.1301725110"),
            description="Minimum image regression to remove T1-like effects "
                        "from the denoised data.")
-def minimum_image_regression(optcom_ts, mmix, mask, comptable, ref_img, out_dir="."):
+def minimum_image_regression(optcom_ts, mmix, mask, comptable, io_generator):
     """
     Perform minimum image regression (MIR) to remove T1-like effects from
     BOLD-like components.
@@ -131,10 +131,8 @@ def minimum_image_regression(optcom_ts, mmix, mask, comptable, ref_img, out_dir=
     comptable : (C x X) :obj:`pandas.DataFrame`
         Component metric table. One row for each component, with a column for
         each metric. The index should be the component number.
-    ref_img : :obj:`str` or img_like
-        Reference image to dictate how outputs are saved to disk
-    out_dir : :obj:`str`, optional
-        Output directory.
+    io_generator : :obj:`tedana.io.OutputGenerator`
+        The output generating object for this workflow
 
     Notes
     -----
@@ -202,7 +200,7 @@ def minimum_image_regression(optcom_ts, mmix, mask, comptable, ref_img, out_dir=
     mehk_ts = np.dot(comp_pes[:, acc], mmix[:, acc].T)
     t1_map = mehk_ts.min(axis=-1)  # map of T1-like effect
     t1_map -= t1_map.mean()
-    io.filewrite(utils.unmask(t1_map, mask), op.join(out_dir, "sphis_hik"), ref_img)
+    io_generator.save_file(utils.unmask(t1_map, mask), "t1 like img")
     t1_map = t1_map[:, np.newaxis]
 
     # Find the global signal based on the T1-like effect
@@ -213,11 +211,11 @@ def minimum_image_regression(optcom_ts, mmix, mask, comptable, ref_img, out_dir=
         np.linalg.lstsq(glob_sig.T, mehk_ts.T, rcond=None)[0].T, glob_sig
     )
     hik_ts = mehk_noT1gs * optcom_std  # rescale
-    io.filewrite(utils.unmask(hik_ts, mask), op.join(out_dir, "hik_ts_OC_MIR"), ref_img)
+    io_generator.save_file(utils.unmask(hik_ts, mask), "ICA accepted mir denoised img")
 
     # Make denoised version of T1-corrected time series
     medn_ts = optcom_mean + ((mehk_noT1gs + resid) * optcom_std)
-    io.filewrite(utils.unmask(medn_ts, mask), op.join(out_dir, "dn_ts_OC_MIR"), ref_img)
+    io_generator.save_file(utils.unmask(medn_ts, mask), "mir denoised img")
 
     # Orthogonalize mixing matrix w.r.t. T1-GS
     mmix_noT1gs = mmix.T - np.dot(
@@ -230,9 +228,9 @@ def minimum_image_regression(optcom_ts, mmix, mask, comptable, ref_img, out_dir=
 
     # Write T1-corrected components and mixing matrix
     comp_pes_norm = np.linalg.lstsq(mmix_noT1gs_z.T, optcom_z.T, rcond=None)[0].T
-    io.filewrite(
+    io_generator.save_file(
         utils.unmask(comp_pes_norm[:, 2:], mask),
-        op.join(out_dir, "betas_hik_OC_MIR"),
-        ref_img,
+        "ICA accepted mir component weights img",
     )
-    np.savetxt(op.join(out_dir, "meica_mix_MIR.1D"), mmix_noT1gs)
+    mixing_df = pd.DataFrame(data=mmix_noT1gs.T, columns=comptable["Component"].values)
+    io_generator.save_file(mixing_df, "ICA MIR mixing tsv")