Incorporated the comments on add robustica

ME-ICA · Nov 1, 2023 · f2cdb4e · f2cdb4e
1 parent f4eaa3e
commit f2cdb4e
Show file tree

Hide file tree

Showing 8 changed files with 1,006 additions and 101 deletions.
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
diff --git a/tedana/config.py b/tedana/config.py
@@ -0,0 +1,4 @@
+DEFAULT_ICA_METHOD = "robustica"
+DEFAULT_N_ROBUST_RUNS = 30
+DEFAULT_N_MAX_ITER = 500
+DEFAULT_N_MAX_RESTART = 10
diff --git a/tedana/decomposition/ica.py b/tedana/decomposition/ica.py
@@ -1,33 +1,48 @@
-"""
-ICA and related signal decomposition methods for tedana
-"""
+"""ICA and related signal decomposition methods for tedana"""
 import logging
 import warnings
 
-import sys
-
 import numpy as np
+from robustica import RobustICA
 from scipy import stats
 from sklearn.decomposition import FastICA
-from robustica import RobustICA ####BTBTBT
+
+from tedana.config import (
+    DEFAULT_ICA_METHOD,
+    DEFAULT_N_MAX_ITER,
+    DEFAULT_N_MAX_RESTART,
+    DEFAULT_N_ROBUST_RUNS,
+)
 
 LGR = logging.getLogger("GENERAL")
 RepLGR = logging.getLogger("REPORT")
 
 
-def tedica(data, n_components, fixed_seed, ica_method="robustica", n_robust_runs=30, maxit=500, maxrestart=10): ####BTBTBTB
+def tedica(
+    data,
+    n_components,
+    fixed_seed,
+    ica_method=DEFAULT_ICA_METHOD,
+    n_robust_runs=DEFAULT_N_ROBUST_RUNS,
+    maxit=DEFAULT_N_MAX_ITER,
+    maxrestart=DEFAULT_N_MAX_RESTART,
+):
     """
-    Perform ICA on `data` and returns mixing matrix
+    Perform ICA on `data` with the user selected ica method and returns mixing matrix
 
     Parameters
     ----------
     data : (S x T) :obj:`numpy.ndarray`
         Dimensionally reduced optimally combined functional data, where `S` is
         samples and `T` is time
     n_components : :obj:`int`
-        Number of components retained from PCA decomposition
+        Number of components retained from PCA decomposition.
     fixed_seed : :obj:`int`
-        Seed for ensuring reproducibility of ICA results
+        Seed for ensuring reproducibility of ICA results.
+    ica_method : :obj: `str'
+        slected ICA method, can be fastica or robutica.
+    n_robust_runs : :obj: `int'
+        selected number of robust runs when robustica is used. Default is 30.
     maxit : :obj:`int`, optional
         Maximum number of iterations for ICA. Default is 500.
     maxrestart : :obj:`int`, optional
@@ -43,61 +58,159 @@ def tedica(data, n_components, fixed_seed, ica_method="robustica", n_robust_runs
     fixed_seed : :obj:`int`
         Random seed from final decomposition.
 
-    Notes
-    -----
-    Uses `sklearn` implementation of FastICA for decomposition
     """
     warnings.filterwarnings(action="ignore", module="scipy", message="^internal gelsd")
     RepLGR.info(
         "Independent component analysis was then used to "
         "decompose the dimensionally reduced dataset."
     )
 
-    if ica_method=='robustica':
-        mmix, Iq = r_ica(data, n_components, n_robust_runs, maxit)
-        fixed_seed=-99999
-    elif ica_method=='fastica':
-        mmix, fixed_seed=f_ica(data, n_components, fixed_seed, maxit=500, maxrestart=10)
-        Iq = 0
+    ica_method = ica_method.lower()
+
+    if ica_method == "robustica":
+        mmix, fixed_seed = r_ica(
+            data,
+            n_components=n_components,
+            fixed_seed=fixed_seed,
+            n_robust_runs=n_robust_runs,
+            max_it=maxit,
+        )
+    elif ica_method == "fastica":
+        mmix, fixed_seed = f_ica(
+            data,
+            n_components=n_components,
+            fixed_seed=fixed_seed,
+            maxit=maxit,
+            maxrestart=maxrestart,
+        )
     else:
-        LGR.warning("The selected ICA method is invalid!")
-        sys.exit()
+        raise ValueError("The selected ICA method is invalid!")
 
+    return mmix, fixed_seed
 
 
+def r_ica(data, n_components, fixed_seed, n_robust_runs, max_it):
+    """
+    Perform robustica on `data` by running FastICA multiple times (n_robust runes)
+    and returns mixing matrix
 
-    return mmix, fixed_seed
+    Parameters
+    ----------
+    data : (S x T) :obj:`numpy.ndarray`
+        Dimensionally reduced optimally combined functional data, where `S` is
+        samples and `T` is time
+    n_components : :obj:`int`
+        Number of components retained from PCA decomposition.
+    fixed_seed : :obj:`int`
+        Seed for ensuring reproducibility of ICA results.
+    n_robust_runs : :obj: `int'
+        selected number of robust runs when robustica is used. Default is 30.
+    maxit : :obj:`int`, optional
+        Maximum number of iterations for ICA. Default is 500.
 
+    Returns
+    -------
+    mmix : (T x C) :obj:`numpy.ndarray`
+        Z-scored mixing matrix for converting input data to component space,
+        where `C` is components and `T` is the same as in `data`
+    fixed_seed : :obj:`int`
+        Random seed from final decomposition.
 
-def r_ica(data, n_components, n_robust_runs, max_it): ####BTBTBTB:
+    """
+    if n_robust_runs > 200:
+        LGR.warning(
+            "The selected n_robust_runs is a very big number! The process will take a long time!"
+        )
 
-    if n_robust_runs>100:
-        LGR.warning("The selected n_robust_runs is a very big number!")
+    RepLGR.info("RobustICA package was used for ICA decomposition \\citep{Anglada2022}.")
 
+    if fixed_seed == -1:
+        fixed_seed = np.random.randint(low=1, high=1000)
 
-    RepLGR.info(
-        "RobustICA package was used for ICA decomposition \\citep{Anglada2022}."
-    )
-    rica0 = RobustICA(n_components=n_components, robust_runs=n_robust_runs, whiten='arbitrary-variance',max_iter= max_it,
-                      robust_dimreduce=False, fun='logcosh')
-    S0, mmix = rica0.fit_transform(data)
+    try:
+        rica = RobustICA(
+            n_components=n_components,
+            robust_runs=n_robust_runs,
+            whiten="arbitrary-variance",
+            max_iter=max_it,
+            random_state=fixed_seed,
+            robust_dimreduce=False,
+            fun="logcosh",
+            robust_method="DBSCAN",
+        )
 
-    q0 = rica0.evaluate_clustering(rica0.S_all, rica0.clustering.labels_, rica0.signs_, rica0.orientation_)
+        S, mmix = rica.fit_transform(data)
+        q = rica.evaluate_clustering(
+            rica.S_all, rica.clustering.labels_, rica.signs_, rica.orientation_
+        )
 
-
-    Iq0 = np.array(np.mean(q0.iq))
-
+    except:
+        rica = RobustICA(
+            n_components=n_components,
+            robust_runs=n_robust_runs,
+            whiten="arbitrary-variance",
+            max_iter=max_it,
+            random_state=fixed_seed,
+            robust_dimreduce=False,
+            fun="logcosh",
+            robust_method="AgglomerativeClustering",
+        )
+
+        S, mmix = rica.fit_transform(data)
+        q = rica.evaluate_clustering(
+            rica.S_all, rica.clustering.labels_, rica.signs_, rica.orientation_
+        )
 
+    iq = np.array(np.mean(q[q["cluster_id"] >= 0].iq))  # The cluster labeled -1 is noise
+
+    if iq < 0.6:
+        LGR.warning(
+            "The resultant mean Index Quality is low. It  is recommended to rerun the "
+            "process with a different seed."
+        )
+
+    mmix = mmix[:, q["cluster_id"] >= 0]
     mmix = stats.zscore(mmix, axis=0)
 
     LGR.info(
-        "RobustICA with {0} robust runs was used \n"
-        "The mean index quality is {1}".format(n_robust_runs, Iq0)
+        "RobustICA with {0} robust runs and seed {1} was used. "
+        "The mean Index Quality is {2}".format(n_robust_runs, fixed_seed, iq)
     )
-    return mmix, Iq0
+    return mmix, fixed_seed
 
 
 def f_ica(data, n_components, fixed_seed, maxit, maxrestart):
+    """
+    Perform FastICA on `data` and returns mixing matrix
+
+    Parameters
+    ----------
+    data : (S x T) :obj:`numpy.ndarray`
+        Dimensionally reduced optimally combined functional data, where `S` is
+        samples and `T` is time
+    n_components : :obj:`int`
+        Number of components retained from PCA decomposition
+    fixed_seed : :obj:`int`
+        Seed for ensuring reproducibility of ICA results
+    maxit : :obj:`int`, optional
+        Maximum number of iterations for ICA. Default is 500.
+    maxrestart : :obj:`int`, optional
+        Maximum number of attempted decompositions to perform with different
+        random seeds. ICA will stop running if there is convergence prior to
+        reaching this limit. Default is 10.
+
+    Returns
+    -------
+    mmix : (T x C) :obj:`numpy.ndarray`
+        Z-scored mixing matrix for converting input data to component space,
+        where `C` is components and `T` is the same as in `data`
+    fixed_seed : :obj:`int`
+        Random seed from final decomposition.
+
+    Notes
+    -----
+    Uses `sklearn` implementation of FastICA for decomposition
+    """
     if fixed_seed == -1:
         fixed_seed = np.random.randint(low=1, high=1000)
 
@@ -135,4 +248,4 @@ def f_ica(data, n_components, fixed_seed, maxit, maxrestart):
 
     mmix = ica.mixing_
     mmix = stats.zscore(mmix, axis=0)
-    return mmix, fixed_seed
+    return mmix, fixed_seed
diff --git a/tedana/resources/references.bib b/tedana/resources/references.bib
@@ -313,3 +313,13 @@ @Article{Hunter:2007
   doi       = {10.1109/MCSE.2007.55},
   year      = 2007
 }
+
+@Article{Anglada:2022,
+  Author    = {Anglada-Girotto Miquel and Miravet-Verde Samuel and Serrano Luis and Head Sarah},
+  Title     = {robustica: customizable robust independent component analysis},
+  Journal   = {BMC Bioinformatics},
+  Volume    = {23},
+  Number    = {519},
+  doi       = {10.1186/s12859-022-05043-9},
+  year      = 2022
+}