diff --git a/setup.cfg b/setup.cfg index 96c2cb8e7..8a817bab7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -23,7 +23,7 @@ classifiers = python_requires = >= 3.6 install_requires = bokeh<2.3.0 - mapca~=0.0.1 + mapca>=0.0.3 matplotlib nibabel>=2.5.1 nilearn>=0.7 diff --git a/tedana/decomposition/pca.py b/tedana/decomposition/pca.py index e98ef907e..73a5eee71 100644 --- a/tedana/decomposition/pca.py +++ b/tedana/decomposition/pca.py @@ -6,11 +6,12 @@ import numpy as np import pandas as pd -from mapca import ma_pca +from mapca import MovingAveragePCA from scipy import stats from sklearn.decomposition import PCA from tedana import io, metrics, utils +from tedana.reporting import pca_results as plot_pca_results from tedana.selection import kundu_tedpca from tedana.stats import computefeats2 @@ -236,9 +237,92 @@ def tedpca( if algorithm in ["mdl", "aic", "kic"]: data_img = io.new_nii_like(io_generator.reference_img, utils.unmask(data, mask)) mask_img = io.new_nii_like(io_generator.reference_img, mask.astype(int)) - voxel_comp_weights, varex, varex_norm, comp_ts = ma_pca( - data_img, mask_img, algorithm, normalize=True + ma_pca = MovingAveragePCA(criterion=algorithm, normalize=True) + _ = ma_pca.fit_transform(data_img, mask_img) + + # Extract results from maPCA + voxel_comp_weights = ma_pca.u_ + varex = ma_pca.explained_variance_ + varex_norm = ma_pca.explained_variance_ratio_ + comp_ts = ma_pca.components_.T + aic = ma_pca.aic_ + kic = ma_pca.kic_ + mdl = ma_pca.mdl_ + varex_90 = ma_pca.varexp_90_ + varex_95 = ma_pca.varexp_95_ + all_comps = ma_pca.all_ + + # Extract number of components and variance explained for logging and plotting + n_aic = aic["n_components"] + aic_varexp = np.round(aic["explained_variance_total"], 3) + n_kic = kic["n_components"] + kic_varexp = np.round(kic["explained_variance_total"], 3) + n_mdl = mdl["n_components"] + mdl_varexp = np.round(mdl["explained_variance_total"], 3) + n_varex_90 = varex_90["n_components"] + varex_90_varexp = np.round(varex_90["explained_variance_total"], 3) + n_varex_95 = varex_95["n_components"] + varex_95_varexp = np.round(varex_95["explained_variance_total"], 3) + all_varex = np.round(all_comps["explained_variance_total"], 3) + + # Print out the results + LGR.info("Optimal number of components based on different criteria:") + LGR.info( + f"AIC: {n_aic} | KIC: {n_kic} | MDL: {n_mdl} | 90% varexp: {n_varex_90} " + f"| 95% varexp: {n_varex_95}" + ) + + LGR.info("Explained variance based on different criteria:") + LGR.info( + f"AIC: {aic_varexp}% | KIC: {kic_varexp}% | MDL: {mdl_varexp}% | " + f"90% varexp: {varex_90_varexp}% | 95% varexp: {varex_95_varexp}%" + ) + + pca_optimization_curves = np.array([aic["value"], kic["value"], mdl["value"]]) + pca_criteria_components = np.array( + [ + n_aic, + n_kic, + n_mdl, + n_varex_90, + n_varex_95, + ] ) + + # Plot maPCA optimization curves + LGR.info("Plotting maPCA optimization curves") + plot_pca_results(pca_optimization_curves, pca_criteria_components, all_varex, io_generator) + + # Save maPCA results into a dictionary + mapca_results = { + "aic": { + "n_components": n_aic, + "explained_variance_total": aic_varexp, + "curve": aic["value"], + }, + "kic": { + "n_components": n_kic, + "explained_variance_total": kic_varexp, + "curve": kic["value"], + }, + "mdl": { + "n_components": n_mdl, + "explained_variance_total": mdl_varexp, + "curve": mdl["value"], + }, + "varex_90": { + "n_components": n_varex_90, + "explained_variance_total": varex_90_varexp, + }, + "varex_95": { + "n_components": n_varex_95, + "explained_variance_total": varex_95_varexp, + }, + } + + # Save dictionary + io_generator.save_file(mapca_results, "PCA cross component metrics json") + elif isinstance(algorithm, Number): ppca = PCA(copy=False, n_components=algorithm, svd_solver="full") ppca.fit(data_z) diff --git a/tedana/io.py b/tedana/io.py index c1d68f4ba..e3b7506af 100644 --- a/tedana/io.py +++ b/tedana/io.py @@ -719,7 +719,11 @@ def prep_data_for_json(d) -> dict: # One of the values in the dict is the problem, need to recurse v = prep_data_for_json(v) elif isinstance(v, np.ndarray): + if v.dtype == np.int64 or v.dtype == np.uint64: + v = int(v) v = v.tolist() + elif isinstance(v, np.int64) or isinstance(v, np.uint64): + v = int(v) # NOTE: add more special cases for type conversions above this # comment line as an elif block d[k] = v diff --git a/tedana/reporting/__init__.py b/tedana/reporting/__init__.py index 48a0e18f1..825cc6eb6 100644 --- a/tedana/reporting/__init__.py +++ b/tedana/reporting/__init__.py @@ -3,6 +3,6 @@ """ from .html_report import generate_report -from .static_figures import comp_figures +from .static_figures import comp_figures, pca_results -__all__ = ["generate_report", "comp_figures"] +__all__ = ["generate_report", "comp_figures", "pca_results"] diff --git a/tedana/reporting/static_figures.py b/tedana/reporting/static_figures.py index 65a946a66..41615636a 100644 --- a/tedana/reporting/static_figures.py +++ b/tedana/reporting/static_figures.py @@ -288,3 +288,144 @@ def comp_figures(ts, mask, comptable, mmix, io_generator, png_cmap): compplot_name = os.path.join(io_generator.out_dir, "figures", plot_name) plt.savefig(compplot_name) plt.close() + + +def pca_results(criteria, n_components, all_varex, io_generator): + """ + Plot the PCA optimization curve for each criteria, and the variance explained curve. + + Parameters + ---------- + criteria : array-like + AIC, KIC, and MDL optimization values for increasing number of components. + n_components : array-like + Number of optimal components given by each criteria. + io_generator : object + An object containing all the information needed to generate the output. + """ + + # Plot the PCA optimization curve for each criteria + plt.figure(figsize=(10, 9)) + plt.title("PCA Criteria") + plt.xlabel("PCA components") + plt.ylabel("Arbitrary Units") + + # AIC curve + plt.plot(criteria[0, :], color="tab:blue", label="AIC") + # KIC curve + plt.plot(criteria[1, :], color="tab:orange", label="KIC") + # MDL curve + plt.plot(criteria[2, :], color="tab:green", label="MDL") + + # Vertical line depicting the optimal number of components given by AIC + plt.vlines( + n_components[0], + ymin=np.min(criteria), + ymax=np.max(criteria), + color="tab:blue", + linestyles="dashed", + ) + # Vertical line depicting the optimal number of components given by KIC + plt.vlines( + n_components[1], + ymin=np.min(criteria), + ymax=np.max(criteria), + color="tab:orange", + linestyles="dashed", + ) + # Vertical line depicting the optimal number of components given by MDL + plt.vlines( + n_components[2], + ymin=np.min(criteria), + ymax=np.max(criteria), + color="tab:green", + linestyles="dashed", + ) + # Vertical line depicting the optimal number of components for 90% variance explained + plt.vlines( + n_components[3], + ymin=np.min(criteria), + ymax=np.max(criteria), + color="tab:red", + linestyles="dashed", + label="90% varexp", + ) + # Vertical line depicting the optimal number of components for 95% variance explained + plt.vlines( + n_components[4], + ymin=np.min(criteria), + ymax=np.max(criteria), + color="tab:purple", + linestyles="dashed", + label="95% varexp", + ) + + plt.legend() + + #  Save the plot + plot_name = "pca_criteria.png" + pca_criteria_name = os.path.join(io_generator.out_dir, "figures", plot_name) + plt.savefig(pca_criteria_name) + plt.close() + + # Plot the variance explained curve + plt.figure(figsize=(10, 9)) + plt.title("Variance Explained") + plt.xlabel("PCA components") + plt.ylabel("Variance Explained") + + plt.plot(all_varex, color="black", label="Variance Explained") + + # Vertical line depicting the optimal number of components given by AIC + plt.vlines( + n_components[0], + ymin=0, + ymax=1, + color="tab:blue", + linestyles="dashed", + label="AIC", + ) + # Vertical line depicting the optimal number of components given by KIC + plt.vlines( + n_components[1], + ymin=0, + ymax=1, + color="tab:orange", + linestyles="dashed", + label="KIC", + ) + # Vertical line depicting the optimal number of components given by MDL + plt.vlines( + n_components[2], + ymin=0, + ymax=1, + color="tab:green", + linestyles="dashed", + label="MDL", + ) + # Vertical line depicting the optimal number of components for 90% variance explained + plt.vlines( + n_components[3], + ymin=0, + ymax=1, + color="tab:red", + linestyles="dashed", + label="90% varexp", + ) + # Vertical line depicting the optimal number of components for 95% variance explained + plt.vlines( + n_components[4], + ymin=0, + ymax=1, + color="tab:purple", + linestyles="dashed", + label="95% varexp", + ) + + plt.legend() + + #  Save the plot + plot_name = "pca_variance_explained.png" + pca_variance_explained_name = os.path.join(io_generator.out_dir, "figures", plot_name) + plt.savefig(pca_variance_explained_name) + plt.close() diff --git a/tedana/resources/config/outputs.json b/tedana/resources/config/outputs.json index b67bfd8dc..e8acea001 100644 --- a/tedana/resources/config/outputs.json +++ b/tedana/resources/config/outputs.json @@ -147,6 +147,10 @@ "orig": "pca_metrics", "bidsv1.5.0": "desc-PCA_metrics" }, + "PCA cross component metrics json": { + "orig": "pca_cross_component_metrics", + "bidsv1.5.0": "desc-PCA_cross_component_metrics" + }, "ICA decomposition json": { "orig": "ica_decomposition", "bidsv1.5.0": "desc-ICA_decomposition" diff --git a/tedana/tests/data/cornell_three_echo_outputs.txt b/tedana/tests/data/cornell_three_echo_outputs.txt index 806bed435..4e45b1773 100644 --- a/tedana/tests/data/cornell_three_echo_outputs.txt +++ b/tedana/tests/data/cornell_three_echo_outputs.txt @@ -9,6 +9,7 @@ desc-tedana_metrics.json desc-tedana_metrics.tsv desc-ICA_mixing.tsv desc-ICA_stat-z_components.nii.gz +desc-PCA_cross_component_metrics.json desc-PCA_decomposition.json desc-PCA_metrics.json desc-PCA_metrics.tsv @@ -93,5 +94,7 @@ figures/comp_065.png figures/comp_066.png figures/comp_067.png figures/comp_068.png +figures/pca_criteria.png +figures/pca_variance_explained.png report.txt -tedana_report.html \ No newline at end of file +tedana_report.html