From 319bbba5ba86c3cc31c0a4882918db9f1e72767e Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Wed, 14 Jul 2021 11:14:52 -0400 Subject: [PATCH] [REF] Do not modify mixing matrix with sign-flipping (#749) * Add "optimal sign" metric and do not alter mixing matrix. * Coerce optimal signs to integers. * Drop sign-flipping from method descriptions. * Adjust figure signs. * Write out PCA metrics TSV with index. * Note issue with column reordering in metric calculation. It's not a big deal, but devs might wonder why the columns in the component table don't match the order hardcoded into generate_metrics. * Use save_file instead of to_csv. --- tedana/decomposition/pca.py | 8 ++---- tedana/metrics/_utils.py | 3 +- tedana/metrics/collect.py | 45 +++++++++++++++--------------- tedana/reporting/static_figures.py | 3 ++ tedana/tests/test_metrics.py | 2 +- tedana/workflows/tedana.py | 33 +++++++--------------- 6 files changed, 42 insertions(+), 52 deletions(-) diff --git a/tedana/decomposition/pca.py b/tedana/decomposition/pca.py index 01b507869..8a4220a71 100644 --- a/tedana/decomposition/pca.py +++ b/tedana/decomposition/pca.py @@ -233,7 +233,7 @@ def tedpca(data_cat, data_oc, combmode, mask, adaptive_mask, t2sG, 'variance explained', 'normalized variance explained', 'd_table_score' ] - comptable, _ = metrics.collect.generate_metrics( + comptable = metrics.collect.generate_metrics( data_cat, data_oc, comp_ts, adaptive_mask, tes, io_generator, 'PCA', metrics=required_metrics, @@ -281,17 +281,15 @@ def tedpca(data_cat, data_oc, combmode, mask, adaptive_mask, t2sG, io_generator.save_file(mixing_df, "PCA mixing tsv") # Save component table and associated json - temp_comptable = comptable.set_index("Component", inplace=False) - io_generator.save_file(temp_comptable, "PCA metrics tsv") + io_generator.save_file(comptable, "PCA metrics tsv") - metric_metadata = metrics.collect.get_metadata(temp_comptable) + metric_metadata = metrics.collect.get_metadata(comptable) io_generator.save_file(metric_metadata, "PCA metrics json") decomp_metadata = { "Method": ( "Principal components analysis implemented by sklearn. " "Components are sorted by variance explained in descending order. " - "Component signs are flipped to best match the data." ), } for comp_name in comp_names: diff --git a/tedana/metrics/_utils.py b/tedana/metrics/_utils.py index f40b0d87b..eb670456d 100644 --- a/tedana/metrics/_utils.py +++ b/tedana/metrics/_utils.py @@ -74,8 +74,9 @@ def determine_signs(weights, axis=0): """ # compute skews to determine signs based on unnormalized weights, signs = stats.skew(weights, axis=axis) + signs[signs == 0] = 1 # Default to not flipping signs /= np.abs(signs) - return signs + return signs.astype(int) def flip_components(*args, signs): diff --git a/tedana/metrics/collect.py b/tedana/metrics/collect.py index fd8d77123..9272935f3 100644 --- a/tedana/metrics/collect.py +++ b/tedana/metrics/collect.py @@ -58,8 +58,6 @@ def generate_metrics( comptable : (C x X) :obj:`pandas.DataFrame` Component metric table. One row for each component, with a column for each metric. The index is the component number. - mixing : :obj:`numpy.ndarray` - Mixing matrix after sign flipping. """ # Load metric dependency tree from json file dependency_config = op.join(utils.get_resource_path(), "config", "metrics.json") @@ -131,6 +129,7 @@ def generate_metrics( LGR.info("Calculating weight maps") metric_maps["map weight"] = dependence.calculate_weights(data_optcom, mixing) signs = determine_signs(metric_maps["map weight"], axis=0) + comptable["optimal sign"] = signs metric_maps["map weight"], mixing = flip_components( metric_maps["map weight"], mixing, signs=signs ) @@ -361,9 +360,11 @@ def generate_metrics( echo=(i_echo + 1) ) - # Build new comptable, re-ordered like previous tedana versions - previous_order = ( - "kappa", "rho", "variance explained", + # Reorder component table columns based on previous tedana versions + # NOTE: Some new columns will be calculated and columns may be reordered during + # component selection + preferred_order = ( + "Component", "kappa", "rho", "variance explained", "normalized variance explained", "estimated normalized variance explained", "countsigFT2", "countsigFS0", @@ -372,24 +373,11 @@ def generate_metrics( "d_table_score", "kappa ratio", "d_table_score_scrub", "classification", "rationale", ) - reordered = pd.DataFrame() - for metric in previous_order: - if metric in comptable: - reordered[metric] = comptable[metric] - # Add in things with less relevant order - cmp_cols = comptable.columns - disordered = set(cmp_cols) & (set(cmp_cols) ^ set(previous_order)) - for metric in disordered: - reordered[metric] = comptable[metric] - # Add in component labels with new ordering - reordered["Component"] = [ - io.add_decomp_prefix( - comp, prefix=label, max_value=reordered.shape[0] - ) - for comp in reordered.index.values - ] + first_columns = [col for col in preferred_order if col in comptable.columns] + other_columns = [col for col in comptable.columns if col not in preferred_order] + comptable = comptable[first_columns + other_columns] - return reordered, mixing + return comptable def get_metadata(comptable): @@ -608,6 +596,19 @@ def get_metadata(comptable): ), "Units": "arbitrary", } + if "optimal sign" in comptable: + metric_metadata["optimal sign"] = { + "LongName": "Optimal component sign", + "Description": ( + "Optimal sign determined based on skew direction of component parameter estimates " + "across the brain. In cases where components were left-skewed (-1), the component " + "time series is flipped prior to metric calculation." + ), + "Levels": { + 1: "Component is not flipped prior to metric calculation.", + -1: "Component is flipped prior to metric calculation.", + }, + } # There are always components in the comptable, definitionally metric_metadata["Component"] = { diff --git a/tedana/reporting/static_figures.py b/tedana/reporting/static_figures.py index e170b0a2c..1f33b4f08 100644 --- a/tedana/reporting/static_figures.py +++ b/tedana/reporting/static_figures.py @@ -67,6 +67,9 @@ def comp_figures(ts, mask, comptable, mmix, io_generator, png_cmap): # Get the lenght of the timeseries n_vols = len(mmix) + # Flip signs of mixing matrix as needed + mmix = mmix * comptable["optimal sign"].values + # regenerate the beta images ts_B = stats.get_coeffs(ts, mmix, mask) ts_B = ts_B.reshape(io_generator.reference_img.shape[:3] + ts_B.shape[1:]) diff --git a/tedana/tests/test_metrics.py b/tedana/tests/test_metrics.py index 6639e4e81..93ce8623e 100644 --- a/tedana/tests/test_metrics.py +++ b/tedana/tests/test_metrics.py @@ -48,7 +48,7 @@ def test_smoke_generate_metrics(testdata1): "normalized variance explained", "d_table_score", ] - comptable, mixing = collect.generate_metrics( + comptable = collect.generate_metrics( testdata1["data_cat"], testdata1["data_optcom"], testdata1["mixing"], diff --git a/tedana/workflows/tedana.py b/tedana/workflows/tedana.py index 8085e1c5e..9437ce578 100644 --- a/tedana/workflows/tedana.py +++ b/tedana/workflows/tedana.py @@ -550,7 +550,7 @@ def tedana_workflow(data, tes, out_dir='.', mask=None, n_restarts = 0 seed = fixed_seed while keep_restarting: - mmix_orig, seed = decomposition.tedica( + mmix, seed = decomposition.tedica( dd, n_components, seed, maxit, maxrestart=(maxrestart - n_restarts) ) @@ -567,8 +567,8 @@ def tedana_workflow(data, tes, out_dir='.', mask=None, 'variance explained', 'normalized variance explained', 'd_table_score' ] - comptable, mmix = metrics.collect.generate_metrics( - catd, data_oc, mmix_orig, masksum, tes, + comptable = metrics.collect.generate_metrics( + catd, data_oc, mmix, masksum, tes, io_generator, 'ICA', metrics=required_metrics, ) @@ -590,7 +590,7 @@ def tedana_workflow(data, tes, out_dir='.', mask=None, else: LGR.info('Using supplied mixing matrix from ICA') mixing_file = io_generator.get_name("ICA mixing tsv") - mmix_orig = pd.read_table(mixing_file).values + mmix = pd.read_table(mixing_file).values if ctab is None: required_metrics = [ @@ -599,8 +599,8 @@ def tedana_workflow(data, tes, out_dir='.', mask=None, 'variance explained', 'normalized variance explained', 'd_table_score' ] - comptable, mmix = metrics.collect.generate_metrics( - catd, data_oc, mmix_orig, masksum, tes, + comptable = metrics.collect.generate_metrics( + catd, data_oc, mmix, masksum, tes, io_generator, 'ICA', metrics=required_metrics, ) @@ -608,7 +608,6 @@ def tedana_workflow(data, tes, out_dir='.', mask=None, comptable, n_echos, n_vols ) else: - mmix = mmix_orig.copy() comptable = pd.read_table(ctab) if manacc is not None: @@ -620,27 +619,19 @@ def tedana_workflow(data, tes, out_dir='.', mask=None, # Write out ICA files. comp_names = comptable["Component"].values mixing_df = pd.DataFrame(data=mmix, columns=comp_names) - mixing_df.to_csv(io_generator.get_name("ICA mixing tsv"), sep="\t", index=False) + io_generator.save_file(mixing_df, "ICA mixing tsv") betas_oc = utils.unmask(computefeats2(data_oc, mmix, mask), mask) io_generator.save_file(betas_oc, 'z-scored ICA components img') # Save component table and associated json - temp_comptable = comptable.set_index("Component", inplace=False) - temp_comptable.to_csv( - io_generator.get_name("ICA metrics tsv"), - index=True, - index_label="Component", - sep='\t', - ) - metric_metadata = metrics.collect.get_metadata(temp_comptable) + io_generator.save_file(comptable, "ICA metrics tsv") + metric_metadata = metrics.collect.get_metadata(comptable) io_generator.save_file(metric_metadata, "ICA metrics json") decomp_metadata = { "Method": ( "Independent components analysis with FastICA " "algorithm implemented by sklearn. " - "Component signs are flipped to best match the " - "data." ), } for comp_name in comp_names: @@ -670,11 +661,7 @@ def tedana_workflow(data, tes, out_dir='.', mask=None, comp_names = [io.add_decomp_prefix(comp, prefix='ica', max_value=comptable.index.max()) for comp in comptable.index.values] mixing_df = pd.DataFrame(data=mmix, columns=comp_names) - mixing_df.to_csv( - io_generator.get_name("ICA orthogonalized mixing tsv"), - sep='\t', - index=False - ) + io_generator.save_file(mixing_df, "ICA orthogonalized mixing tsv") RepLGR.info("Rejected components' time series were then " "orthogonalized with respect to accepted components' time " "series.")