Skip to content

Commit

Permalink
[REF] Do not modify mixing matrix with sign-flipping (#749)
Browse files Browse the repository at this point in the history
* Add "optimal sign" metric and do not alter mixing matrix.

* Coerce optimal signs to integers.

* Drop sign-flipping from method descriptions.

* Adjust figure signs.

* Write out PCA metrics TSV with index.

* Note issue with column reordering in metric calculation.

It's not a big deal, but devs might wonder why the columns in the component table don't match the order hardcoded into generate_metrics.

* Use save_file instead of to_csv.
  • Loading branch information
tsalo committed Jul 14, 2021
1 parent d449925 commit 319bbba
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 52 deletions.
8 changes: 3 additions & 5 deletions tedana/decomposition/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ def tedpca(data_cat, data_oc, combmode, mask, adaptive_mask, t2sG,
'variance explained', 'normalized variance explained',
'd_table_score'
]
comptable, _ = metrics.collect.generate_metrics(
comptable = metrics.collect.generate_metrics(
data_cat, data_oc, comp_ts, adaptive_mask,
tes, io_generator, 'PCA',
metrics=required_metrics,
Expand Down Expand Up @@ -281,17 +281,15 @@ def tedpca(data_cat, data_oc, combmode, mask, adaptive_mask, t2sG,
io_generator.save_file(mixing_df, "PCA mixing tsv")

# Save component table and associated json
temp_comptable = comptable.set_index("Component", inplace=False)
io_generator.save_file(temp_comptable, "PCA metrics tsv")
io_generator.save_file(comptable, "PCA metrics tsv")

metric_metadata = metrics.collect.get_metadata(temp_comptable)
metric_metadata = metrics.collect.get_metadata(comptable)
io_generator.save_file(metric_metadata, "PCA metrics json")

decomp_metadata = {
"Method": (
"Principal components analysis implemented by sklearn. "
"Components are sorted by variance explained in descending order. "
"Component signs are flipped to best match the data."
),
}
for comp_name in comp_names:
Expand Down
3 changes: 2 additions & 1 deletion tedana/metrics/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,9 @@ def determine_signs(weights, axis=0):
"""
# compute skews to determine signs based on unnormalized weights,
signs = stats.skew(weights, axis=axis)
signs[signs == 0] = 1 # Default to not flipping
signs /= np.abs(signs)
return signs
return signs.astype(int)


def flip_components(*args, signs):
Expand Down
45 changes: 23 additions & 22 deletions tedana/metrics/collect.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,6 @@ def generate_metrics(
comptable : (C x X) :obj:`pandas.DataFrame`
Component metric table. One row for each component, with a column for
each metric. The index is the component number.
mixing : :obj:`numpy.ndarray`
Mixing matrix after sign flipping.
"""
# Load metric dependency tree from json file
dependency_config = op.join(utils.get_resource_path(), "config", "metrics.json")
Expand Down Expand Up @@ -131,6 +129,7 @@ def generate_metrics(
LGR.info("Calculating weight maps")
metric_maps["map weight"] = dependence.calculate_weights(data_optcom, mixing)
signs = determine_signs(metric_maps["map weight"], axis=0)
comptable["optimal sign"] = signs
metric_maps["map weight"], mixing = flip_components(
metric_maps["map weight"], mixing, signs=signs
)
Expand Down Expand Up @@ -361,9 +360,11 @@ def generate_metrics(
echo=(i_echo + 1)
)

# Build new comptable, re-ordered like previous tedana versions
previous_order = (
"kappa", "rho", "variance explained",
# Reorder component table columns based on previous tedana versions
# NOTE: Some new columns will be calculated and columns may be reordered during
# component selection
preferred_order = (
"Component", "kappa", "rho", "variance explained",
"normalized variance explained",
"estimated normalized variance explained",
"countsigFT2", "countsigFS0",
Expand All @@ -372,24 +373,11 @@ def generate_metrics(
"d_table_score", "kappa ratio", "d_table_score_scrub",
"classification", "rationale",
)
reordered = pd.DataFrame()
for metric in previous_order:
if metric in comptable:
reordered[metric] = comptable[metric]
# Add in things with less relevant order
cmp_cols = comptable.columns
disordered = set(cmp_cols) & (set(cmp_cols) ^ set(previous_order))
for metric in disordered:
reordered[metric] = comptable[metric]
# Add in component labels with new ordering
reordered["Component"] = [
io.add_decomp_prefix(
comp, prefix=label, max_value=reordered.shape[0]
)
for comp in reordered.index.values
]
first_columns = [col for col in preferred_order if col in comptable.columns]
other_columns = [col for col in comptable.columns if col not in preferred_order]
comptable = comptable[first_columns + other_columns]

return reordered, mixing
return comptable


def get_metadata(comptable):
Expand Down Expand Up @@ -608,6 +596,19 @@ def get_metadata(comptable):
),
"Units": "arbitrary",
}
if "optimal sign" in comptable:
metric_metadata["optimal sign"] = {
"LongName": "Optimal component sign",
"Description": (
"Optimal sign determined based on skew direction of component parameter estimates "
"across the brain. In cases where components were left-skewed (-1), the component "
"time series is flipped prior to metric calculation."
),
"Levels": {
1: "Component is not flipped prior to metric calculation.",
-1: "Component is flipped prior to metric calculation.",
},
}

# There are always components in the comptable, definitionally
metric_metadata["Component"] = {
Expand Down
3 changes: 3 additions & 0 deletions tedana/reporting/static_figures.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ def comp_figures(ts, mask, comptable, mmix, io_generator, png_cmap):
# Get the length of the timeseries
n_vols = len(mmix)

# Flip signs of mixing matrix as needed
mmix = mmix * comptable["optimal sign"].values

# regenerate the beta images
ts_B = stats.get_coeffs(ts, mmix, mask)
ts_B = ts_B.reshape(io_generator.reference_img.shape[:3] + ts_B.shape[1:])
Expand Down
2 changes: 1 addition & 1 deletion tedana/tests/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def test_smoke_generate_metrics(testdata1):
"normalized variance explained",
"d_table_score",
]
comptable, mixing = collect.generate_metrics(
comptable = collect.generate_metrics(
testdata1["data_cat"],
testdata1["data_optcom"],
testdata1["mixing"],
Expand Down
33 changes: 10 additions & 23 deletions tedana/workflows/tedana.py
Original file line number Diff line number Diff line change
Expand Up @@ -550,7 +550,7 @@ def tedana_workflow(data, tes, out_dir='.', mask=None,
n_restarts = 0
seed = fixed_seed
while keep_restarting:
mmix_orig, seed = decomposition.tedica(
mmix, seed = decomposition.tedica(
dd, n_components, seed,
maxit, maxrestart=(maxrestart - n_restarts)
)
Expand All @@ -567,8 +567,8 @@ def tedana_workflow(data, tes, out_dir='.', mask=None,
'variance explained', 'normalized variance explained',
'd_table_score'
]
comptable, mmix = metrics.collect.generate_metrics(
catd, data_oc, mmix_orig, masksum, tes,
comptable = metrics.collect.generate_metrics(
catd, data_oc, mmix, masksum, tes,
io_generator, 'ICA',
metrics=required_metrics,
)
Expand All @@ -590,7 +590,7 @@ def tedana_workflow(data, tes, out_dir='.', mask=None,
else:
LGR.info('Using supplied mixing matrix from ICA')
mixing_file = io_generator.get_name("ICA mixing tsv")
mmix_orig = pd.read_table(mixing_file).values
mmix = pd.read_table(mixing_file).values

if ctab is None:
required_metrics = [
Expand All @@ -599,16 +599,15 @@ def tedana_workflow(data, tes, out_dir='.', mask=None,
'variance explained', 'normalized variance explained',
'd_table_score'
]
comptable, mmix = metrics.collect.generate_metrics(
catd, data_oc, mmix_orig, masksum, tes,
comptable = metrics.collect.generate_metrics(
catd, data_oc, mmix, masksum, tes,
io_generator, 'ICA',
metrics=required_metrics,
)
comptable, metric_metadata = selection.kundu_selection_v2(
comptable, n_echos, n_vols
)
else:
mmix = mmix_orig.copy()
comptable = pd.read_table(ctab)

if manacc is not None:
Expand All @@ -620,27 +619,19 @@ def tedana_workflow(data, tes, out_dir='.', mask=None,
# Write out ICA files.
comp_names = comptable["Component"].values
mixing_df = pd.DataFrame(data=mmix, columns=comp_names)
mixing_df.to_csv(io_generator.get_name("ICA mixing tsv"), sep="\t", index=False)
io_generator.save_file(mixing_df, "ICA mixing tsv")
betas_oc = utils.unmask(computefeats2(data_oc, mmix, mask), mask)
io_generator.save_file(betas_oc, 'z-scored ICA components img')

# Save component table and associated json
temp_comptable = comptable.set_index("Component", inplace=False)
temp_comptable.to_csv(
io_generator.get_name("ICA metrics tsv"),
index=True,
index_label="Component",
sep='\t',
)
metric_metadata = metrics.collect.get_metadata(temp_comptable)
io_generator.save_file(comptable, "ICA metrics tsv")
metric_metadata = metrics.collect.get_metadata(comptable)
io_generator.save_file(metric_metadata, "ICA metrics json")

decomp_metadata = {
"Method": (
"Independent components analysis with FastICA "
"algorithm implemented by sklearn. "
"Component signs are flipped to best match the "
"data."
),
}
for comp_name in comp_names:
Expand Down Expand Up @@ -670,11 +661,7 @@ def tedana_workflow(data, tes, out_dir='.', mask=None,
comp_names = [io.add_decomp_prefix(comp, prefix='ica', max_value=comptable.index.max())
for comp in comptable.index.values]
mixing_df = pd.DataFrame(data=mmix, columns=comp_names)
mixing_df.to_csv(
io_generator.get_name("ICA orthogonalized mixing tsv"),
sep='\t',
index=False
)
io_generator.save_file(mixing_df, "ICA orthogonalized mixing tsv")
RepLGR.info("Rejected components' time series were then "
"orthogonalized with respect to accepted components' time "
"series.")
Expand Down

0 comments on commit 319bbba

Please sign in to comment.