cleanup

OnnoKampman · Jun 1, 2024 · 6638d77 · 6638d77
1 parent fe2e9ef
commit 6638d77
Show file tree

Hide file tree

Showing 38 changed files with 212 additions and 161 deletions.
diff --git a/.gitignore b/.gitignore
@@ -3,15 +3,16 @@
 .ipynb_checkpoints/
 __pycache__
 
-# Data.
+# Data
 opk20_hivemind_paper_1/
 
+**.nii.gz
 **.tar.gz
 
 datasets/fmri/rs/HCP/hcp-openaccess/
-# HCP_PTN1200_recon2/
+datasets/fmri/rs/HCP/HCP_PTN1200_recon2/
 
-# Results.
+# Results
 
 results/fmri/rs/HCP/HCP_PTN1200_recon2/3T_HCP1200_MSMAll_d15_ts2/brain_states/k01/
 results/fmri/rs/HCP/HCP_PTN1200_recon2/3T_HCP1200_MSMAll_d15_ts2/brain_states/k02/

diff --git a/CITATION.cff b/CITATION.cff
@@ -9,6 +9,15 @@ authors:
 - family-names: "Ziminski"
   given-names: "Joe"
   orcid: "https://orcid.org/0000-0000-0000-0000"
+- family-names: "Afyouni"
+  given-names: "Soroosh"
+  orcid: "https://orcid.org/0000-0000-0000-0000"
+- family-names: "van der Wilk"
+  given-names: "Mark"
+  orcid: "https://orcid.org/0000-0000-0000-0000"
+- family-names: "Kourtzi"
+  given-names: "Zoe"
+  orcid: "https://orcid.org/0000-0000-0000-0000"
 title: "FCEst-benchmarking"
 version: 0.0.1
 doi: 10.5281/zenodo.1234
@@ -23,11 +32,20 @@ preferred-citation:
   - family-names: "Ziminski"
     given-names: "Joe"
     orcid: "https://orcid.org/0000-0000-0000-0000"
+  - family-names: "Afyouni"
+    given-names: "Soroosh"
+    orcid: "https://orcid.org/0000-0000-0000-0000"
+  - family-names: "van der Wilk"
+    given-names: "Mark"
+    orcid: "https://orcid.org/0000-0000-0000-0000"
+  - family-names: "Kourtzi"
+    given-names: "Zoe"
+    orcid: "https://orcid.org/0000-0000-0000-0000"
   doi: "10.1162/imag_a_00184"
   journal: "Imaging Neuroscience"
   month: 5
   start: 1 # First page number
-  end: 10 # Last page number
+  end: 28 # Last page number
   title: "Time-varying functional connectivity as Wishart processes"
   issue: 1
   volume: 2

diff --git a/README.md b/README.md
@@ -6,6 +6,9 @@ Functional connectivity (FC) estimates are sensitive to choice of estimation met
 This project aims to design a range of benchmarks to determine what estimation method to use.
 
 These benchmarks have been particularly developed to test the estimation methods included in the [FCEst](https://github.com/OnnoKampman/FCEst) Python package.
+Results have been published in an article in Imaging Neuroscience (see `CITATION.cff`).
+
+Many extensions of this project are possible, both in terms of adding more estimation methods and more benchmarks.
 
 ## Contributing
 

diff --git a/benchmarks/fmri/rs/HCP/TVFC_summary_measures/save_TVFC_estimates_summary_measures.py b/benchmarks/fmri/rs/HCP/TVFC_summary_measures/save_TVFC_estimates_summary_measures.py
@@ -28,10 +28,10 @@
         subset_dimensionality=data_dimensionality,
         hostname=socket.gethostname()
     )
-    n_subjects = cfg['n-subjects']
-    n_time_series = int(data_dimensionality[1:])
+    num_subjects = cfg['n-subjects']
+    num_time_series = int(data_dimensionality[1:])
     all_subjects_list = get_human_connectome_project_subjects(
-        data_dir=cfg['data-dir'], first_n_subjects=n_subjects
+        data_dir=cfg['data-dir'], first_n_subjects=num_subjects
     )
 
     for tvfc_summary_measure in cfg['TVFC-summary-measures']:
@@ -55,15 +55,15 @@
                 logging.info(f'> SUMMARY MEASURE: {tvfc_summary_measure:s}')
                 logging.info(f'> MODEL NAME:      {model_name:s}')
                 logging.info(f'> SCAN ID:         {scan_id:d}')
-                logging.info(f'> SUBJECT {i_subject+1: 3d} / {n_subjects:d}: {subject_filename:s}')
+                logging.info(f'> SUBJECT {i_subject+1: 3d} / {num_subjects:d}: {subject_filename:s}')
 
                 # Load TVFC estimates - some may be missing.
                 tvfc_estimates_filepath = os.path.join(
                     tvfc_estimates_savedir, f"{subject_filename.removesuffix('.txt'):s}.csv"
                 )
                 if not os.path.exists(tvfc_estimates_filepath):
                     logging.warning(f"Could not find TVFC estimates '{tvfc_estimates_filepath:s}'.")
-                    tvfc_estimates_array = np.empty((int(n_time_series * (n_time_series - 1) / 2), ))
+                    tvfc_estimates_array = np.empty((int(num_time_series * (num_time_series - 1) / 2), ))
                     tvfc_estimates_array[:] = np.nan
                 else:
                     tvfc_estimates_df = pd.read_csv(tvfc_estimates_filepath, index_col=0)  # (D*D, N)
@@ -98,7 +98,7 @@
                 reconstruct_symmetric_summary_measure_matrix_from_tril(
                     mean_over_subjects_edgewise_summarized_tvfc_df.values,
                     tvfc_summary_measure=tvfc_summary_measure,
-                    num_time_series=n_time_series
+                    num_time_series=num_time_series
                 )
             )  # (D, D)
 

diff --git a/benchmarks/fmri/rs/HCP/brain_states/plot_brain_states.py b/benchmarks/fmri/rs/HCP/brain_states/plot_brain_states.py
@@ -59,7 +59,7 @@ def plot_brain_state_cluster_centroids(
                 ),
                 index_col=0
             )  # (D, D)
-            n_time_series = cluster_centroid_df.shape[0]  # D
+            num_time_series = cluster_centroid_df.shape[0]  # D
 
             brain_state += 1  # make 1-indexed
             i_subplot = scan_id * n_basis_states + brain_state
@@ -69,7 +69,7 @@ def plot_brain_state_cluster_centroids(
             if data_dimensionality == 'd15':
                 cluster_centroid_array, new_rsn_names = reorder_ica_components(
                     original_matrix=cluster_centroid_df.values,
-                    n_time_series=n_time_series,
+                    num_time_series=num_time_series,
                     config_dict=config_dict,
                 )
             else:

diff --git a/benchmarks/fmri/rs/HCP/brain_states/save_brain_state_switch_counts.py b/benchmarks/fmri/rs/HCP/brain_states/save_brain_state_switch_counts.py
@@ -10,13 +10,15 @@
 
 
 def _save_number_of_brain_state_switches(
-        config_dict: dict, n_basis_states: int, connectivity_metric: str = 'correlation'
+    config_dict: dict,
+    num_basis_states: int,
+    connectivity_metric: str = 'correlation',
 ) -> None:
     for model_name in config_dict['models-brain-state-analysis']:
         model_number_of_switches_df = pd.DataFrame()
         for scan_id in config_dict['scan-ids']:
             brain_states_savedir = os.path.join(
-                config_dict['git-results-basedir'], 'brain_states', f'k{n_basis_states:02d}', f'scan_{scan_id:d}'
+                config_dict['git-results-basedir'], 'brain_states', f'k{num_basis_states:02d}', f'scan_{scan_id:d}'
             )
 
             # Load brain state assignment file.
@@ -32,7 +34,7 @@ def _save_number_of_brain_state_switches(
 
         # Save number of brain state switches for this model.
         n_brain_state_switches_savedir = os.path.join(
-            config_dict['git-results-basedir'], 'brain_states', f'k{n_basis_states:02d}'
+            config_dict['git-results-basedir'], 'brain_states', f'k{num_basis_states:02d}'
         )
         n_brain_state_switches_filename = f'number_of_brain_state_switches_{model_name:s}.csv'
         model_number_of_switches_df.to_csv(
@@ -54,5 +56,5 @@ def _save_number_of_brain_state_switches(
     for n_brain_states in n_brain_states_list:
         _save_number_of_brain_state_switches(
             config_dict=cfg,
-            n_basis_states=n_brain_states
+            num_basis_states=n_brain_states
         )
diff --git a/benchmarks/fmri/rs/HCP/imputation_benchmark/compute_edgewise_LEOO_likelihoods.py b/benchmarks/fmri/rs/HCP/imputation_benchmark/compute_edgewise_LEOO_likelihoods.py
@@ -16,18 +16,18 @@
 if __name__ == "__main__":
 
     data_split = 'LEOO'  # leave-every-other-out
+    experiment_dimensionality = 'multivariate'  # TODO: take all edges from the jointly/multivariately trained model
 
     data_dimensionality = sys.argv[1]  # 'd15', 'd50'
     model_name = sys.argv[2]           # 'SVWP_joint', 'DCC_joint', 'GO_joint', 'SW_cross_validated', 'SW_30', 'SW_60', 'sFC'
 
-    num_time_series = int(data_dimensionality[1:])
-    experiment_dimensionality = 'multivariate'  # TODO: take all edges from the jointly/multivariately trained model
     cfg = get_config_dict(
         data_set_name='HCP_PTN1200_recon2',
         subset_dimensionality=data_dimensionality,
         hostname=socket.gethostname()
     )
     num_subjects = cfg['n-subjects']
+    num_time_series = int(data_dimensionality[1:])
     all_subjects_list = get_human_connectome_project_subjects(
         data_dir=cfg['data-dir'], first_n_subjects=num_subjects
     )

diff --git a/benchmarks/fmri/rs/HCP/imputation_benchmark/plot_edgewise_LEOO_likelihoods.py b/benchmarks/fmri/rs/HCP/imputation_benchmark/plot_edgewise_LEOO_likelihoods.py
@@ -59,7 +59,7 @@ def plot_edgewise_imputation_benchmark_scores_joint(
             edgewise_likelihoods, new_rsn_names = reorder_ica_components(
                 config_dict=config_dict,
                 original_matrix=edgewise_likelihoods.values,
-                n_time_series=num_time_series,
+                num_time_series=num_time_series,
                 # lower_triangular=True
             )
         else:
@@ -131,7 +131,7 @@ def plot_edgewise_imputation_benchmark_scores(
         edgewise_likelihoods, new_rsn_names = reorder_ica_components(
             config_dict=config_dict,
             original_matrix=edgewise_likelihoods.values,
-            n_time_series=num_time_series,
+            num_time_series=num_time_series,
             # lower_triangular=True
         )
     else:
@@ -184,13 +184,13 @@ def _plot_performance_difference(config_dict: dict, edgewise_likelihoods: (pd.Da
     """
     edgewise_likelihoods_first_method, new_rsn_names = reorder_ica_components(
         original_matrix=edgewise_likelihoods[0].values,
-        n_time_series=edgewise_likelihoods[0].shape[0],
+        num_time_series=edgewise_likelihoods[0].shape[0],
         config_dict=config_dict,
         # lower_triangular=True
     )
     edgewise_likelihoods_second_method, new_rsn_names = reorder_ica_components(
         original_matrix=edgewise_likelihoods[1].values,
-        n_time_series=edgewise_likelihoods[1].shape[0],
+        num_time_series=edgewise_likelihoods[1].shape[0],
         config_dict=config_dict,
         # lower_triangular=True
     )

diff --git a/benchmarks/fmri/rs/HCP/plotters/plot_TVFC_estimates.py b/benchmarks/fmri/rs/HCP/plotters/plot_TVFC_estimates.py
@@ -52,7 +52,7 @@ def plot_hcp_tvfc_estimates(
     figsize: tuple[float]
     figures_savedir: str
     """
-    n_time_series = y_train_locations.shape[1]
+    num_time_series = y_train_locations.shape[1]
 
     sns.set(style="whitegrid")
     plt.style.use(os.path.join(config_dict['git-basedir'], 'configs', 'fig.mplstyle'))
@@ -69,9 +69,9 @@ def plot_hcp_tvfc_estimates(
     )
 
     if random_edges:
-        interaction_pairs_indices = np.triu_indices(n_time_series, k=1)  # set k=0 to include variances
+        interaction_pairs_indices = np.triu_indices(num_time_series, k=1)  # set k=0 to include variances
         interaction_pairs_indices = np.array(interaction_pairs_indices).T
-        n_interactions = int(n_time_series * (n_time_series - 1) / 2)
+        n_interactions = int(num_time_series * (num_time_series - 1) / 2)
         assert n_interactions == len(interaction_pairs_indices)
         random_interactions = np.random.choice(len(interaction_pairs_indices), num_rows*num_columns)
         interaction_pairs_indices = interaction_pairs_indices[random_interactions]
@@ -208,7 +208,6 @@ def plot_hcp_tvfc_estimates(
             x, y = load_human_connectome_project_data(
                 data_file, scan_id=scan_id, verbose=False
             )  # (N, 1), (N, D)
-            n_time_steps = x.shape[0]
 
             if data_split == 'LEOO':
                 x_train, _ = leave_every_other_out_split(x)
@@ -217,11 +216,11 @@ def plot_hcp_tvfc_estimates(
                 x_train = x
                 y_train = y
 
-            n_time_steps = x_train.shape[0]
+            num_time_steps = x_train.shape[0]
             xx = convert_to_minutes(
                 x_train,
                 repetition_time=cfg['repetition-time'],
-                data_length=n_time_steps
+                data_length=num_time_steps
             )
             plot_hcp_tvfc_estimates(
                 config_dict=cfg,

diff --git a/benchmarks/fmri/rs/HCP/plotters/plot_TVFC_estimates_summary_measures.py b/benchmarks/fmri/rs/HCP/plotters/plot_TVFC_estimates_summary_measures.py
@@ -71,17 +71,17 @@ def plot_tvfc_summary_measures_mean_over_subjects_all_edges_joint(
             )  # (D, D)
 
             # Re-order ICA components.
-            n_time_series = summarized_tvfc_df.shape[0]
+            num_time_series = summarized_tvfc_df.shape[0]
             if data_dimensionality == 'd15':
                 summarized_tvfc_array, new_rsn_names = reorder_ica_components(
                     config_dict=config_dict,
                     original_matrix=summarized_tvfc_df.values,
-                    n_time_series=n_time_series,
+                    num_time_series=num_time_series,
                 )
             else:
                 # TODO: add RSN names map for d50
                 summarized_tvfc_array = summarized_tvfc_df.values
-                new_rsn_names = np.arange(n_time_series)
+                new_rsn_names = np.arange(num_time_series)
 
             # Define mask for diagonal and upper triangular values.
             mask = np.zeros_like(summarized_tvfc_array)
@@ -160,17 +160,17 @@ def plot_tvfc_summary_measures_mean_over_subjects_all_edges(
     vmin, vmax = _set_colorbar_min_max(summary_measure)
 
     # Re-order ICA components.
-    n_time_series = summarized_tvfc_df.shape[0]
+    num_time_series = summarized_tvfc_df.shape[0]
     if data_dimensionality == 'd15':
         summarized_tvfc_array, new_rsn_names = reorder_ica_components(
             config_dict=config_dict,
             original_matrix=summarized_tvfc_df.values,
-            n_time_series=n_time_series,
+            num_time_series=num_time_series,
         )
     else:
         # TODO: add RSN names map for d50
         summarized_tvfc_array = summarized_tvfc_df.values
-        new_rsn_names = np.arange(n_time_series)
+        new_rsn_names = np.arange(num_time_series)
 
     # Define mask for diagonal and upper triangular values.
     mask = np.zeros_like(summarized_tvfc_array)

diff --git a/benchmarks/fmri/rs/HCP/plotters/plot_lengthscale_optimal_window_length_relation.py b/benchmarks/fmri/rs/HCP/plotters/plot_lengthscale_optimal_window_length_relation.py
@@ -20,7 +20,6 @@
         subset_dimensionality=data_dimensionality,
         hostname=socket.gethostname()
     )
-    n_time_series = int(data_dimensionality[1:])
     kernel_params_savedir = os.path.join(cfg['git-results-basedir'], 'kernel_analysis')
     kernel_params_df = pd.read_csv(
         os.path.join(kernel_params_savedir, f'{kernel_param:s}_kernel_params.csv'),

diff --git a/benchmarks/fmri/rs/HCP/plotters/plot_node_timeseries.py b/benchmarks/fmri/rs/HCP/plotters/plot_node_timeseries.py
@@ -29,11 +29,11 @@ def _plot_time_series(
     """
     figure_name_time_series = 'time_series.pdf'
 
-    n_time_series = y_locations.shape[1]
+    num_time_series = y_locations.shape[1]
 
     plt.figure(figsize=config_dict['plot-time-series-figsize'])
-    for i_time_series in range(n_time_series):
-        plt.subplot(n_time_series, 1, i_time_series+1)
+    for i_time_series in range(num_time_series):
+        plt.subplot(num_time_series, 1, i_time_series+1)
         plt.plot(
             x_plot, y_locations[:, i_time_series], 'x-',
             markersize=0, label=f'TS_{(i_time_series+1):d}'
@@ -92,11 +92,11 @@ def _plot_time_series(
                     x_train = x
                     y_train = y
 
-            n_time_steps = x_train.shape[0]
+            num_time_steps = x_train.shape[0]
             xx = convert_to_minutes(
                 x_train,
                 repetition_time=cfg['repetition-time'],
-                data_length=n_time_steps
+                data_length=num_time_steps
             )
             _plot_time_series(
                 config_dict=cfg,

diff --git a/benchmarks/fmri/rs/HCP/plotters/plot_sFC_estimates.py b/benchmarks/fmri/rs/HCP/plotters/plot_sFC_estimates.py
@@ -36,7 +36,7 @@ def _plot_static_functional_connectivity_estimate(
     re_ordered_correlation_matrix, new_rsn_names = reorder_ica_components(
         config_dict=config_dict,
         original_matrix=correlation_matrix,
-        n_time_series=num_time_series,
+        num_time_series=num_time_series,
     )
 
     plt.figure()
@@ -84,13 +84,13 @@ def _plot_static_functional_connectivity_estimate(
         subset_dimensionality=data_dimensionality,
         hostname=socket.gethostname()
     )
-    n_time_series = int(data_dimensionality[1:])
+    num_time_series = int(data_dimensionality[1:])
     scan_ids = cfg['scan-ids']
     subjects = get_human_connectome_project_subjects(
         data_dir=cfg['data-dir'], as_ints=True
     )
 
-    mean_sfc_estimate = np.zeros((n_time_series, n_time_series))
+    mean_sfc_estimate = np.zeros((num_time_series, num_time_series))
     for i_subject, subject in enumerate(subjects):
 
         print(f"\n> SUBJECT {i_subject+1:d}: {subject:d}\n")

diff --git a/...rs/HCP/subject_measure_prediction/compute_morphometricity_scores_TVFC_summary_measures.py b/...rs/HCP/subject_measure_prediction/compute_morphometricity_scores_TVFC_summary_measures.py
@@ -67,7 +67,7 @@
                     subjects_subset_list=all_subjects_list,
                     nuisance_variables=cfg['subject-measures-nuisance-variables'].copy(),  # do not edit original list
                     morphometricity_subject_measure=subject_measure
-                )  # (n_subjects, n_covariates)
+                )  # (num_subjects, num_covariates)
                 K = get_tvfc_estimates_similarity_matrix(
                     config_dict=cfg,
                     tvfc_summary_measure=tvfc_summary_measure,

diff --git a/...HCP/subject_measure_prediction/compute_morphometricity_scores_brain_state_switch_count.py b/...HCP/subject_measure_prediction/compute_morphometricity_scores_brain_state_switch_count.py
@@ -60,7 +60,7 @@
                 subjects_subset_list=all_subjects_list,
                 nuisance_variables=cfg['subject-measures-nuisance-variables'].copy(),  # do not edit original list
                 morphometricity_subject_measure=subject_measure
-            )  # (n_subjects, n_covariates)
+            )  # (num_subjects, num_covariates)
             K = get_brain_state_switch_count_similarity_matrix(
                 config_dict=cfg,
                 n_basis_states=3,