From 65f314bce4f3ff46e116d4453adb3d65047eb1f6 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 8 May 2020 13:45:17 -0400 Subject: [PATCH 01/28] Start working on BIDS dataset-based multi-run converter. --- phys2bids/conversion.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 phys2bids/conversion.py diff --git a/phys2bids/conversion.py b/phys2bids/conversion.py new file mode 100644 index 000000000..ee6ec4183 --- /dev/null +++ b/phys2bids/conversion.py @@ -0,0 +1,16 @@ +""" +1. Extract segments from physio file. + - Each segment must be named according to the onset time. +2. Extract trigger periods from physio segments. + - Onset + - Duration (low priority) +3. Scrape dicom directory + - Name + - Onset + - Duration +4. Calculate time between onsets of each pair of trigger periods. +5. Calculate time between onsets of each pair of scans. +6. Compare these time differences to maximize similarity between structures. +7. Assign scan names to trigger period, and infer times for other scan times + in cases where trigger failed. +""" From 69fecdb0335ed8c6276eb9c49fed258e3e0c2426 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 8 May 2020 14:33:04 -0400 Subject: [PATCH 02/28] Add some code. Not good code, but code. --- phys2bids/conversion.py | 160 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 150 insertions(+), 10 deletions(-) diff --git a/phys2bids/conversion.py b/phys2bids/conversion.py index ee6ec4183..dc8704317 100644 --- a/phys2bids/conversion.py +++ b/phys2bids/conversion.py @@ -1,16 +1,156 @@ """ 1. Extract segments from physio file. - - Each segment must be named according to the onset time. + - Each segment must be named according to the onset time if there are + multiple segments. If there is only one segment, then there is no need + for a timestamp. 2. Extract trigger periods from physio segments. - Onset - - Duration (low priority) -3. Scrape dicom directory +3. Extract scan times - Name - - Onset - - Duration -4. Calculate time between onsets of each pair of trigger periods. -5. Calculate time between onsets of each pair of scans. -6. Compare these time differences to maximize similarity between structures. -7. Assign scan names to trigger period, and infer times for other scan times - in cases where trigger failed. + - Onset with subsecond resolution as close to trigger pulse as possible + - Duration (from TR * data dimension) +4. Calculate difference in time between each scan and each physio trigger. +5. Use differences to identify similar values across as many physio/scan pairs as possible. + - Physio may be missing if trigger failed. + - Physio may be delayed if task was manually trigger after scan began + (sometimes happens with resting-state scans where the task itself isn't + very important). + - Scan may be missing if it wasn't converted (e.g., a scan stopped early + and re-run). +6. Assign scan names to trigger period, infer times for other scan times + in cases where trigger failed, and ignore trigger periods associated with + scans that weren't kept in the BIDS dataset. """ +from bids import BIDSLayout +import json +import pandas as pd +import numpy as np + + +def extract_physio_onsets(f): + """ + TODO: Stitch segments together before extracting onsets. + """ + import bioread + from operator import itemgetter + from itertools import groupby + d = bioread.read_file(f) + c = d.channels[-1] + samplerate = 1. / c.samples_per_second + data = c.data + scan_idx = np.where(data > 0)[0] + groups = [] + for k, g in groupby(enumerate(scan_idx), lambda x: x[0]-x[1]): + groups.append(list(map(itemgetter(1), g))) + onsets = np.array([g[0] for g in groups]) + onsets_in_sec = onsets * samplerate + df = pd.DataFrame( + columns=['onset'], + data=onsets_in_sec, + ) + df['index'] = onsets + return df + + +def synchronize_onsets(phys_df, scan_df): + """There can be fewer physios than scans (task failed to trigger physio) + or fewer scans than physios (aborted scans are not retained in BIDS dataset). + + Both phys_df and scan_df should be sorted in chronological order (i.e., + ascending by "onset"). + Onsets are in seconds. The baseline doesn't matter. + """ + # Get difference between each scan onset and each physio trigger onset + diffs = np.zeros((scan_df.shape[0], phys_df.shape[0])) + for i, i_row in scan_df.iterrows(): + for j, j_row in phys_df.iterrows(): + onset_diff = i_row['onset'] - j_row['onset'] + diffs[i, j] = onset_diff + + # Find a scan onset for each physio onset where the time difference + # roughly matches up across as many physio onsets as possible + # Not necessarily *all* physio onsets, since sometimes scans are stopped + # early or re-run, and may not end up in the final BIDS dataset. + sel_rows = [] + for i_scan in range(diffs.shape[0]): + # difference between onset of scan and first physio + # if the scan corresponds to the physio, then this difference + # should be roughly equal for all corresponding scan/physio pairs + # TODO: Loop through physio onsets and combine findings across to account for dropped scans. + val = diffs[i_scan, 5] # if scan was dropped, this won't work. + + # find one row for each column + diffs_from_phys_onset = diffs - val + np.set_printoptions(suppress=True) + print(np.min(np.abs(diffs_from_phys_onset), axis=0)) + # Here we see one row (i_scan) with very low values (0-1) across many + # columns, indicating that that scan onset corresponds to the physio + # onset indexed in "val" + + diff_thresh = 5 # threshold for variability + idx = np.where(np.abs(diffs - val) < diff_thresh)[1] + print(idx) + if np.array_equal(idx, np.arange(diffs.shape[1])): + print('GOT IT: {} (row {})'.format(val, i_scan)) + sel_rows.append(i_row) + if len(sel_rows) != 1: + # We hope for one solution: one time-shift applied to the scan onsets + # to synchronize them with the physio onsets. + raise Exception('Bad sel_rows: {}'.format(len(sel_rows))) + sel_row = sel_rows[0] + clock_diff = scan_df.loc[sel_row, 'onset'] - phys_df.loc[0, 'onset'] + print('Scan onsets must be shifted {}s to match physio onsets'.format(clock_diff)) + + # Get onset of each scan in terms of the physio time series + scan_df['phys_onset'] = scan_df['onset'] - clock_diff + samplerate = ((phys_df.loc[1, 'index'] - phys_df.loc[0, 'index']) / + (phys_df.loc[1, 'onset'] - phys_df.loc[0, 'onset'])) + scan_df['phys_index'] = (scan_df['phys_onset'] * samplerate).astype(int) + return scan_df + + +def stitch_segments(physio_data): + """Merge segments in BioPac physio file. The segments must be named with + timestamps, so that the actual time difference can be calculated and zeros + can be inserted in the gaps. + + Timestamps have second-level resolution, so stitching them together adds + uncertainty to timing, which should be accounted for in the onset + synchronization. + """ + pass + + +def split_physio(scan_df, physio_file): + """Extract timeseries associated with each scan. + Key in dict is scan name or physio filename and value is physio data in + some format. + Uses the onsets, durations, and filenames from scan_df, and the time series + data from physio_file. + """ + pass + + +def save_physio(physio_data_dict): + """Save split physio data to BIDS dataset. + """ + pass + + +def determine_scan_durations(scan_df): + """Extract scan durations by loading fMRI files/metadata and + multiplying TR by number of volumes. This can be used to determine the + endpoints for the physio files. + """ + pass + + +def workflow(physio_file, scans_file): + scan_df = pd.read_table(scans_file) + scan_df = determine_scan_durations(scan_df) + physio_df = extract_physio_onsets(physio_file) + scan_df = synchronize_onsets(phys_df, scan_df) + # Extract timeseries associated with each scan. Key in dict is scan name or + # physio filename and key is physio data in some format. + physio_data_dict = split_physio(scan_df, physio_file) + save_physio(dset, physio_data_dict) From 7c169c5c431a7252e0a53b75af6b58159a9a60a0 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 8 May 2020 15:08:32 -0400 Subject: [PATCH 03/28] Fix style issues. --- phys2bids/conversion.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/phys2bids/conversion.py b/phys2bids/conversion.py index dc8704317..af7afb2c2 100644 --- a/phys2bids/conversion.py +++ b/phys2bids/conversion.py @@ -22,7 +22,6 @@ scans that weren't kept in the BIDS dataset. """ from bids import BIDSLayout -import json import pandas as pd import numpy as np @@ -39,9 +38,12 @@ def extract_physio_onsets(f): samplerate = 1. / c.samples_per_second data = c.data scan_idx = np.where(data > 0)[0] + # Get groups of consecutive numbers in index groups = [] - for k, g in groupby(enumerate(scan_idx), lambda x: x[0]-x[1]): + for k, g in groupby(enumerate(scan_idx), lambda x: x[0] - x[1]): groups.append(list(map(itemgetter(1), g))) + + # Extract onsets onsets = np.array([g[0] for g in groups]) onsets_in_sec = onsets * samplerate df = pd.DataFrame( @@ -76,7 +78,8 @@ def synchronize_onsets(phys_df, scan_df): # difference between onset of scan and first physio # if the scan corresponds to the physio, then this difference # should be roughly equal for all corresponding scan/physio pairs - # TODO: Loop through physio onsets and combine findings across to account for dropped scans. + # TODO: Loop through physio onsets and combine findings across to + # account for dropped scans. val = diffs[i_scan, 5] # if scan was dropped, this won't work. # find one row for each column @@ -145,12 +148,14 @@ def determine_scan_durations(scan_df): pass -def workflow(physio_file, scans_file): +def workflow(dset, physio_file, sub, ses=None): + layout = BIDSLayout(dset) + scans_file = layout.get(extension='tsv', suffix='scans', sub=sub, ses=ses) scan_df = pd.read_table(scans_file) scan_df = determine_scan_durations(scan_df) physio_df = extract_physio_onsets(physio_file) - scan_df = synchronize_onsets(phys_df, scan_df) + scan_df = synchronize_onsets(physio_df, scan_df) # Extract timeseries associated with each scan. Key in dict is scan name or # physio filename and key is physio data in some format. physio_data_dict = split_physio(scan_df, physio_file) - save_physio(dset, physio_data_dict) + save_physio(layout, physio_data_dict) From 1b198c453bf51abb50ada6aaf9d0d6aef3a2c174 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 8 May 2020 20:42:31 -0400 Subject: [PATCH 04/28] Get synchronize_onsets basically working. --- phys2bids/conversion.py | 102 +++++++++++++++++++++++++--------------- 1 file changed, 63 insertions(+), 39 deletions(-) diff --git a/phys2bids/conversion.py b/phys2bids/conversion.py index af7afb2c2..406f0e16b 100644 --- a/phys2bids/conversion.py +++ b/phys2bids/conversion.py @@ -62,57 +62,55 @@ def synchronize_onsets(phys_df, scan_df): ascending by "onset"). Onsets are in seconds. The baseline doesn't matter. """ - # Get difference between each scan onset and each physio trigger onset + # Get difference between each physio trigger onset and each scan onset diffs = np.zeros((scan_df.shape[0], phys_df.shape[0])) - for i, i_row in scan_df.iterrows(): - for j, j_row in phys_df.iterrows(): - onset_diff = i_row['onset'] - j_row['onset'] + for i, i_scan in scan_df.iterrows(): + for j, j_phys in phys_df.iterrows(): + onset_diff = j_phys['onset'] - i_scan['onset'] diffs[i, j] = onset_diff - # Find a scan onset for each physio onset where the time difference - # roughly matches up across as many physio onsets as possible - # Not necessarily *all* physio onsets, since sometimes scans are stopped - # early or re-run, and may not end up in the final BIDS dataset. + # Find the delay that gives the smallest difference between scan onsets + # and physio onsets sel_rows = [] + selected = (None, None) + thresh = 1000 for i_scan in range(diffs.shape[0]): - # difference between onset of scan and first physio - # if the scan corresponds to the physio, then this difference - # should be roughly equal for all corresponding scan/physio pairs - # TODO: Loop through physio onsets and combine findings across to - # account for dropped scans. - val = diffs[i_scan, 5] # if scan was dropped, this won't work. - - # find one row for each column - diffs_from_phys_onset = diffs - val - np.set_printoptions(suppress=True) - print(np.min(np.abs(diffs_from_phys_onset), axis=0)) - # Here we see one row (i_scan) with very low values (0-1) across many - # columns, indicating that that scan onset corresponds to the physio - # onset indexed in "val" - - diff_thresh = 5 # threshold for variability - idx = np.where(np.abs(diffs - val) < diff_thresh)[1] - print(idx) - if np.array_equal(idx, np.arange(diffs.shape[1])): - print('GOT IT: {} (row {})'.format(val, i_scan)) - sel_rows.append(i_row) - if len(sel_rows) != 1: - # We hope for one solution: one time-shift applied to the scan onsets - # to synchronize them with the physio onsets. - raise Exception('Bad sel_rows: {}'.format(len(sel_rows))) - sel_row = sel_rows[0] - clock_diff = scan_df.loc[sel_row, 'onset'] - phys_df.loc[0, 'onset'] - print('Scan onsets must be shifted {}s to match physio onsets'.format(clock_diff)) + for j_phys in range(diffs.shape[1]): + test_offset = diffs[i_scan, j_phys] + diffs_from_phys_onset = diffs - test_offset + diffs_from_abs = np.abs(diffs_from_phys_onset) + min_diff_row_idx = np.argmin(diffs_from_abs, axis=0) + min_diff_col_idx = np.arange(len(min_diff_row_idx)) + min_diffs = diffs_from_abs[min_diff_row_idx, min_diff_col_idx] + min_diff_sum = np.sum(min_diffs) + if min_diff_sum < thresh: + selected = (i_scan, j_phys) + thresh = min_diff_sum + print('Selected solution: {}'.format(selected)) + offset = diffs[selected[0], selected[1]] + + # Isolate close, but negative relative onsets, to ensure scan onsets are + # always before or at physio triggers. + close_thresh = 2 # threshold for "close" onsets + diffs_from_phys_onset = diffs - offset + min_diff_row_idx = np.argmin(np.abs(diffs_from_phys_onset), axis=0) + min_diff_col_idx = np.arange(len(min_diff_row_idx)) + min_diffs = diffs_from_phys_onset[min_diff_row_idx, min_diff_col_idx] + min_diffs_tmp = min_diffs[abs(min_diffs) <= close_thresh] + min_val = min(min_diffs_tmp) + min_diffs += min_val + offset += min_val + print('Scan DF should be adjusted forward by {} seconds'.format(offset)) # Get onset of each scan in terms of the physio time series - scan_df['phys_onset'] = scan_df['onset'] - clock_diff + scan_df['phys_onset'] = scan_df['onset'] + offset samplerate = ((phys_df.loc[1, 'index'] - phys_df.loc[0, 'index']) / (phys_df.loc[1, 'onset'] - phys_df.loc[0, 'onset'])) scan_df['phys_index'] = (scan_df['phys_onset'] * samplerate).astype(int) return scan_df -def stitch_segments(physio_data): +def stitch_segments(physio_file): """Merge segments in BioPac physio file. The segments must be named with timestamps, so that the actual time difference can be calculated and zeros can be inserted in the gaps. @@ -121,7 +119,33 @@ def stitch_segments(physio_data): uncertainty to timing, which should be accounted for in the onset synchronization. """ - pass + import bioread + d = bioread.read_file(physio_file) + em_df = pd.DataFrame() + c = 0 + for em in d.event_markers: + print('{}: {}'.format(em.text, em.sample_index)) + try: + em_dt = datetime.strptime(em.text, '%a %b %d %Y %H:%M:%S') + except: + continue + em_df.loc[c, 'segment'] = em.text + em_df.loc[c, 'start_idx'] = em.sample_index + em_df.loc[c, 'onset_time'] = em_dt + c += 1 + + # segment timestamp resolution is one second + # we need to incorporate possible variability into that + idx_diff = em_df['start_idx'].diff() + time_diff = em_df['onset_time'].diff().dt.total_seconds() + + for i in range(em_df.shape[0] - 1): + time_pair_diff = time_diff.iloc[i+1] + idx_pair_diff = idx_diff.iloc[i+1] / d.samples_per_second + if abs(idx_pair_diff - time_pair_diff) > 2: + diff_diff_sec = time_pair_diff - idx_pair_diff + diff_diff_idx = diff_diff_sec * d.samples_per_second + # Now we have the sizes, we can load the data and insert zeros. def split_physio(scan_df, physio_file): From c0367c4ecb1d5ac260385baf8a68db23fa3037bd Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Sat, 9 May 2020 12:59:38 -0400 Subject: [PATCH 05/28] Add duration determination. --- phys2bids/conversion.py | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/phys2bids/conversion.py b/phys2bids/conversion.py index 406f0e16b..e550de7f3 100644 --- a/phys2bids/conversion.py +++ b/phys2bids/conversion.py @@ -21,9 +21,12 @@ in cases where trigger failed, and ignore trigger periods associated with scans that weren't kept in the BIDS dataset. """ +import os.path as op + from bids import BIDSLayout import pandas as pd import numpy as np +from datetime import datetime def extract_physio_onsets(f): @@ -71,7 +74,6 @@ def synchronize_onsets(phys_df, scan_df): # Find the delay that gives the smallest difference between scan onsets # and physio onsets - sel_rows = [] selected = (None, None) thresh = 1000 for i_scan in range(diffs.shape[0]): @@ -106,7 +108,8 @@ def synchronize_onsets(phys_df, scan_df): scan_df['phys_onset'] = scan_df['onset'] + offset samplerate = ((phys_df.loc[1, 'index'] - phys_df.loc[0, 'index']) / (phys_df.loc[1, 'onset'] - phys_df.loc[0, 'onset'])) - scan_df['phys_index'] = (scan_df['phys_onset'] * samplerate).astype(int) + scan_df['index_onset'] = (scan_df['phys_onset'] * samplerate).astype(int) + scan_df['index_duration'] = (scan_df['duration'] * samplerate).astype(int) return scan_df @@ -140,8 +143,8 @@ def stitch_segments(physio_file): time_diff = em_df['onset_time'].diff().dt.total_seconds() for i in range(em_df.shape[0] - 1): - time_pair_diff = time_diff.iloc[i+1] - idx_pair_diff = idx_diff.iloc[i+1] / d.samples_per_second + time_pair_diff = time_diff.iloc[i + 1] + idx_pair_diff = idx_diff.iloc[i + 1] / d.samples_per_second if abs(idx_pair_diff - time_pair_diff) > 2: diff_diff_sec = time_pair_diff - idx_pair_diff diff_diff_idx = diff_diff_sec * d.samples_per_second @@ -164,19 +167,29 @@ def save_physio(physio_data_dict): pass -def determine_scan_durations(scan_df): +def determine_scan_durations(scan_df, layout): """Extract scan durations by loading fMRI files/metadata and multiplying TR by number of volumes. This can be used to determine the endpoints for the physio files. """ - pass + func_files = layout.get(datatype='func', suffix='bold', + extension=['nii.gz', 'nii']) + for func_file in func_files: + filename = op.join('func', func_file.filename) + n_vols = nib.load(func_file.path).shape[3] + tr = func_file.get_metadata()['RepetitionTime'] + duration = n_vols * tr + scan_df.loc[scan_df['filename'] == filename, 'duration'] = duration + return scan_df -def workflow(dset, physio_file, sub, ses=None): - layout = BIDSLayout(dset) +def workflow(bids_dir, physio_file, sub, ses=None): + layout = BIDSLayout(bids_dir) scans_file = layout.get(extension='tsv', suffix='scans', sub=sub, ses=ses) scan_df = pd.read_table(scans_file) - scan_df = determine_scan_durations(scan_df) + scan_df = determine_scan_durations(scan_df, layout) + scan_df = scan_df.dropna(subset=['duration']) # limit to relevant scans + scan_df = scan_df.drop_duplicates(subset=['acq_time']) # for multi-contrast scans physio_df = extract_physio_onsets(physio_file) scan_df = synchronize_onsets(physio_df, scan_df) # Extract timeseries associated with each scan. Key in dict is scan name or From bd736e44f3d8b4e1f56c0af88d32b27019b38179 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Sat, 9 May 2020 22:46:12 -0400 Subject: [PATCH 06/28] Minor refactor. --- phys2bids/conversion.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/phys2bids/conversion.py b/phys2bids/conversion.py index e550de7f3..1dc450263 100644 --- a/phys2bids/conversion.py +++ b/phys2bids/conversion.py @@ -31,6 +31,9 @@ def extract_physio_onsets(f): """ + Collect onsets from physio file, both in terms of seconds and time series + indices. + TODO: Replace file-loading with phys2bids physio_obj TODO: Stitch segments together before extracting onsets. """ import bioread @@ -61,25 +64,26 @@ def synchronize_onsets(phys_df, scan_df): """There can be fewer physios than scans (task failed to trigger physio) or fewer scans than physios (aborted scans are not retained in BIDS dataset). - Both phys_df and scan_df should be sorted in chronological order (i.e., - ascending by "onset"). Onsets are in seconds. The baseline doesn't matter. """ + phys_df = phys_df.sort_values(by=['onset']) + scan_df = scan_df.sort_values(by=['onset']) + # Get difference between each physio trigger onset and each scan onset - diffs = np.zeros((scan_df.shape[0], phys_df.shape[0])) + onset_diffs = np.zeros((scan_df.shape[0], phys_df.shape[0])) for i, i_scan in scan_df.iterrows(): for j, j_phys in phys_df.iterrows(): onset_diff = j_phys['onset'] - i_scan['onset'] - diffs[i, j] = onset_diff + onset_diffs[i, j] = onset_diff # Find the delay that gives the smallest difference between scan onsets # and physio onsets selected = (None, None) thresh = 1000 - for i_scan in range(diffs.shape[0]): - for j_phys in range(diffs.shape[1]): - test_offset = diffs[i_scan, j_phys] - diffs_from_phys_onset = diffs - test_offset + for i_scan in range(onset_diffs.shape[0]): + for j_phys in range(onset_diffs.shape[1]): + test_offset = onset_diffs[i_scan, j_phys] + diffs_from_phys_onset = onset_diffs - test_offset diffs_from_abs = np.abs(diffs_from_phys_onset) min_diff_row_idx = np.argmin(diffs_from_abs, axis=0) min_diff_col_idx = np.arange(len(min_diff_row_idx)) @@ -89,12 +93,12 @@ def synchronize_onsets(phys_df, scan_df): selected = (i_scan, j_phys) thresh = min_diff_sum print('Selected solution: {}'.format(selected)) - offset = diffs[selected[0], selected[1]] + offset = onset_diffs[selected[0], selected[1]] # Isolate close, but negative relative onsets, to ensure scan onsets are # always before or at physio triggers. close_thresh = 2 # threshold for "close" onsets - diffs_from_phys_onset = diffs - offset + diffs_from_phys_onset = onset_diffs - offset min_diff_row_idx = np.argmin(np.abs(diffs_from_phys_onset), axis=0) min_diff_col_idx = np.arange(len(min_diff_row_idx)) min_diffs = diffs_from_phys_onset[min_diff_row_idx, min_diff_col_idx] From 223adb1b737489bb3e94f151f9af896eea1aea53 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Mon, 11 May 2020 20:34:26 -0400 Subject: [PATCH 07/28] More documentation. --- phys2bids/conversion.py | 83 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 74 insertions(+), 9 deletions(-) diff --git a/phys2bids/conversion.py b/phys2bids/conversion.py index 1dc450263..40b41f20e 100644 --- a/phys2bids/conversion.py +++ b/phys2bids/conversion.py @@ -29,7 +29,7 @@ from datetime import datetime -def extract_physio_onsets(f): +def extract_physio_onsets(physio_file): """ Collect onsets from physio file, both in terms of seconds and time series indices. @@ -39,11 +39,11 @@ def extract_physio_onsets(f): import bioread from operator import itemgetter from itertools import groupby - d = bioread.read_file(f) - c = d.channels[-1] - samplerate = 1. / c.samples_per_second - data = c.data - scan_idx = np.where(data > 0)[0] + physio_data = bioread.read_file(physio_file) + trigger_channel = physio_data.channels[-1] + samplerate = 1. / trigger_channel.samples_per_second + trigger_data = trigger_channel.data + scan_idx = np.where(trigger_data > 0)[0] # Get groups of consecutive numbers in index groups = [] for k, g in groupby(enumerate(scan_idx), lambda x: x[0] - x[1]): @@ -61,10 +61,30 @@ def extract_physio_onsets(f): def synchronize_onsets(phys_df, scan_df): - """There can be fewer physios than scans (task failed to trigger physio) + """Find matching scans and physio trigger periods from separate DataFrames. + + There can be fewer physios than scans (task failed to trigger physio) or fewer scans than physios (aborted scans are not retained in BIDS dataset). Onsets are in seconds. The baseline doesn't matter. + + Parameters + ---------- + phys_df : pandas.DataFrame + DataFrame with onsets of physio trigger periods, in seconds. The + baseline does not matter, so it is reasonable for the onsets to start + with zero. + scan_df : pandas.DataFrame + DataFrame with onsets and names of functional scans from BIDS dataset, + in seconds. The baseline does not matter, so it is reasonable for the + onsets to start with zero. + + Returns + ------- + scan_df : pandas.DataFrame + Updated scan DataFrame, now with columns for predicted physio onsets in + seconds and in indices of the physio trigger channel, as well as scan + duration in units of the physio trigger channel. """ phys_df = phys_df.sort_values(by=['onset']) scan_df = scan_df.sort_values(by=['onset']) @@ -117,7 +137,7 @@ def synchronize_onsets(phys_df, scan_df): return scan_df -def stitch_segments(physio_file): +def merge_segments(physio_file): """Merge segments in BioPac physio file. The segments must be named with timestamps, so that the actual time difference can be calculated and zeros can be inserted in the gaps. @@ -155,12 +175,25 @@ def stitch_segments(physio_file): # Now we have the sizes, we can load the data and insert zeros. -def split_physio(scan_df, physio_file): +def split_physio(scan_df, physio_file, time_before=6, time_after=6): """Extract timeseries associated with each scan. Key in dict is scan name or physio filename and value is physio data in some format. Uses the onsets, durations, and filenames from scan_df, and the time series data from physio_file. + + Parameters + ---------- + scan_df : pandas.DataFrame + physio_file : str + time_before : float + time_after : float + + Returns + ------- + physio_data_dict : dict + Dictionary containing physio run names as keys and associated segments + as values. """ pass @@ -175,6 +208,20 @@ def determine_scan_durations(scan_df, layout): """Extract scan durations by loading fMRI files/metadata and multiplying TR by number of volumes. This can be used to determine the endpoints for the physio files. + + Parameters + ---------- + scan_df : pandas.DataFrame + Scans DataFrame containing functional scan filenames and onset times. + layout : bids.layout.BIDSLayout + Dataset layout. Used to identify functional scans and load them to + determine scan durations. + + Returns + ------- + scan_df : pandas.DataFrame + Updated DataFrame with new "duration" column. Calculated durations are + in seconds. """ func_files = layout.get(datatype='func', suffix='bold', extension=['nii.gz', 'nii']) @@ -188,6 +235,24 @@ def determine_scan_durations(scan_df, layout): def workflow(bids_dir, physio_file, sub, ses=None): + """A potential workflow for running physio/scan onset synchronization and + BIDSification. This workflow writes out physio files to a BIDS dataset. + + Parameters + ---------- + bids_dir : str + Path to BIDS dataset + physio_file : str or list of str + Either a single BioPac physio file or multiple physio files from the + same scanning session. Each file *must* contain multiple physio trigger + periods associated with scans. If multiple files are provided, they + must have timestamped segments. + sub : str + Subject ID. Used to search the BIDS dataset for relevant scans. + ses : str or None, optional + Session ID. Used to search the BIDS dataset for relevant scans in + longitudinal studies. Default is None. + """ layout = BIDSLayout(bids_dir) scans_file = layout.get(extension='tsv', suffix='scans', sub=sub, ses=ses) scan_df = pd.read_table(scans_file) From 7b262ba4fc84cf846b88e23e11205c1c1bb11bd8 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Tue, 12 May 2020 10:56:33 -0400 Subject: [PATCH 08/28] Some more fixes. --- phys2bids/conversion.py | 64 ++++++++++++++++++++++++++++++++--------- 1 file changed, 51 insertions(+), 13 deletions(-) diff --git a/phys2bids/conversion.py b/phys2bids/conversion.py index 40b41f20e..49059957a 100644 --- a/phys2bids/conversion.py +++ b/phys2bids/conversion.py @@ -26,6 +26,7 @@ from bids import BIDSLayout import pandas as pd import numpy as np +import nibabel as nib from datetime import datetime @@ -52,11 +53,14 @@ def extract_physio_onsets(physio_file): # Extract onsets onsets = np.array([g[0] for g in groups]) onsets_in_sec = onsets * samplerate + durations = np.array([g[-1] - g[0] for g in groups]) + durations_in_sec = durations * samplerate df = pd.DataFrame( columns=['onset'], data=onsets_in_sec, ) df['index'] = onsets + df['duration'] = durations_in_sec return df @@ -204,18 +208,18 @@ def save_physio(physio_data_dict): pass -def determine_scan_durations(scan_df, layout): +def determine_scan_durations(layout, scan_df): """Extract scan durations by loading fMRI files/metadata and multiplying TR by number of volumes. This can be used to determine the endpoints for the physio files. Parameters ---------- - scan_df : pandas.DataFrame - Scans DataFrame containing functional scan filenames and onset times. layout : bids.layout.BIDSLayout Dataset layout. Used to identify functional scans and load them to determine scan durations. + scan_df : pandas.DataFrame + Scans DataFrame containing functional scan filenames and onset times. Returns ------- @@ -223,17 +227,55 @@ def determine_scan_durations(scan_df, layout): Updated DataFrame with new "duration" column. Calculated durations are in seconds. """ + # TODO: parse entities in func files for searches instead of larger search. func_files = layout.get(datatype='func', suffix='bold', - extension=['nii.gz', 'nii']) + extension=['nii.gz', 'nii'], + sub=sub, ses=ses) for func_file in func_files: filename = op.join('func', func_file.filename) - n_vols = nib.load(func_file.path).shape[3] - tr = func_file.get_metadata()['RepetitionTime'] - duration = n_vols * tr - scan_df.loc[scan_df['filename'] == filename, 'duration'] = duration + if filename in scan_df['filename']: + n_vols = nib.load(func_file.path).shape[3] + tr = func_file.get_metadata()['RepetitionTime'] + duration = n_vols * tr + scan_df.loc[scan_df['filename'] == filename, 'duration'] = duration + else: + print('Skipping {}'.format(filename)) return scan_df +def load_scan_data(layout, sub, ses): + """ + """ + # scans_file = layout.get(extension='tsv', suffix='scans', sub=sub, ses=ses) + # df = pd.read_table(scans_file) + + # Collect acquisition times + # Will be replaced with scans file if heudiconv makes the change + img_files = layout.get(datatype='func', suffix='bold', + extension=['nii.gz', 'nii'], + sub=sub, ses=ses) + df = pd.DataFrame( + columns=['filename', 'acq_time'], + ) + for i, img_file in enumerate(img_files): + df.loc[i, 'filename'] = op.join('func', img_file.filename) + df.loc[i, 'acq_time'] = img_file.get_metadata()['AcquisitionTime'] + + # Now back to general-purpose code + df = determine_scan_durations(layout, df) + df = df.dropna(subset=['duration']) # limit to relevant scans + # TODO: Drop duplicates at second-level resolution. In case echoes are + # acquired at ever-so-slightly different times. + df = df.drop_duplicates(subset=['acq_time']) # for multi-contrast scans + + # convert scan times to relative onsets (first scan is at 0 seconds) + df['acq_time'] = pd.to_datetime(df['acq_time']) + df = df.sort_values(by='acq_time') + df['onset'] = (df['acq_time'] - df['acq_time'].min()) + df['onset'] = df['onset'].dt.total_seconds() + return df + + def workflow(bids_dir, physio_file, sub, ses=None): """A potential workflow for running physio/scan onset synchronization and BIDSification. This workflow writes out physio files to a BIDS dataset. @@ -254,11 +296,7 @@ def workflow(bids_dir, physio_file, sub, ses=None): longitudinal studies. Default is None. """ layout = BIDSLayout(bids_dir) - scans_file = layout.get(extension='tsv', suffix='scans', sub=sub, ses=ses) - scan_df = pd.read_table(scans_file) - scan_df = determine_scan_durations(scan_df, layout) - scan_df = scan_df.dropna(subset=['duration']) # limit to relevant scans - scan_df = scan_df.drop_duplicates(subset=['acq_time']) # for multi-contrast scans + scan_df = load_scan_data(layout, sub=sub, ses=ses) physio_df = extract_physio_onsets(physio_file) scan_df = synchronize_onsets(physio_df, scan_df) # Extract timeseries associated with each scan. Key in dict is scan name or From 7191a3ea9424cde69291bfaf4ae06d79bc11fe0b Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Tue, 12 May 2020 11:22:54 -0400 Subject: [PATCH 09/28] Fix bugs and include echo keyword. Will need to drop before general use, but there is a plan in place. Just not sure how to implement it yet. --- phys2bids/conversion.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/phys2bids/conversion.py b/phys2bids/conversion.py index 49059957a..4adc1537c 100644 --- a/phys2bids/conversion.py +++ b/phys2bids/conversion.py @@ -208,7 +208,7 @@ def save_physio(physio_data_dict): pass -def determine_scan_durations(layout, scan_df): +def determine_scan_durations(layout, scan_df, sub, ses): """Extract scan durations by loading fMRI files/metadata and multiplying TR by number of volumes. This can be used to determine the endpoints for the physio files. @@ -230,10 +230,11 @@ def determine_scan_durations(layout, scan_df): # TODO: parse entities in func files for searches instead of larger search. func_files = layout.get(datatype='func', suffix='bold', extension=['nii.gz', 'nii'], - sub=sub, ses=ses) + sub=sub, ses=ses, echo=1) + scan_df['duration'] = None for func_file in func_files: filename = op.join('func', func_file.filename) - if filename in scan_df['filename']: + if filename in scan_df['filename'].values: n_vols = nib.load(func_file.path).shape[3] tr = func_file.get_metadata()['RepetitionTime'] duration = n_vols * tr @@ -246,14 +247,18 @@ def determine_scan_durations(layout, scan_df): def load_scan_data(layout, sub, ses): """ """ + # This is the strategy we'll use in the future. Commented out for now. # scans_file = layout.get(extension='tsv', suffix='scans', sub=sub, ses=ses) # df = pd.read_table(scans_file) # Collect acquisition times - # Will be replaced with scans file if heudiconv makes the change + # NOTE: Will be replaced with scans file if heudiconv makes the change + # NOTE: Currently keeping echo in to work around bug. Basically echoes are + # acquired at slightly different times, so we need to get the min + # AcquisitionTime across multi-contrast scans like multi-echo at this step. img_files = layout.get(datatype='func', suffix='bold', extension=['nii.gz', 'nii'], - sub=sub, ses=ses) + sub=sub, ses=ses, echo=1) df = pd.DataFrame( columns=['filename', 'acq_time'], ) @@ -262,13 +267,13 @@ def load_scan_data(layout, sub, ses): df.loc[i, 'acq_time'] = img_file.get_metadata()['AcquisitionTime'] # Now back to general-purpose code - df = determine_scan_durations(layout, df) + df = determine_scan_durations(layout, df, sub=sub, ses=ses) df = df.dropna(subset=['duration']) # limit to relevant scans - # TODO: Drop duplicates at second-level resolution. In case echoes are + # TODO: Drop duplicates at second-level resolution. Because echoes are # acquired at ever-so-slightly different times. df = df.drop_duplicates(subset=['acq_time']) # for multi-contrast scans - # convert scan times to relative onsets (first scan is at 0 seconds) + # Convert scan times to relative onsets (first scan is at 0 seconds) df['acq_time'] = pd.to_datetime(df['acq_time']) df = df.sort_values(by='acq_time') df['onset'] = (df['acq_time'] - df['acq_time'].min()) From d15ad97690b8d58e0a3d49d06b4a0558eaa3c89f Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Mon, 18 May 2020 11:48:32 -0400 Subject: [PATCH 10/28] More flexible trigger channel selection for debugging. --- phys2bids/conversion.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/phys2bids/conversion.py b/phys2bids/conversion.py index 4adc1537c..8f67e967b 100644 --- a/phys2bids/conversion.py +++ b/phys2bids/conversion.py @@ -41,7 +41,10 @@ def extract_physio_onsets(physio_file): from operator import itemgetter from itertools import groupby physio_data = bioread.read_file(physio_file) - trigger_channel = physio_data.channels[-1] + is_trigger = ['trig' in c.name.lower() for c in physio_data.channels] + trigger_idx = np.where(is_trigger)[0] + trigger_idx = trigger_idx[0] if len(trigger_idx) else -1 + trigger_channel = physio_data.channels[trigger_idx] samplerate = 1. / trigger_channel.samples_per_second trigger_data = trigger_channel.data scan_idx = np.where(trigger_data > 0)[0] From eb46a71d7881c4dcd0c5e70469d88cb99bb01f1c Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Thu, 18 Jun 2020 11:59:57 -0400 Subject: [PATCH 11/28] More progress on multi-run conversion. --- phys2bids/conversion.py | 83 ++++++++++++++++++++++++++--------------- 1 file changed, 53 insertions(+), 30 deletions(-) diff --git a/phys2bids/conversion.py b/phys2bids/conversion.py index 8f67e967b..4029b0ca6 100644 --- a/phys2bids/conversion.py +++ b/phys2bids/conversion.py @@ -22,32 +22,24 @@ scans that weren't kept in the BIDS dataset. """ import os.path as op +from operator import itemgetter +from itertools import groupby +from datetime import datetime from bids import BIDSLayout import pandas as pd import numpy as np import nibabel as nib -from datetime import datetime -def extract_physio_onsets(physio_file): +def extract_physio_onsets(trigger_timeseries, freq, threshold=0.5): """ Collect onsets from physio file, both in terms of seconds and time series indices. - TODO: Replace file-loading with phys2bids physio_obj TODO: Stitch segments together before extracting onsets. """ - import bioread - from operator import itemgetter - from itertools import groupby - physio_data = bioread.read_file(physio_file) - is_trigger = ['trig' in c.name.lower() for c in physio_data.channels] - trigger_idx = np.where(is_trigger)[0] - trigger_idx = trigger_idx[0] if len(trigger_idx) else -1 - trigger_channel = physio_data.channels[trigger_idx] - samplerate = 1. / trigger_channel.samples_per_second - trigger_data = trigger_channel.data - scan_idx = np.where(trigger_data > 0)[0] + samplerate = 1. / freq + scan_idx = np.where(trigger_timeseries > 0)[0] # Get groups of consecutive numbers in index groups = [] for k, g in groupby(enumerate(scan_idx), lambda x: x[0] - x[1]): @@ -68,23 +60,26 @@ def extract_physio_onsets(physio_file): def synchronize_onsets(phys_df, scan_df): - """Find matching scans and physio trigger periods from separate DataFrames. + """Find matching scans and physio trigger periods from separate DataFrames, + using time differences within each DataFrame. There can be fewer physios than scans (task failed to trigger physio) or fewer scans than physios (aborted scans are not retained in BIDS dataset). - Onsets are in seconds. The baseline doesn't matter. + Onsets are in seconds. The baseline (i.e., absolute timing) doesn't matter. + Relative timing is all that matters. Parameters ---------- phys_df : pandas.DataFrame DataFrame with onsets of physio trigger periods, in seconds. The baseline does not matter, so it is reasonable for the onsets to start - with zero. + with zero. The following columns are required: 'onset', 'index'. scan_df : pandas.DataFrame DataFrame with onsets and names of functional scans from BIDS dataset, in seconds. The baseline does not matter, so it is reasonable for the - onsets to start with zero. + onsets to start with zero. The following columns are required: 'onset', + 'duration'. Returns ------- @@ -124,7 +119,7 @@ def synchronize_onsets(phys_df, scan_df): # Isolate close, but negative relative onsets, to ensure scan onsets are # always before or at physio triggers. - close_thresh = 2 # threshold for "close" onsets + close_thresh = 2 # threshold for "close" onsets, in seconds diffs_from_phys_onset = onset_diffs - offset min_diff_row_idx = np.argmin(np.abs(diffs_from_phys_onset), axis=0) min_diff_col_idx = np.arange(len(min_diff_row_idx)) @@ -141,6 +136,7 @@ def synchronize_onsets(phys_df, scan_df): (phys_df.loc[1, 'onset'] - phys_df.loc[0, 'onset'])) scan_df['index_onset'] = (scan_df['phys_onset'] * samplerate).astype(int) scan_df['index_duration'] = (scan_df['duration'] * samplerate).astype(int) + scan_df['index_offset'] = scan_df['index_onset'] + scan_df['index_duration'] return scan_df @@ -152,6 +148,8 @@ def merge_segments(physio_file): Timestamps have second-level resolution, so stitching them together adds uncertainty to timing, which should be accounted for in the onset synchronization. + + NOTE: NOT WORKING """ import bioread d = bioread.read_file(physio_file) @@ -182,7 +180,7 @@ def merge_segments(physio_file): # Now we have the sizes, we can load the data and insert zeros. -def split_physio(scan_df, physio_file, time_before=6, time_after=6): +def split_physio(physio, split_times, time_before=6, time_after=6): """Extract timeseries associated with each scan. Key in dict is scan name or physio filename and value is physio data in some format. @@ -191,10 +189,14 @@ def split_physio(scan_df, physio_file, time_before=6, time_after=6): Parameters ---------- - scan_df : pandas.DataFrame - physio_file : str + physio_file : BlueprintInput + split_times : list of tuple time_before : float + Amount of time, in seconds, to retain in physio time series *before* + scan. time_after : float + Amount of time, in seconds, to retain in physio time series *after* + scan. Returns ------- @@ -205,7 +207,7 @@ def split_physio(scan_df, physio_file, time_before=6, time_after=6): pass -def save_physio(physio_data_dict): +def save_physio(fn, physio_data): """Save split physio data to BIDS dataset. """ pass @@ -248,7 +250,20 @@ def determine_scan_durations(layout, scan_df, sub, ses): def load_scan_data(layout, sub, ses): - """ + """Extract subject- and session-specific scan onsets and durations from + BIDSLayout. + + Parameters + ---------- + layout : BIDSLayout + sub : str + ses : str + + Returns + ------- + df : pandas.DataFrame + DataFrame with the following columns: 'filename', 'acq_time', + 'duration', 'onset'. """ # This is the strategy we'll use in the future. Commented out for now. # scans_file = layout.get(extension='tsv', suffix='scans', sub=sub, ses=ses) @@ -284,7 +299,7 @@ def load_scan_data(layout, sub, ses): return df -def workflow(bids_dir, physio_file, sub, ses=None): +def workflow(bids_dir, physio_file, chtrig, sub, ses=None): """A potential workflow for running physio/scan onset synchronization and BIDSification. This workflow writes out physio files to a BIDS dataset. @@ -305,9 +320,17 @@ def workflow(bids_dir, physio_file, sub, ses=None): """ layout = BIDSLayout(bids_dir) scan_df = load_scan_data(layout, sub=sub, ses=ses) - physio_df = extract_physio_onsets(physio_file) + physio = populate_phys_input(physio_file, chtrig) + # physio = merge_physio_segments(physio) + trigger_timeseries = physio.timeseries[chtrig] + freq = physio.freq[chtrig] + physio_df = extract_physio_onsets(trigger_timeseries, freq=freq) scan_df = synchronize_onsets(physio_df, scan_df) - # Extract timeseries associated with each scan. Key in dict is scan name or - # physio filename and key is physio data in some format. - physio_data_dict = split_physio(scan_df, physio_file) - save_physio(layout, physio_data_dict) + for i_row, row in scan_df.iterrows(): + base_fname = row['filename'] + split_times = (row['index_onset'], row['index_offset']) + physio_split = split_physio(physio, split_times) + # we need a more elegant approach that drops multi-contrast entities and + # allows for other base datatypes + out_fname = base_fname.replace('_bold.nii.gz', '_physio.tsv.gz') + save_physio(out_fname, physio_split) From 9a5ce3bcf43f612c0a4c47e66ef6f77e5fa355a4 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 3 Jul 2020 16:09:09 -0400 Subject: [PATCH 12/28] Workflow is working now. --- phys2bids/conversion.py | 87 +++++++++++++--------------- phys2bids/slice4phys.py | 122 +++++++++++++++++++++++++++++++++++++--- 2 files changed, 155 insertions(+), 54 deletions(-) diff --git a/phys2bids/conversion.py b/phys2bids/conversion.py index 4029b0ca6..21ad1b795 100644 --- a/phys2bids/conversion.py +++ b/phys2bids/conversion.py @@ -21,16 +21,23 @@ in cases where trigger failed, and ignore trigger periods associated with scans that weren't kept in the BIDS dataset. """ +import re import os.path as op from operator import itemgetter from itertools import groupby from datetime import datetime +import logging from bids import BIDSLayout import pandas as pd import numpy as np import nibabel as nib +from .slice4phys import update_name, slice_phys +from .interfaces.acq import populate_phys_input + +LGR = logging.getLogger(__name__) + def extract_physio_onsets(trigger_timeseries, freq, threshold=0.5): """ @@ -180,39 +187,21 @@ def merge_segments(physio_file): # Now we have the sizes, we can load the data and insert zeros. -def split_physio(physio, split_times, time_before=6, time_after=6): - """Extract timeseries associated with each scan. - Key in dict is scan name or physio filename and value is physio data in - some format. - Uses the onsets, durations, and filenames from scan_df, and the time series - data from physio_file. - - Parameters - ---------- - physio_file : BlueprintInput - split_times : list of tuple - time_before : float - Amount of time, in seconds, to retain in physio time series *before* - scan. - time_after : float - Amount of time, in seconds, to retain in physio time series *after* - scan. - - Returns - ------- - physio_data_dict : dict - Dictionary containing physio run names as keys and associated segments - as values. - """ - pass - - def save_physio(fn, physio_data): """Save split physio data to BIDS dataset. """ pass +def drop_bids_multicontrast_keys(fname): + dirname = op.dirname(fname) + fname = op.basename(fname) + multi_contrast_entities = ['echo', 'part', 'fa', 'inv', 'ch'] + regex = '_({})-[0-9a-zA-Z]+'.format('|'.join(multi_contrast_entities)) + fname = re.sub(regex, '', fname) + return op.join(dirname, fname) + + def determine_scan_durations(layout, scan_df, sub, ses): """Extract scan durations by loading fMRI files/metadata and multiplying TR by number of volumes. This can be used to determine the @@ -235,17 +224,17 @@ def determine_scan_durations(layout, scan_df, sub, ses): # TODO: parse entities in func files for searches instead of larger search. func_files = layout.get(datatype='func', suffix='bold', extension=['nii.gz', 'nii'], - sub=sub, ses=ses, echo=1) + sub=sub, ses=ses) scan_df['duration'] = None for func_file in func_files: filename = op.join('func', func_file.filename) - if filename in scan_df['filename'].values: + if filename in scan_df['original_filename'].values: n_vols = nib.load(func_file.path).shape[3] tr = func_file.get_metadata()['RepetitionTime'] duration = n_vols * tr - scan_df.loc[scan_df['filename'] == filename, 'duration'] = duration + scan_df.loc[scan_df['original_filename'] == filename, 'duration'] = duration else: - print('Skipping {}'.format(filename)) + LGR.info('Skipping {}'.format(filename)) return scan_df @@ -276,24 +265,27 @@ def load_scan_data(layout, sub, ses): # AcquisitionTime across multi-contrast scans like multi-echo at this step. img_files = layout.get(datatype='func', suffix='bold', extension=['nii.gz', 'nii'], - sub=sub, ses=ses, echo=1) + sub=sub, ses=ses) df = pd.DataFrame( - columns=['filename', 'acq_time'], + columns=['original_filename', 'acq_time'], ) for i, img_file in enumerate(img_files): - df.loc[i, 'filename'] = op.join('func', img_file.filename) + df.loc[i, 'original_filename'] = op.join('func', img_file.filename) df.loc[i, 'acq_time'] = img_file.get_metadata()['AcquisitionTime'] + # Get generic filenames (without within-acquisition entities like echo) + df['filename'] = df['original_filename'].apply(drop_bids_multicontrast_keys) + + # Get "first" scan from multi-file acquisitions + df['acq_time'] = pd.to_datetime(df['acq_time']) + df = df.sort_values(by='acq_time') + df = df.drop_duplicates(subset='filename', keep='first', ignore_index=True) + # Now back to general-purpose code df = determine_scan_durations(layout, df, sub=sub, ses=ses) df = df.dropna(subset=['duration']) # limit to relevant scans - # TODO: Drop duplicates at second-level resolution. Because echoes are - # acquired at ever-so-slightly different times. - df = df.drop_duplicates(subset=['acq_time']) # for multi-contrast scans # Convert scan times to relative onsets (first scan is at 0 seconds) - df['acq_time'] = pd.to_datetime(df['acq_time']) - df = df.sort_values(by='acq_time') df['onset'] = (df['acq_time'] - df['acq_time'].min()) df['onset'] = df['onset'].dt.total_seconds() return df @@ -322,15 +314,16 @@ def workflow(bids_dir, physio_file, chtrig, sub, ses=None): scan_df = load_scan_data(layout, sub=sub, ses=ses) physio = populate_phys_input(physio_file, chtrig) # physio = merge_physio_segments(physio) - trigger_timeseries = physio.timeseries[chtrig] - freq = physio.freq[chtrig] + + trigger_timeseries = physio.timeseries[physio.trigger_idx + 1] + freq = physio.freq[physio.trigger_idx + 1] physio_df = extract_physio_onsets(trigger_timeseries, freq=freq) scan_df = synchronize_onsets(physio_df, scan_df) + run_dict = {} for i_row, row in scan_df.iterrows(): - base_fname = row['filename'] + # we need something better for updating the fname here + base_fname = update_name(row['filename'], suffix='physio', extension='.tsv.gz') split_times = (row['index_onset'], row['index_offset']) - physio_split = split_physio(physio, split_times) - # we need a more elegant approach that drops multi-contrast entities and - # allows for other base datatypes - out_fname = base_fname.replace('_bold.nii.gz', '_physio.tsv.gz') - save_physio(out_fname, physio_split) + run_dict[base_fname] = split_times + phys_dict = slice_phys(physio, run_dict) + return phys_dict diff --git a/phys2bids/slice4phys.py b/phys2bids/slice4phys.py index 573a7d1dc..429ad557e 100644 --- a/phys2bids/slice4phys.py +++ b/phys2bids/slice4phys.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- - +import re import logging from copy import deepcopy @@ -9,6 +9,68 @@ LGR = logging.getLogger(__name__) +def update_name(basename, **kwargs): + """ + Add entities, suffix, and/or extension to a BIDS filename while retaining + BIDS compatibility. + + TODO: Replace list of entities with versioned, yaml entity table from BIDS. + """ + import os.path as op + ENTITY_ORDER = ['sub', 'ses', 'task', 'acq', 'ce', 'rec', 'dir', 'run', + 'mod', 'echo', 'recording', 'proc', 'space', 'split'] + + outdir = op.dirname(basename) + outname = op.basename(basename) + + # Determine scan suffix (should always be physio) + suffix = outname.split('_')[-1].split('.')[0] + extension = '.' + '.'.join(outname.split('_')[-1].split('.')[1:]) + filetype = suffix + extension + + for key, val in kwargs.items(): + if key == 'suffix': + if not val.startswith('_'): + val = '_' + val + + if not val.endswith('.'): + val = val + '.' + + outname = outname.replace('_' + suffix + '.', val) + elif key == 'extension': + if not val.startswith('.'): + val = '.' + val + outname = outname.replace(extension, val) + else: + if key not in ENTITY_ORDER: + raise ValueError('Key {} not understood.'.format(key)) + + # entities + if '_{}-{}'.format(key, val) in basename: + LGR.warning('Key {} already found in basename {}. ' + 'Skipping.'.format(key, basename)) + + elif '_{}-'.format(key) in basename: + LGR.warning('Key {} already found in basename {}. ' + 'Overwriting.'.format(key, basename)) + regex = '_{}-[0-9a-zA-Z]+'.format(key) + fname = re.sub(regex, '_{}-{}'.format(key, val), fname) + else: + loc = ENTITY_ORDER.index(key) + entities_to_check = ENTITY_ORDER[loc:] + entities_to_check = ['_{}-'.format(etc) for etc in entities_to_check] + entities_to_check.append('_{}'.format(filetype)) + for etc in entities_to_check: + if etc in outname: + outname = outname.replace( + etc, + '_{}-{}{}'.format(key, val, etc) + ) + break + outname = op.join(outdir, outname) + return outname + + def find_runs(phys_in, ntp_list, tr_list, thr=None, padding=9): """ Find runs slicing index. @@ -107,6 +169,55 @@ def find_runs(phys_in, ntp_list, tr_list, thr=None, padding=9): return run_timestamps +def slice_phys(phys, run_timestamps): + """ + Slice a physio object based on run-/file-wise onsets and offsets. + Adapted from slice4phys with the goal of modularizing slicing functionality + (i.e., cutting out the run detection step). + + Parameters + ---------- + phys : BlueprintInput + run_timestamps : dict + Each key is a run-wise filename and value is a tuple of (onset, offset), + where onset and offset are integers corresponding to index values of + the trigger channel. + + Returns + ------- + phys_in_slices : dict + Each key is a run-wise filename (possibly further split by frequency) + and each value is a BlueprintInput object. + """ + phys_in_slices = {} + for i_run, fname in enumerate(run_timestamps.keys()): + # tmp variable to collect run's info + run_attributes = run_timestamps[fname] + trigger_onset, trigger_offset = run_attributes + + unique_frequencies = np.unique(phys.freq) + trigger_freq = phys.freq[phys.trigger_idx + 1] + for freq in unique_frequencies: + # Get onset and offset for the requested frequency + if freq != trigger_freq: + onset = int(trigger_onset * trigger_freq / freq) # no clue if this is right + offset = int(trigger_offset * trigger_freq / freq) + else: + onset = trigger_onset + offset = trigger_offset + + # Split into frequency-specific object limited to onset-offset + if len(unique_frequencies) > 1: + run_fname = update_name(fname, recording=str(freq)+'Hz') + temp_phys_in = deepcopy(phys[onset:offset]) + not_freq = [i for i in range(len(phys.freq)) if phys.freq[i] != freq] + temp_phys_in.delete_at_index(not_freq) + phys_in_slices[run_fname] = temp_phys_in + else: + phys_in_slices[fname] = deepcopy(phys[onset:offset]) + return phys_in_slices + + def slice4phys(phys_in, ntp_list, tr_list, thr, padding=9): """ Slice runs for phys2bids. @@ -115,13 +226,10 @@ def slice4phys(phys_in, ntp_list, tr_list, thr, padding=9): --------- phys_in: BlueprintInput object Object returned by BlueprintInput class - ntp_list: list - a list of integers given by the user as `ntp` input + nsec_list: list + a list of floats given by the user as `nsec` input Default: [0, ] - tr_list: list - a list of float given by the user as `tr` input - Default: [1,] - padding: int + padding: int, optional extra time at beginning and end of timeseries, expressed in seconds (s) Default: 9 From 26f8aae8b87d1a0317d7f58de7a4346eea65f1b4 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 3 Jul 2020 16:19:09 -0400 Subject: [PATCH 13/28] Fix style issues. --- phys2bids/conversion.py | 8 +++++--- phys2bids/slice4phys.py | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/phys2bids/conversion.py b/phys2bids/conversion.py index 21ad1b795..444ca282c 100644 --- a/phys2bids/conversion.py +++ b/phys2bids/conversion.py @@ -139,8 +139,9 @@ def synchronize_onsets(phys_df, scan_df): # Get onset of each scan in terms of the physio time series scan_df['phys_onset'] = scan_df['onset'] + offset - samplerate = ((phys_df.loc[1, 'index'] - phys_df.loc[0, 'index']) / - (phys_df.loc[1, 'onset'] - phys_df.loc[0, 'onset'])) + rise = (phys_df.loc[1, 'index'] - phys_df.loc[0, 'index']) + run = (phys_df.loc[1, 'onset'] - phys_df.loc[0, 'onset']) + samplerate = rise / run scan_df['index_onset'] = (scan_df['phys_onset'] * samplerate).astype(int) scan_df['index_duration'] = (scan_df['duration'] * samplerate).astype(int) scan_df['index_offset'] = scan_df['index_onset'] + scan_df['index_duration'] @@ -166,7 +167,7 @@ def merge_segments(physio_file): print('{}: {}'.format(em.text, em.sample_index)) try: em_dt = datetime.strptime(em.text, '%a %b %d %Y %H:%M:%S') - except: + except ValueError: continue em_df.loc[c, 'segment'] = em.text em_df.loc[c, 'start_idx'] = em.sample_index @@ -185,6 +186,7 @@ def merge_segments(physio_file): diff_diff_sec = time_pair_diff - idx_pair_diff diff_diff_idx = diff_diff_sec * d.samples_per_second # Now we have the sizes, we can load the data and insert zeros. + return diff_diff_idx def save_physio(fn, physio_data): diff --git a/phys2bids/slice4phys.py b/phys2bids/slice4phys.py index 429ad557e..1ed810550 100644 --- a/phys2bids/slice4phys.py +++ b/phys2bids/slice4phys.py @@ -54,7 +54,7 @@ def update_name(basename, **kwargs): LGR.warning('Key {} already found in basename {}. ' 'Overwriting.'.format(key, basename)) regex = '_{}-[0-9a-zA-Z]+'.format(key) - fname = re.sub(regex, '_{}-{}'.format(key, val), fname) + outname = re.sub(regex, '_{}-{}'.format(key, val), outname) else: loc = ENTITY_ORDER.index(key) entities_to_check = ENTITY_ORDER[loc:] @@ -208,7 +208,7 @@ def slice_phys(phys, run_timestamps): # Split into frequency-specific object limited to onset-offset if len(unique_frequencies) > 1: - run_fname = update_name(fname, recording=str(freq)+'Hz') + run_fname = update_name(fname, recording=str(freq) + 'Hz') temp_phys_in = deepcopy(phys[onset:offset]) not_freq = [i for i in range(len(phys.freq)) if phys.freq[i] != freq] temp_phys_in.delete_at_index(not_freq) From 18f2831763b38da3a4d9541fb83fac57da91948f Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 3 Jul 2020 19:07:12 -0400 Subject: [PATCH 14/28] Reorganize and improve documentation. --- phys2bids/conversion.py | 124 ++++++++++++++++------------------------ phys2bids/slice4phys.py | 65 +-------------------- phys2bids/utils.py | 98 +++++++++++++++++++++++++++++++ 3 files changed, 151 insertions(+), 136 deletions(-) diff --git a/phys2bids/conversion.py b/phys2bids/conversion.py index 444ca282c..6962629b9 100644 --- a/phys2bids/conversion.py +++ b/phys2bids/conversion.py @@ -35,6 +35,7 @@ from .slice4phys import update_name, slice_phys from .interfaces.acq import populate_phys_input +from .utils import drop_bids_multicontrast_keys LGR = logging.getLogger(__name__) @@ -43,7 +44,22 @@ def extract_physio_onsets(trigger_timeseries, freq, threshold=0.5): """ Collect onsets from physio file, both in terms of seconds and time series indices. - TODO: Stitch segments together before extracting onsets. + + Parameters + ---------- + trigger_timeseries : 1D numpy.ndarray + Trigger time series. + freq : float + Frequency of trigger time series, in Hertz. + threshold : float, optional + Threshold to apply to binarize trigger time series. + + Returns + ------- + df : pandas.DataFrame + DataFrame with one row for each trigger period and three columns: + onset (in seconds), index (onsets in index of time series array), + and duration (in seconds). """ samplerate = 1. / freq scan_idx = np.where(trigger_timeseries > 0)[0] @@ -67,7 +83,8 @@ def extract_physio_onsets(trigger_timeseries, freq, threshold=0.5): def synchronize_onsets(phys_df, scan_df): - """Find matching scans and physio trigger periods from separate DataFrames, + """ + Find matching scans and physio trigger periods from separate DataFrames, using time differences within each DataFrame. There can be fewer physios than scans (task failed to trigger physio) @@ -121,7 +138,7 @@ def synchronize_onsets(phys_df, scan_df): if min_diff_sum < thresh: selected = (i_scan, j_phys) thresh = min_diff_sum - print('Selected solution: {}'.format(selected)) + offset = onset_diffs[selected[0], selected[1]] # Isolate close, but negative relative onsets, to ensure scan onsets are @@ -135,7 +152,8 @@ def synchronize_onsets(phys_df, scan_df): min_val = min(min_diffs_tmp) min_diffs += min_val offset += min_val - print('Scan DF should be adjusted forward by {} seconds'.format(offset)) + LGR.info('Scan onsets should be adjusted forward by {} seconds to best ' + 'match physio onsets.'.format(offset)) # Get onset of each scan in terms of the physio time series scan_df['phys_onset'] = scan_df['onset'] + offset @@ -148,64 +166,9 @@ def synchronize_onsets(phys_df, scan_df): return scan_df -def merge_segments(physio_file): - """Merge segments in BioPac physio file. The segments must be named with - timestamps, so that the actual time difference can be calculated and zeros - can be inserted in the gaps. - - Timestamps have second-level resolution, so stitching them together adds - uncertainty to timing, which should be accounted for in the onset - synchronization. - - NOTE: NOT WORKING - """ - import bioread - d = bioread.read_file(physio_file) - em_df = pd.DataFrame() - c = 0 - for em in d.event_markers: - print('{}: {}'.format(em.text, em.sample_index)) - try: - em_dt = datetime.strptime(em.text, '%a %b %d %Y %H:%M:%S') - except ValueError: - continue - em_df.loc[c, 'segment'] = em.text - em_df.loc[c, 'start_idx'] = em.sample_index - em_df.loc[c, 'onset_time'] = em_dt - c += 1 - - # segment timestamp resolution is one second - # we need to incorporate possible variability into that - idx_diff = em_df['start_idx'].diff() - time_diff = em_df['onset_time'].diff().dt.total_seconds() - - for i in range(em_df.shape[0] - 1): - time_pair_diff = time_diff.iloc[i + 1] - idx_pair_diff = idx_diff.iloc[i + 1] / d.samples_per_second - if abs(idx_pair_diff - time_pair_diff) > 2: - diff_diff_sec = time_pair_diff - idx_pair_diff - diff_diff_idx = diff_diff_sec * d.samples_per_second - # Now we have the sizes, we can load the data and insert zeros. - return diff_diff_idx - - -def save_physio(fn, physio_data): - """Save split physio data to BIDS dataset. +def determine_scan_durations(layout, scan_df, sub, ses=None): """ - pass - - -def drop_bids_multicontrast_keys(fname): - dirname = op.dirname(fname) - fname = op.basename(fname) - multi_contrast_entities = ['echo', 'part', 'fa', 'inv', 'ch'] - regex = '_({})-[0-9a-zA-Z]+'.format('|'.join(multi_contrast_entities)) - fname = re.sub(regex, '', fname) - return op.join(dirname, fname) - - -def determine_scan_durations(layout, scan_df, sub, ses): - """Extract scan durations by loading fMRI files/metadata and + Extract scan durations by loading fMRI files/metadata and multiplying TR by number of volumes. This can be used to determine the endpoints for the physio files. @@ -216,6 +179,10 @@ def determine_scan_durations(layout, scan_df, sub, ses): determine scan durations. scan_df : pandas.DataFrame Scans DataFrame containing functional scan filenames and onset times. + sub : str + Subject ID. + ses : str or None, optional + Session ID. If None, then no session. Returns ------- @@ -240,31 +207,34 @@ def determine_scan_durations(layout, scan_df, sub, ses): return scan_df -def load_scan_data(layout, sub, ses): - """Extract subject- and session-specific scan onsets and durations from +def load_scan_data(layout, sub, ses=None): + """ + Extract subject- and session-specific scan onsets and durations from BIDSLayout. Parameters ---------- layout : BIDSLayout sub : str - ses : str + ses : str or None, optional Returns ------- df : pandas.DataFrame - DataFrame with the following columns: 'filename', 'acq_time', - 'duration', 'onset'. + DataFrame with the following columns: 'filename', 'original_filename', + 'acq_time', 'duration', 'onset'. + 'filename' is the name of the BIDS file minus within-acquisition entities. + 'original_filename' is the actual, existing BIDS file. + 'acq_time' is the acquisition time in datetime format. + 'duration' is the scan duration in seconds. + 'onset' is the scan onset in seconds, starting with zero at the first scan. """ # This is the strategy we'll use in the future. Commented out for now. # scans_file = layout.get(extension='tsv', suffix='scans', sub=sub, ses=ses) # df = pd.read_table(scans_file) # Collect acquisition times - # NOTE: Will be replaced with scans file if heudiconv makes the change - # NOTE: Currently keeping echo in to work around bug. Basically echoes are - # acquired at slightly different times, so we need to get the min - # AcquisitionTime across multi-contrast scans like multi-echo at this step. + # NOTE: Will be replaced with scans file when heudiconv makes the change img_files = layout.get(datatype='func', suffix='bold', extension=['nii.gz', 'nii'], sub=sub, ses=ses) @@ -294,7 +264,8 @@ def load_scan_data(layout, sub, ses): def workflow(bids_dir, physio_file, chtrig, sub, ses=None): - """A potential workflow for running physio/scan onset synchronization and + """ + A potential workflow for running physio/scan onset synchronization and BIDSification. This workflow writes out physio files to a BIDS dataset. Parameters @@ -311,19 +282,24 @@ def workflow(bids_dir, physio_file, chtrig, sub, ses=None): ses : str or None, optional Session ID. Used to search the BIDS dataset for relevant scans in longitudinal studies. Default is None. + + Returns + ------- + out : dict + Keys are output filenames, while values are the chopped up BlueprintInput + objects. """ layout = BIDSLayout(bids_dir) scan_df = load_scan_data(layout, sub=sub, ses=ses) physio = populate_phys_input(physio_file, chtrig) - # physio = merge_physio_segments(physio) trigger_timeseries = physio.timeseries[physio.trigger_idx + 1] freq = physio.freq[physio.trigger_idx + 1] physio_df = extract_physio_onsets(trigger_timeseries, freq=freq) scan_df = synchronize_onsets(physio_df, scan_df) run_dict = {} - for i_row, row in scan_df.iterrows(): - # we need something better for updating the fname here + # could probably be replaced with apply() followed by to_dict() + for _, row in scan_df.iterrows(): base_fname = update_name(row['filename'], suffix='physio', extension='.tsv.gz') split_times = (row['index_onset'], row['index_offset']) run_dict[base_fname] = split_times diff --git a/phys2bids/slice4phys.py b/phys2bids/slice4phys.py index 1ed810550..8e0bac7f4 100644 --- a/phys2bids/slice4phys.py +++ b/phys2bids/slice4phys.py @@ -3,72 +3,13 @@ import re import logging from copy import deepcopy +import os.path as op import numpy as np -LGR = logging.getLogger(__name__) - - -def update_name(basename, **kwargs): - """ - Add entities, suffix, and/or extension to a BIDS filename while retaining - BIDS compatibility. +from phys2bids.utils import update_name - TODO: Replace list of entities with versioned, yaml entity table from BIDS. - """ - import os.path as op - ENTITY_ORDER = ['sub', 'ses', 'task', 'acq', 'ce', 'rec', 'dir', 'run', - 'mod', 'echo', 'recording', 'proc', 'space', 'split'] - - outdir = op.dirname(basename) - outname = op.basename(basename) - - # Determine scan suffix (should always be physio) - suffix = outname.split('_')[-1].split('.')[0] - extension = '.' + '.'.join(outname.split('_')[-1].split('.')[1:]) - filetype = suffix + extension - - for key, val in kwargs.items(): - if key == 'suffix': - if not val.startswith('_'): - val = '_' + val - - if not val.endswith('.'): - val = val + '.' - - outname = outname.replace('_' + suffix + '.', val) - elif key == 'extension': - if not val.startswith('.'): - val = '.' + val - outname = outname.replace(extension, val) - else: - if key not in ENTITY_ORDER: - raise ValueError('Key {} not understood.'.format(key)) - - # entities - if '_{}-{}'.format(key, val) in basename: - LGR.warning('Key {} already found in basename {}. ' - 'Skipping.'.format(key, basename)) - - elif '_{}-'.format(key) in basename: - LGR.warning('Key {} already found in basename {}. ' - 'Overwriting.'.format(key, basename)) - regex = '_{}-[0-9a-zA-Z]+'.format(key) - outname = re.sub(regex, '_{}-{}'.format(key, val), outname) - else: - loc = ENTITY_ORDER.index(key) - entities_to_check = ENTITY_ORDER[loc:] - entities_to_check = ['_{}-'.format(etc) for etc in entities_to_check] - entities_to_check.append('_{}'.format(filetype)) - for etc in entities_to_check: - if etc in outname: - outname = outname.replace( - etc, - '_{}-{}{}'.format(key, val, etc) - ) - break - outname = op.join(outdir, outname) - return outname +LGR = logging.getLogger(__name__) def find_runs(phys_in, ntp_list, tr_list, thr=None, padding=9): diff --git a/phys2bids/utils.py b/phys2bids/utils.py index 47051961f..3efeac0cc 100644 --- a/phys2bids/utils.py +++ b/phys2bids/utils.py @@ -12,6 +12,104 @@ SUPPORTED_FTYPES = ('acq', 'txt') # 'mat', ... +def drop_bids_multicontrast_keys(fname): + """ + Use regular expressions to remove within-acquisition entities (e.g., echo) + from a BIDS filename. + + Parameters + ---------- + fname : str + Valid BIDS filename. + + Returns + ------- + fname : str + Valid BIDS filename, minus the within-acquisition entities. + """ + dirname = op.dirname(fname) + fname = op.basename(fname) + multi_contrast_entities = ['echo', 'part', 'fa', 'inv', 'ch'] + regex = '_({})-[0-9a-zA-Z]+'.format('|'.join(multi_contrast_entities)) + fname = re.sub(regex, '', fname) + fname = op.join(dirname, fname) + return fname + + +def update_name(basename, **kwargs): + """ + Add entities, suffix, and/or extension to a BIDS filename while retaining + BIDS compatibility. + + Parameters + ---------- + basename : str + Name of valid BIDS file. + kwargs : dict + Keyword arguments indicating entities and/or suffix and/or extension + to update/add to the BIDS filename. + + Returns + ------- + outname : str + Valid BIDS filename with updated entities/suffix/extension. + """ + # This is hardcoded, but would be nice to use the yaml-fied BIDS entity + # table when that's up and running. + ENTITY_ORDER = ['sub', 'ses', 'task', 'acq', 'ce', 'rec', 'dir', 'run', + 'mod', 'echo', 'recording', 'proc', 'space', 'split'] + + outdir = op.dirname(basename) + outname = op.basename(basename) + + # Determine scan suffix (should always be physio) + suffix = outname.split('_')[-1].split('.')[0] + extension = '.' + '.'.join(outname.split('_')[-1].split('.')[1:]) + filetype = suffix + extension + + for key, val in kwargs.items(): + if key == 'suffix': + if not val.startswith('_'): + val = '_' + val + + if not val.endswith('.'): + val = val + '.' + + outname = outname.replace('_' + suffix + '.', val) + elif key == 'extension': + if not val.startswith('.'): + val = '.' + val + outname = outname.replace(extension, val) + else: + if key not in ENTITY_ORDER: + raise ValueError('Key {} not understood.'.format(key)) + + # entities + if '_{}-{}'.format(key, val) in basename: + LGR.warning('Key {} already found in basename {}. ' + 'Skipping.'.format(key, basename)) + + elif '_{}-'.format(key) in basename: + LGR.warning('Key {} already found in basename {}. ' + 'Overwriting.'.format(key, basename)) + regex = '_{}-[0-9a-zA-Z]+'.format(key) + outname = re.sub(regex, '_{}-{}'.format(key, val), outname) + else: + loc = ENTITY_ORDER.index(key) + entities_to_check = ENTITY_ORDER[loc:] + entities_to_check = ['_{}-'.format(etc) for etc in entities_to_check] + entities_to_check.append('_{}'.format(filetype)) + for etc in entities_to_check: + if etc in outname: + outname = outname.replace( + etc, + '_{}-{}{}'.format(key, val, etc) + ) + break + outname = op.join(outdir, outname) + return outname + + def check_input_dir(indir): """ Checks that the given indir doesn't have a trailing `/` From 40fe26b86d464fad9aa872ba89562fc5ac28dce2 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 3 Jul 2020 22:02:03 -0400 Subject: [PATCH 15/28] Fix bugs. --- phys2bids/conversion.py | 209 ++++++++++++++++++++-------------------- phys2bids/phys2bids.py | 32 +----- phys2bids/physio_obj.py | 8 ++ phys2bids/slice4phys.py | 23 +++-- phys2bids/utils.py | 44 +++++++-- 5 files changed, 171 insertions(+), 145 deletions(-) diff --git a/phys2bids/conversion.py b/phys2bids/conversion.py index 6962629b9..07f196845 100644 --- a/phys2bids/conversion.py +++ b/phys2bids/conversion.py @@ -21,11 +21,9 @@ in cases where trigger failed, and ignore trigger periods associated with scans that weren't kept in the BIDS dataset. """ -import re import os.path as op from operator import itemgetter from itertools import groupby -from datetime import datetime import logging from bids import BIDSLayout @@ -36,10 +34,107 @@ from .slice4phys import update_name, slice_phys from .interfaces.acq import populate_phys_input from .utils import drop_bids_multicontrast_keys +from .physio_obj import BlueprintOutput LGR = logging.getLogger(__name__) +def load_scan_data(layout, sub, ses=None): + """ + Extract subject- and session-specific scan onsets and durations from BIDSLayout. + + Parameters + ---------- + layout : BIDSLayout + sub : str + ses : str or None, optional + + Returns + ------- + df : pandas.DataFrame + DataFrame with the following columns: 'filename', 'original_filename', + 'acq_time', 'duration', 'onset'. + 'filename' is the name of the BIDS file minus within-acquisition entities. + 'original_filename' is the actual, existing BIDS file. + 'acq_time' is the acquisition time in datetime format. + 'duration' is the scan duration in seconds. + 'onset' is the scan onset in seconds, starting with zero at the first scan. + """ + # This is the strategy we'll use in the future. Commented out for now. + # scans_file = layout.get(extension='.tsv', suffix='scans', subject=sub, session=ses) + # df = pd.read_table(scans_file) + + # Collect acquisition times + # NOTE: Will be replaced with scans file when heudiconv makes the change + img_files = layout.get(datatype='func', suffix='bold', + extension=['.nii.gz', '.nii'], + subject=sub, session=ses) + df = pd.DataFrame( + columns=['original_filename', 'acq_time'], + ) + for i, img_file in enumerate(img_files): + df.loc[i, 'original_filename'] = img_file.path + df.loc[i, 'acq_time'] = img_file.get_metadata()['AcquisitionTime'] + + # Get generic filenames (without within-acquisition entities like echo) + df['filename'] = df['original_filename'].apply(drop_bids_multicontrast_keys) + + # Get "first" scan from multi-file acquisitions + df['acq_time'] = pd.to_datetime(df['acq_time']) + df = df.sort_values(by='acq_time') + df = df.drop_duplicates(subset='filename', keep='first', ignore_index=True) + + # Now back to general-purpose code + df = determine_scan_durations(layout, df, sub=sub, ses=ses) + df = df.dropna(subset=['duration']) # limit to relevant scans + + # Convert scan times to relative onsets (first scan is at 0 seconds) + df['onset'] = (df['acq_time'] - df['acq_time'].min()) + df['onset'] = df['onset'].dt.total_seconds() + return df + + +def determine_scan_durations(layout, scan_df, sub, ses=None): + """ + Extract scan durations by loading fMRI files/metadata and + multiplying TR by number of volumes. This can be used to determine the + endpoints for the physio files. + + Parameters + ---------- + layout : bids.layout.BIDSLayout + Dataset layout. Used to identify functional scans and load them to + determine scan durations. + scan_df : pandas.DataFrame + Scans DataFrame containing functional scan filenames and onset times. + sub : str + Subject ID. + ses : str or None, optional + Session ID. If None, then no session. + + Returns + ------- + scan_df : pandas.DataFrame + Updated DataFrame with new "duration" column. Calculated durations are + in seconds. + """ + # TODO: parse entities in func files for searches instead of larger search. + func_files = layout.get(datatype='func', suffix='bold', + extension=['.nii.gz', '.nii'], + subject=sub, session=ses) + scan_df['duration'] = None + for func_file in func_files: + filename = func_file.path + if filename in scan_df['original_filename'].values: + n_vols = nib.load(filename).shape[3] + tr = func_file.get_metadata()['RepetitionTime'] + duration = n_vols * tr + scan_df.loc[scan_df['original_filename'] == filename, 'duration'] = duration + else: + LGR.info('Skipping {}'.format(op.basename(filename))) + return scan_df + + def extract_physio_onsets(trigger_timeseries, freq, threshold=0.5): """ Collect onsets from physio file, both in terms of seconds and time series @@ -166,103 +261,6 @@ def synchronize_onsets(phys_df, scan_df): return scan_df -def determine_scan_durations(layout, scan_df, sub, ses=None): - """ - Extract scan durations by loading fMRI files/metadata and - multiplying TR by number of volumes. This can be used to determine the - endpoints for the physio files. - - Parameters - ---------- - layout : bids.layout.BIDSLayout - Dataset layout. Used to identify functional scans and load them to - determine scan durations. - scan_df : pandas.DataFrame - Scans DataFrame containing functional scan filenames and onset times. - sub : str - Subject ID. - ses : str or None, optional - Session ID. If None, then no session. - - Returns - ------- - scan_df : pandas.DataFrame - Updated DataFrame with new "duration" column. Calculated durations are - in seconds. - """ - # TODO: parse entities in func files for searches instead of larger search. - func_files = layout.get(datatype='func', suffix='bold', - extension=['nii.gz', 'nii'], - sub=sub, ses=ses) - scan_df['duration'] = None - for func_file in func_files: - filename = op.join('func', func_file.filename) - if filename in scan_df['original_filename'].values: - n_vols = nib.load(func_file.path).shape[3] - tr = func_file.get_metadata()['RepetitionTime'] - duration = n_vols * tr - scan_df.loc[scan_df['original_filename'] == filename, 'duration'] = duration - else: - LGR.info('Skipping {}'.format(filename)) - return scan_df - - -def load_scan_data(layout, sub, ses=None): - """ - Extract subject- and session-specific scan onsets and durations from - BIDSLayout. - - Parameters - ---------- - layout : BIDSLayout - sub : str - ses : str or None, optional - - Returns - ------- - df : pandas.DataFrame - DataFrame with the following columns: 'filename', 'original_filename', - 'acq_time', 'duration', 'onset'. - 'filename' is the name of the BIDS file minus within-acquisition entities. - 'original_filename' is the actual, existing BIDS file. - 'acq_time' is the acquisition time in datetime format. - 'duration' is the scan duration in seconds. - 'onset' is the scan onset in seconds, starting with zero at the first scan. - """ - # This is the strategy we'll use in the future. Commented out for now. - # scans_file = layout.get(extension='tsv', suffix='scans', sub=sub, ses=ses) - # df = pd.read_table(scans_file) - - # Collect acquisition times - # NOTE: Will be replaced with scans file when heudiconv makes the change - img_files = layout.get(datatype='func', suffix='bold', - extension=['nii.gz', 'nii'], - sub=sub, ses=ses) - df = pd.DataFrame( - columns=['original_filename', 'acq_time'], - ) - for i, img_file in enumerate(img_files): - df.loc[i, 'original_filename'] = op.join('func', img_file.filename) - df.loc[i, 'acq_time'] = img_file.get_metadata()['AcquisitionTime'] - - # Get generic filenames (without within-acquisition entities like echo) - df['filename'] = df['original_filename'].apply(drop_bids_multicontrast_keys) - - # Get "first" scan from multi-file acquisitions - df['acq_time'] = pd.to_datetime(df['acq_time']) - df = df.sort_values(by='acq_time') - df = df.drop_duplicates(subset='filename', keep='first', ignore_index=True) - - # Now back to general-purpose code - df = determine_scan_durations(layout, df, sub=sub, ses=ses) - df = df.dropna(subset=['duration']) # limit to relevant scans - - # Convert scan times to relative onsets (first scan is at 0 seconds) - df['onset'] = (df['acq_time'] - df['acq_time'].min()) - df['onset'] = df['onset'].dt.total_seconds() - return df - - def workflow(bids_dir, physio_file, chtrig, sub, ses=None): """ A potential workflow for running physio/scan onset synchronization and @@ -300,8 +298,15 @@ def workflow(bids_dir, physio_file, chtrig, sub, ses=None): run_dict = {} # could probably be replaced with apply() followed by to_dict() for _, row in scan_df.iterrows(): - base_fname = update_name(row['filename'], suffix='physio', extension='.tsv.gz') + base_fname = update_name(row['filename'], suffix='physio', extension='') split_times = (row['index_onset'], row['index_offset']) run_dict[base_fname] = split_times - phys_dict = slice_phys(physio, run_dict) - return phys_dict + phys_dict = slice_phys(physio, run_dict, time_before=6) + outputs = [] + for k, v in phys_dict.items(): + output = BlueprintOutput.init_from_blueprint(v) + output.filename = k + outputs.append(output) + # Let's not actually save files until we're more confident. + # output.save() + return outputs diff --git a/phys2bids/phys2bids.py b/phys2bids/phys2bids.py index 1a4e316f0..62ba534d8 100644 --- a/phys2bids/phys2bids.py +++ b/phys2bids/phys2bids.py @@ -87,34 +87,6 @@ def print_summary(filename, ntp_expected, ntp_found, samp_freq, time_offset, out utils.writefile(outfile, '.log', summary) -def print_json(outfile, samp_freq, time_offset, ch_name): - """ - Print the json required by BIDS format. - - Parameters - ---------- - outfile: str or path - Fullpath to output file. - samp_freq: float - Frequency of sampling for the output file. - time_offset: float - Difference between beginning of file and first TR. - ch_name: list of str - List of channel names, as specified by BIDS format. - - Notes - ----- - Outcome: - outfile: .json file - File containing information for BIDS. - """ - start_time = -time_offset - summary = dict(SamplingFrequency=samp_freq, - StartTime=round(start_time, 4), - Columns=ch_name) - utils.writejson(outfile, summary, indent=4, sort_keys=False) - - @due.dcite( Doi('10.5281/zenodo.3470091'), path='phys2bids', @@ -413,8 +385,8 @@ def phys2bids(filename, info=False, indir='.', outdir='.', heur_file=None, LGR.info(f'Exporting files for run {run} freq {uniq_freq}') np.savetxt(phys_out[key].filename + '.tsv.gz', phys_out[key].timeseries, fmt='%.8e', delimiter='\t') - print_json(phys_out[key].filename, phys_out[key].freq, - phys_out[key].start_time, phys_out[key].ch_name) + utils.save_json(phys_out[key].filename, phys_out[key].freq, + phys_out[key].start_time, phys_out[key].ch_name) print_summary(filename, num_timepoints_expected, phys_in[run].num_timepoints_found, uniq_freq, phys_out[key].start_time, diff --git a/phys2bids/physio_obj.py b/phys2bids/physio_obj.py index 24fb923b3..04712905f 100644 --- a/phys2bids/physio_obj.py +++ b/phys2bids/physio_obj.py @@ -8,6 +8,8 @@ import numpy as np +from .utils import save_json + LGR = logging.getLogger(__name__) @@ -678,3 +680,9 @@ def init_from_blueprint(cls, blueprint): units = blueprint.units start_time = timeseries[0, 0] return cls(timeseries, freq, ch_name, units, start_time) + + def save(self): + np.savetxt(self.filename + '.tsv.gz', + self.timeseries, fmt='%.8e', delimiter='\t') + save_json(self.filename + '.json', self.freq, + self.start_time, self.ch_name) diff --git a/phys2bids/slice4phys.py b/phys2bids/slice4phys.py index 8e0bac7f4..2945d129e 100644 --- a/phys2bids/slice4phys.py +++ b/phys2bids/slice4phys.py @@ -1,9 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -import re import logging from copy import deepcopy -import os.path as op import numpy as np @@ -110,7 +108,7 @@ def find_runs(phys_in, ntp_list, tr_list, thr=None, padding=9): return run_timestamps -def slice_phys(phys, run_timestamps): +def slice_phys(phys, run_timestamps, time_before=5): """ Slice a physio object based on run-/file-wise onsets and offsets. Adapted from slice4phys with the goal of modularizing slicing functionality @@ -139,12 +137,14 @@ def slice_phys(phys, run_timestamps): unique_frequencies = np.unique(phys.freq) trigger_freq = phys.freq[phys.trigger_idx + 1] for freq in unique_frequencies: + to_subtract = int(time_before * freq) # Get onset and offset for the requested frequency if freq != trigger_freq: - onset = int(trigger_onset * trigger_freq / freq) # no clue if this is right + # no clue if this is right + onset = int(trigger_onset * trigger_freq / freq) - to_subtract offset = int(trigger_offset * trigger_freq / freq) else: - onset = trigger_onset + onset = trigger_onset - to_subtract offset = trigger_offset # Split into frequency-specific object limited to onset-offset @@ -153,9 +153,20 @@ def slice_phys(phys, run_timestamps): temp_phys_in = deepcopy(phys[onset:offset]) not_freq = [i for i in range(len(phys.freq)) if phys.freq[i] != freq] temp_phys_in.delete_at_index(not_freq) + # NOTE: Won't work when timeseries 0 isn't time. + # zero out time + temp_phys_in.timeseries[0] = ( + temp_phys_in.timeseries[0] - np.min(temp_phys_in.timeseries[0]) + ) + temp_phys_in.timeseries[0] = temp_phys_in.timeseries[0] - time_before phys_in_slices[run_fname] = temp_phys_in else: - phys_in_slices[fname] = deepcopy(phys[onset:offset]) + temp_phys_in = deepcopy(phys[onset:offset]) + temp_phys_in.timeseries[0] = ( + temp_phys_in.timeseries[0] - np.min(temp_phys_in.timeseries[0]) + ) + temp_phys_in.timeseries[0] = temp_phys_in.timeseries[0] - time_before + phys_in_slices[fname] = temp_phys_in return phys_in_slices diff --git a/phys2bids/utils.py b/phys2bids/utils.py index 3efeac0cc..b06068813 100644 --- a/phys2bids/utils.py +++ b/phys2bids/utils.py @@ -3,6 +3,7 @@ import json import logging import os +import re import sys from csv import writer from pathlib import Path @@ -27,12 +28,12 @@ def drop_bids_multicontrast_keys(fname): fname : str Valid BIDS filename, minus the within-acquisition entities. """ - dirname = op.dirname(fname) - fname = op.basename(fname) + dirname = os.path.dirname(fname) + fname = os.path.basename(fname) multi_contrast_entities = ['echo', 'part', 'fa', 'inv', 'ch'] regex = '_({})-[0-9a-zA-Z]+'.format('|'.join(multi_contrast_entities)) fname = re.sub(regex, '', fname) - fname = op.join(dirname, fname) + fname = os.path.join(dirname, fname) return fname @@ -59,8 +60,8 @@ def update_name(basename, **kwargs): ENTITY_ORDER = ['sub', 'ses', 'task', 'acq', 'ce', 'rec', 'dir', 'run', 'mod', 'echo', 'recording', 'proc', 'space', 'split'] - outdir = op.dirname(basename) - outname = op.basename(basename) + outdir = os.path.dirname(basename) + outname = os.path.basename(basename) # Determine scan suffix (should always be physio) suffix = outname.split('_')[-1].split('.')[0] @@ -77,7 +78,8 @@ def update_name(basename, **kwargs): outname = outname.replace('_' + suffix + '.', val) elif key == 'extension': - if not val.startswith('.'): + # add leading . if not '' + if val and not val.startswith('.'): val = '.' + val outname = outname.replace(extension, val) else: @@ -106,7 +108,7 @@ def update_name(basename, **kwargs): '_{}-{}{}'.format(key, val, etc) ) break - outname = op.join(outdir, outname) + outname = os.path.join(outdir, outname) return outname @@ -328,6 +330,34 @@ def writefile(filename, ext, text): print(text, file=text_file) +def save_json(outfile, samp_freq, time_offset, ch_name): + """ + Print the json required by BIDS format. + + Parameters + ---------- + outfile: str or path + Fullpath to output file. + samp_freq: float + Frequency of sampling for the output file. + time_offset: float + Difference between beginning of file and first TR. + ch_name: list of str + List of channel names, as specified by BIDS format. + + Notes + ----- + Outcome: + outfile: .json file + File containing information for BIDS. + """ + start_time = -time_offset + summary = dict(SamplingFrequency=samp_freq, + StartTime=round(start_time, 4), + Columns=ch_name) + writejson(outfile, summary, indent=4, sort_keys=False) + + def writejson(filename, data, **kwargs): """ Outputs a json file with the given data inside. From 568d6aabc68dd04b96465f072b4d595db723ae53 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 3 Jul 2020 22:16:13 -0400 Subject: [PATCH 16/28] Use physio object instead of file. This way we can map channel names beforehand. --- phys2bids/conversion.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/phys2bids/conversion.py b/phys2bids/conversion.py index 07f196845..d19d71aae 100644 --- a/phys2bids/conversion.py +++ b/phys2bids/conversion.py @@ -32,7 +32,6 @@ import nibabel as nib from .slice4phys import update_name, slice_phys -from .interfaces.acq import populate_phys_input from .utils import drop_bids_multicontrast_keys from .physio_obj import BlueprintOutput @@ -261,20 +260,17 @@ def synchronize_onsets(phys_df, scan_df): return scan_df -def workflow(bids_dir, physio_file, chtrig, sub, ses=None): +def workflow(physio, bids_dir, sub, ses=None): """ A potential workflow for running physio/scan onset synchronization and BIDSification. This workflow writes out physio files to a BIDS dataset. Parameters ---------- + physio : BlueprintInput + Object *must* contain multiple physio trigger periods associated with scans. bids_dir : str Path to BIDS dataset - physio_file : str or list of str - Either a single BioPac physio file or multiple physio files from the - same scanning session. Each file *must* contain multiple physio trigger - periods associated with scans. If multiple files are provided, they - must have timestamped segments. sub : str Subject ID. Used to search the BIDS dataset for relevant scans. ses : str or None, optional @@ -283,13 +279,10 @@ def workflow(bids_dir, physio_file, chtrig, sub, ses=None): Returns ------- - out : dict - Keys are output filenames, while values are the chopped up BlueprintInput - objects. + out : list of BlueprintOutput """ layout = BIDSLayout(bids_dir) scan_df = load_scan_data(layout, sub=sub, ses=ses) - physio = populate_phys_input(physio_file, chtrig) trigger_timeseries = physio.timeseries[physio.trigger_idx + 1] freq = physio.freq[physio.trigger_idx + 1] From 4c3809cf4877e5a941671afd7bfc2f13bf36ae85 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 3 Jul 2020 22:19:20 -0400 Subject: [PATCH 17/28] Add requirements. --- requirements.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index c8772ca16..df43293c6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ numpy>=1.9.3, !=*rc* matplotlib>=3.1.1 ,!=3.3.0rc1 -PyYAML!=*rc* \ No newline at end of file +PyYAML!=*rc* +pybids +pandas From f0222cb700bb965e6315523a2a780ee6591c137c Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Sat, 4 Jul 2020 00:09:38 -0400 Subject: [PATCH 18/28] Add plotting function for QC. --- phys2bids/conversion.py | 62 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/phys2bids/conversion.py b/phys2bids/conversion.py index d19d71aae..33962db22 100644 --- a/phys2bids/conversion.py +++ b/phys2bids/conversion.py @@ -260,6 +260,63 @@ def synchronize_onsets(phys_df, scan_df): return scan_df +def plot_sync(scan_df, physio_df): + """ + Plot unsynchronized and synchonized scan and physio onsets and durations. + """ + fig, axes = plt.subplots(nrows=2, figsize=(20, 6), sharex=True) + + max_ = int(1000 * np.ceil(max((physio_df['onset'].max(), scan_df[onset_col].max())) / 1000)) + scalar = 10 + x = np.linspace(0, max_, (max_*scalar)+1) + + # first the raw version + physio_timeseries = np.zeros(x.shape) + func_timeseries = np.zeros(x.shape) + for i, row in scan_df.iterrows(): + func_timeseries[ + int(row['onset'] * scalar):int((row['onset'] + row['duration']) * scalar) + ] = 1 + + for i, row in physio_df.iterrows(): + physio_timeseries[ + int(row['onset'] * scalar):int((row['onset'] + row['duration']) * scalar) + ] = 0.5 + + axes[0].fill_between(x, func_timeseries, where=func_timeseries >= 0, + interpolate=True, color='red', alpha=0.3, + label='Functional scans') + axes[0].fill_between(x, physio_timeseries, where=physio_timeseries >= 0, + interpolate=True, color='blue', alpha=0.3, + label='Physio triggers') + + # now the adjusted version + physio_timeseries = np.zeros(x.shape) + func_timeseries = np.zeros(x.shape) + for i, row in scan_df.iterrows(): + func_timeseries[ + int(row['phys_onset'] * scalar):int((row['phys_onset'] + row['duration']) * scalar) + ] = 1 + + for i, row in physio_df.iterrows(): + physio_timeseries[ + int(row['onset'] * scalar):int((row['onset'] + row['duration']) * scalar) + ] = 0.5 + + axes[1].fill_between(x, func_timeseries, where=func_timeseries >= 0, + interpolate=True, color='red', alpha=0.3, + label='Functional scans') + axes[1].fill_between(x, physio_timeseries, where=physio_timeseries >= 0, + interpolate=True, color='blue', alpha=0.3, + label='Physio triggers') + + axes[0].set_xlim((min(x), max(x))) + axes[0].set_ylim((0, None)) + axes[1].set_xlabel('Time (s)') + axes[0].legend() + return fig, axes + + def workflow(physio, bids_dir, sub, ses=None): """ A potential workflow for running physio/scan onset synchronization and @@ -288,6 +345,11 @@ def workflow(physio, bids_dir, sub, ses=None): freq = physio.freq[physio.trigger_idx + 1] physio_df = extract_physio_onsets(trigger_timeseries, freq=freq) scan_df = synchronize_onsets(physio_df, scan_df) + + # we should do something better with this figure, but it's nice to have for QC + fig, axes = plot_sync(scan_df, physio_df) + fig.savefig('synchronization_results.png') + run_dict = {} # could probably be replaced with apply() followed by to_dict() for _, row in scan_df.iterrows(): From b597880160abc9c97508ddae0414eb915f232e27 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Sat, 4 Jul 2020 00:11:36 -0400 Subject: [PATCH 19/28] fix. --- phys2bids/conversion.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/phys2bids/conversion.py b/phys2bids/conversion.py index 33962db22..55deb2be8 100644 --- a/phys2bids/conversion.py +++ b/phys2bids/conversion.py @@ -266,7 +266,11 @@ def plot_sync(scan_df, physio_df): """ fig, axes = plt.subplots(nrows=2, figsize=(20, 6), sharex=True) - max_ = int(1000 * np.ceil(max((physio_df['onset'].max(), scan_df[onset_col].max())) / 1000)) + # get max value rounded to nearest 1000 + max_ = int(1000 * np.ceil(max(( + physio_df['onset'].max(), + scan_df['onset'].max(), + scan_df['phys_onset'].max())) / 1000)) scalar = 10 x = np.linspace(0, max_, (max_*scalar)+1) From d9d03fb1232e2212fae37d5e171999e972f218e1 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Wed, 8 Jul 2020 11:47:02 -0400 Subject: [PATCH 20/28] Reorganize functions and generalize update_bids_name. Now that feeding an entity with a value of None will remove the entity from the filename, we can drop `drop_bids_multicontrast_keys`. --- phys2bids/bids.py | 81 ++++++++++++++++++++++++++++++++ phys2bids/conversion.py | 96 +++++++++++++++++++++++++------------- phys2bids/slice4phys.py | 11 +++-- phys2bids/utils.py | 100 ---------------------------------------- 4 files changed, 152 insertions(+), 136 deletions(-) diff --git a/phys2bids/bids.py b/phys2bids/bids.py index 2cfc80fad..d23d878af 100644 --- a/phys2bids/bids.py +++ b/phys2bids/bids.py @@ -1,5 +1,6 @@ import logging import os +import re from csv import reader from pathlib import Path @@ -52,6 +53,86 @@ } +def update_bids_name(basename, **kwargs): + """ + Add entities, suffix, and/or extension to a BIDS filename while retaining + BIDS compatibility. + + Parameters + ---------- + basename : str + Name of valid BIDS file. + kwargs : dict + Keyword arguments indicating entities and/or suffix and/or extension + to update/add to the BIDS filename. If a keyword's value is None, then + the entity will be removed from the filename. + + Returns + ------- + outname : str + Valid BIDS filename with updated entities/suffix/extension. + """ + # This is hardcoded, but would be nice to use the yaml-fied BIDS entity + # table when that's up and running. + ENTITY_ORDER = ['sub', 'ses', 'task', 'acq', 'ce', 'rec', 'dir', 'run', + 'mod', 'echo', 'fa', 'inv', 'mt', 'part', 'ch', + 'recording', 'proc', 'space', 'split'] + + outdir = os.path.dirname(basename) + outname = os.path.basename(basename) + + # Determine scan suffix (should always be physio) + suffix = outname.split('_')[-1].split('.')[0] + extension = '.' + '.'.join(outname.split('_')[-1].split('.')[1:]) + filetype = suffix + extension + + for key, val in kwargs.items(): + if key == 'suffix': + if not val.startswith('_'): + val = '_' + val + + if not val.endswith('.'): + val = val + '.' + + outname = outname.replace('_' + suffix + '.', val) + elif key == 'extension': + # add leading . if not '' + if val and not val.startswith('.'): + val = '.' + val + outname = outname.replace(extension, val) + else: + # entities + if key not in ENTITY_ORDER: + raise ValueError('Key {} not understood.'.format(key)) + + if val is None: + # remove entity from filename if kwarg val is None + regex = '_{}-[0-9a-zA-Z]+'.format(key) + outname = re.sub(regex, '', outname) + elif '_{}-{}'.format(key, val) in basename: + LGR.warning('Key-value pair {}/{} already found in ' + 'basename {}. Skipping.'.format(key, val, basename)) + elif '_{}-'.format(key) in basename: + LGR.warning('Key {} already found in basename {}. ' + 'Overwriting.'.format(key, basename)) + regex = '_{}-[0-9a-zA-Z]+'.format(key) + outname = re.sub(regex, '_{}-{}'.format(key, val), outname) + else: + loc = ENTITY_ORDER.index(key) + entities_to_check = ENTITY_ORDER[loc:] + entities_to_check = ['_{}-'.format(etc) for etc in entities_to_check] + entities_to_check.append('_{}'.format(filetype)) + for etc in entities_to_check: + if etc in outname: + outname = outname.replace( + etc, + '_{}-{}{}'.format(key, val, etc) + ) + break + outname = os.path.join(outdir, outname) + return outname + + def bidsify_units(orig_unit): """ Read the input unit of measure and use the dictionary of aliases diff --git a/phys2bids/conversion.py b/phys2bids/conversion.py index 55deb2be8..34f25e1b7 100644 --- a/phys2bids/conversion.py +++ b/phys2bids/conversion.py @@ -1,38 +1,20 @@ """ -1. Extract segments from physio file. - - Each segment must be named according to the onset time if there are - multiple segments. If there is only one segment, then there is no need - for a timestamp. -2. Extract trigger periods from physio segments. - - Onset -3. Extract scan times - - Name - - Onset with subsecond resolution as close to trigger pulse as possible - - Duration (from TR * data dimension) -4. Calculate difference in time between each scan and each physio trigger. -5. Use differences to identify similar values across as many physio/scan pairs as possible. - - Physio may be missing if trigger failed. - - Physio may be delayed if task was manually trigger after scan began - (sometimes happens with resting-state scans where the task itself isn't - very important). - - Scan may be missing if it wasn't converted (e.g., a scan stopped early - and re-run). -6. Assign scan names to trigger period, infer times for other scan times - in cases where trigger failed, and ignore trigger periods associated with - scans that weren't kept in the BIDS dataset. +A small module (to be broken up) with functions for synchronizing multi-run +physio files with pre-converted BIDS imaging data. """ import os.path as op from operator import itemgetter from itertools import groupby import logging +import matplotlib.pyplot as plt from bids import BIDSLayout import pandas as pd import numpy as np import nibabel as nib -from .slice4phys import update_name, slice_phys -from .utils import drop_bids_multicontrast_keys +from .bids import update_bids_name +from .slice4phys import slice_phys from .physio_obj import BlueprintOutput LGR = logging.getLogger(__name__) @@ -76,7 +58,10 @@ def load_scan_data(layout, sub, ses=None): df.loc[i, 'acq_time'] = img_file.get_metadata()['AcquisitionTime'] # Get generic filenames (without within-acquisition entities like echo) - df['filename'] = df['original_filename'].apply(drop_bids_multicontrast_keys) + df['filename'] = df['original_filename'].apply(update_bids_name, + echo=None, fa=None, + inv=None, mt=None, part=None, + ch=None) # Get "first" scan from multi-file acquisitions df['acq_time'] = pd.to_datetime(df['acq_time']) @@ -117,7 +102,6 @@ def determine_scan_durations(layout, scan_df, sub, ses=None): Updated DataFrame with new "duration" column. Calculated durations are in seconds. """ - # TODO: parse entities in func files for searches instead of larger search. func_files = layout.get(datatype='func', suffix='bold', extension=['.nii.gz', '.nii'], subject=sub, session=ses) @@ -263,18 +247,42 @@ def synchronize_onsets(phys_df, scan_df): def plot_sync(scan_df, physio_df): """ Plot unsynchronized and synchonized scan and physio onsets and durations. + + Parameters + ---------- + scan_df : pandas.DataFrame + DataFrame with timing associated with scan files. Must have the + following columns: onset (scan onsets in seconds), duration (scan + durations in seconds), and phys_onset (scan onsets after being matches + with physio onsets, in seconds). + physio_df : pandas.DataFrame + DataFrame with timing associated with physio trigger periods. Must have + the following columns: onset (trigger period onsets in seconds) and + duration (trigger period durations in seconds). + + Returns + ------- + fig : matplotlib.pyplot.Figure + Figure from plot. + axes : list of two matplotlib.pyplot.Axis objects + Axis objects for top and bottom subplots of figure. Top subplot has + unsynchronized onsets and bottom has synchronized ones. """ fig, axes = plt.subplots(nrows=2, figsize=(20, 6), sharex=True) - # get max value rounded to nearest 1000 + # get max (onset time + duration) rounded up to nearest 1000 max_ = int(1000 * np.ceil(max(( - physio_df['onset'].max(), - scan_df['onset'].max(), - scan_df['phys_onset'].max())) / 1000)) + (physio_df['onset'] + physio_df['duration']).max(), + (scan_df['onset'] + scan_df['duration']).max(), + (scan_df['phys_onset'] + scan_df['duration']).max() + )) / 1000)) + + # get x-axis values scalar = 10 x = np.linspace(0, max_, (max_*scalar)+1) - # first the raw version + # first plot the onsets and durations of the raw scan and physio runs in + # the top axis physio_timeseries = np.zeros(x.shape) func_timeseries = np.zeros(x.shape) for i, row in scan_df.iterrows(): @@ -294,7 +302,7 @@ def plot_sync(scan_df, physio_df): interpolate=True, color='blue', alpha=0.3, label='Physio triggers') - # now the adjusted version + # now plot the adjusted onsets (and original durations) in the bottom axis physio_timeseries = np.zeros(x.shape) func_timeseries = np.zeros(x.shape) for i, row in scan_df.iterrows(): @@ -341,6 +349,30 @@ def workflow(physio, bids_dir, sub, ses=None): Returns ------- out : list of BlueprintOutput + + Notes + ----- + The workflow works as follows: + 1. Extract trigger period onsets from physio file. + 2. Extract scan times from BIDS dataset, to get the following information: + - Scan name + - Onset with subsecond resolution as close to trigger pulse as possible + - Duration (from TR * data dimension) + 3. Calculate difference in time between each scan onset and each physio + trigger onset. + 4. Use differences to identify similar values across as many physio/scan + pairs as possible. Physio may be missing if trigger failed. + Physio may be delayed if task was manually triggered after scan began, + or if there's a bug in the task script. Scan may be missing if it wasn't + converted (e.g., a scan stopped early and re-run). + 5. Infer physio periods from scan periods, once physio and scan onsets + are matched. + 6. Assign scan names to physio periods, infer times for other scan times + in cases where trigger failed, and ignore trigger periods associated with + scans that weren't kept in the BIDS dataset. + 7. Split physio file based on scan-specific onsets and offsets. + 8. Write out scan-associated physio files. + 9. Generate and return scan/physio onsets figure for manual QC. """ layout = BIDSLayout(bids_dir) scan_df = load_scan_data(layout, sub=sub, ses=ses) @@ -357,7 +389,7 @@ def workflow(physio, bids_dir, sub, ses=None): run_dict = {} # could probably be replaced with apply() followed by to_dict() for _, row in scan_df.iterrows(): - base_fname = update_name(row['filename'], suffix='physio', extension='') + base_fname = update_bids_name(row['filename'], suffix='physio', extension='') split_times = (row['index_onset'], row['index_offset']) run_dict[base_fname] = split_times phys_dict = slice_phys(physio, run_dict, time_before=6) diff --git a/phys2bids/slice4phys.py b/phys2bids/slice4phys.py index 2945d129e..47e3e37e7 100644 --- a/phys2bids/slice4phys.py +++ b/phys2bids/slice4phys.py @@ -5,7 +5,7 @@ import numpy as np -from phys2bids.utils import update_name +from phys2bids.bids import update_bids_name LGR = logging.getLogger(__name__) @@ -111,8 +111,6 @@ def find_runs(phys_in, ntp_list, tr_list, thr=None, padding=9): def slice_phys(phys, run_timestamps, time_before=5): """ Slice a physio object based on run-/file-wise onsets and offsets. - Adapted from slice4phys with the goal of modularizing slicing functionality - (i.e., cutting out the run detection step). Parameters ---------- @@ -127,6 +125,11 @@ def slice_phys(phys, run_timestamps, time_before=5): phys_in_slices : dict Each key is a run-wise filename (possibly further split by frequency) and each value is a BlueprintInput object. + + Notes + ----- + The goal of this function is to abstract out the general slicing procedure + from slice4phys. """ phys_in_slices = {} for i_run, fname in enumerate(run_timestamps.keys()): @@ -149,7 +152,7 @@ def slice_phys(phys, run_timestamps, time_before=5): # Split into frequency-specific object limited to onset-offset if len(unique_frequencies) > 1: - run_fname = update_name(fname, recording=str(freq) + 'Hz') + run_fname = update_bids_name(fname, recording=str(freq) + 'Hz') temp_phys_in = deepcopy(phys[onset:offset]) not_freq = [i for i in range(len(phys.freq)) if phys.freq[i] != freq] temp_phys_in.delete_at_index(not_freq) diff --git a/phys2bids/utils.py b/phys2bids/utils.py index b06068813..61f6b9b0e 100644 --- a/phys2bids/utils.py +++ b/phys2bids/utils.py @@ -3,7 +3,6 @@ import json import logging import os -import re import sys from csv import writer from pathlib import Path @@ -13,105 +12,6 @@ SUPPORTED_FTYPES = ('acq', 'txt') # 'mat', ... -def drop_bids_multicontrast_keys(fname): - """ - Use regular expressions to remove within-acquisition entities (e.g., echo) - from a BIDS filename. - - Parameters - ---------- - fname : str - Valid BIDS filename. - - Returns - ------- - fname : str - Valid BIDS filename, minus the within-acquisition entities. - """ - dirname = os.path.dirname(fname) - fname = os.path.basename(fname) - multi_contrast_entities = ['echo', 'part', 'fa', 'inv', 'ch'] - regex = '_({})-[0-9a-zA-Z]+'.format('|'.join(multi_contrast_entities)) - fname = re.sub(regex, '', fname) - fname = os.path.join(dirname, fname) - return fname - - -def update_name(basename, **kwargs): - """ - Add entities, suffix, and/or extension to a BIDS filename while retaining - BIDS compatibility. - - Parameters - ---------- - basename : str - Name of valid BIDS file. - kwargs : dict - Keyword arguments indicating entities and/or suffix and/or extension - to update/add to the BIDS filename. - - Returns - ------- - outname : str - Valid BIDS filename with updated entities/suffix/extension. - """ - # This is hardcoded, but would be nice to use the yaml-fied BIDS entity - # table when that's up and running. - ENTITY_ORDER = ['sub', 'ses', 'task', 'acq', 'ce', 'rec', 'dir', 'run', - 'mod', 'echo', 'recording', 'proc', 'space', 'split'] - - outdir = os.path.dirname(basename) - outname = os.path.basename(basename) - - # Determine scan suffix (should always be physio) - suffix = outname.split('_')[-1].split('.')[0] - extension = '.' + '.'.join(outname.split('_')[-1].split('.')[1:]) - filetype = suffix + extension - - for key, val in kwargs.items(): - if key == 'suffix': - if not val.startswith('_'): - val = '_' + val - - if not val.endswith('.'): - val = val + '.' - - outname = outname.replace('_' + suffix + '.', val) - elif key == 'extension': - # add leading . if not '' - if val and not val.startswith('.'): - val = '.' + val - outname = outname.replace(extension, val) - else: - if key not in ENTITY_ORDER: - raise ValueError('Key {} not understood.'.format(key)) - - # entities - if '_{}-{}'.format(key, val) in basename: - LGR.warning('Key {} already found in basename {}. ' - 'Skipping.'.format(key, basename)) - - elif '_{}-'.format(key) in basename: - LGR.warning('Key {} already found in basename {}. ' - 'Overwriting.'.format(key, basename)) - regex = '_{}-[0-9a-zA-Z]+'.format(key) - outname = re.sub(regex, '_{}-{}'.format(key, val), outname) - else: - loc = ENTITY_ORDER.index(key) - entities_to_check = ENTITY_ORDER[loc:] - entities_to_check = ['_{}-'.format(etc) for etc in entities_to_check] - entities_to_check.append('_{}'.format(filetype)) - for etc in entities_to_check: - if etc in outname: - outname = outname.replace( - etc, - '_{}-{}{}'.format(key, val, etc) - ) - break - outname = os.path.join(outdir, outname) - return outname - - def check_input_dir(indir): """ Checks that the given indir doesn't have a trailing `/` From 92c99e83c498784b347a1058dfcfa4eb7ab12171 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Wed, 8 Jul 2020 11:50:13 -0400 Subject: [PATCH 21/28] Fix style issue. --- phys2bids/conversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phys2bids/conversion.py b/phys2bids/conversion.py index 34f25e1b7..837fac965 100644 --- a/phys2bids/conversion.py +++ b/phys2bids/conversion.py @@ -279,7 +279,7 @@ def plot_sync(scan_df, physio_df): # get x-axis values scalar = 10 - x = np.linspace(0, max_, (max_*scalar)+1) + x = np.linspace(0, max_, (max_ * scalar) + 1) # first plot the onsets and durations of the raw scan and physio runs in # the top axis From 17dfa07e8c53337f14fe3a68df5a125a9fa8c11e Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Sun, 19 Jul 2020 16:46:40 -0400 Subject: [PATCH 22/28] Update. --- phys2bids/conversion.py | 19 +++++++-- phys2bids/slice4phys.py | 94 ++++++++++++++++++++++++++++------------- 2 files changed, 79 insertions(+), 34 deletions(-) diff --git a/phys2bids/conversion.py b/phys2bids/conversion.py index 837fac965..9935be2a8 100644 --- a/phys2bids/conversion.py +++ b/phys2bids/conversion.py @@ -235,6 +235,7 @@ def synchronize_onsets(phys_df, scan_df): # Get onset of each scan in terms of the physio time series scan_df['phys_onset'] = scan_df['onset'] + offset + print(phys_df) rise = (phys_df.loc[1, 'index'] - phys_df.loc[0, 'index']) run = (phys_df.loc[1, 'onset'] - phys_df.loc[0, 'onset']) samplerate = rise / run @@ -329,7 +330,7 @@ def plot_sync(scan_df, physio_df): return fig, axes -def workflow(physio, bids_dir, sub, ses=None): +def workflow(physio, bids_dir, sub, ses=None, padding=9, update_trigger=False): """ A potential workflow for running physio/scan onset synchronization and BIDSification. This workflow writes out physio files to a BIDS dataset. @@ -345,6 +346,16 @@ def workflow(physio, bids_dir, sub, ses=None): ses : str or None, optional Session ID. Used to search the BIDS dataset for relevant scans in longitudinal studies. Default is None. + padding : float or tuple, optional + Amount of time before and after run to keep in physio time series, in seconds. + May be a single value (in which case the time before and after is the same) or + a two-item tuple (which case the first item is time before and the second is + time after). + These values will be automatically reduced in cases where the pad would extend + before or after the physio acquisition. + update_trigger : bool, optional + Whether to update the trigger channel time series based on estimated scan onsets from + the BIDS dataset (True) or to leave it as-is (False). Default is False. Returns ------- @@ -377,8 +388,8 @@ def workflow(physio, bids_dir, sub, ses=None): layout = BIDSLayout(bids_dir) scan_df = load_scan_data(layout, sub=sub, ses=ses) - trigger_timeseries = physio.timeseries[physio.trigger_idx + 1] - freq = physio.freq[physio.trigger_idx + 1] + trigger_timeseries = physio.timeseries[physio.trigger_idx] + freq = physio.freq[physio.trigger_idx] physio_df = extract_physio_onsets(trigger_timeseries, freq=freq) scan_df = synchronize_onsets(physio_df, scan_df) @@ -392,7 +403,7 @@ def workflow(physio, bids_dir, sub, ses=None): base_fname = update_bids_name(row['filename'], suffix='physio', extension='') split_times = (row['index_onset'], row['index_offset']) run_dict[base_fname] = split_times - phys_dict = slice_phys(physio, run_dict, time_before=6) + phys_dict = slice_phys(physio, run_dict, padding=padding, update_trigger=update_trigger) outputs = [] for k, v in phys_dict.items(): output = BlueprintOutput.init_from_blueprint(v) diff --git a/phys2bids/slice4phys.py b/phys2bids/slice4phys.py index 47e3e37e7..41b737d1b 100644 --- a/phys2bids/slice4phys.py +++ b/phys2bids/slice4phys.py @@ -108,7 +108,7 @@ def find_runs(phys_in, ntp_list, tr_list, thr=None, padding=9): return run_timestamps -def slice_phys(phys, run_timestamps, time_before=5): +def slice_phys(phys, run_timestamps, padding=9, update_trigger=False): """ Slice a physio object based on run-/file-wise onsets and offsets. @@ -119,6 +119,13 @@ def slice_phys(phys, run_timestamps, time_before=5): Each key is a run-wise filename and value is a tuple of (onset, offset), where onset and offset are integers corresponding to index values of the trigger channel. + padding : float or tuple, optional + Amount of time before and after run to keep in physio time series, in seconds. + May be a single value (in which case the time before and after is the same) or + a two-item tuple (which case the first item is time before and the second is + time after). + These values will be automatically reduced in cases where the pad would extend + before or after the physio acquisition. Returns ------- @@ -131,45 +138,72 @@ def slice_phys(phys, run_timestamps, time_before=5): The goal of this function is to abstract out the general slicing procedure from slice4phys. """ + phys = deepcopy(phys) + if update_trigger: + phys.freq.append(phys.freq[phys.trigger_idx]) + phys.units.append(phys.units[phys.trigger_idx]) + phys.timeseries.append(phys.timeseries[phys.trigger_idx].copy()) + phys.ch_name.append('Original Trigger') + + if not isinstance(padding, tuple): + time_before, time_after = padding, padding + else: + time_before, time_after = padding + phys_in_slices = {} for i_run, fname in enumerate(run_timestamps.keys()): - # tmp variable to collect run's info - run_attributes = run_timestamps[fname] + run_attributes = run_timestamps[fname] # tmp variable to collect run's info trigger_onset, trigger_offset = run_attributes + min_onset, max_offset = 0, phys.timeseries[phys.trigger_idx].shape[0] unique_frequencies = np.unique(phys.freq) - trigger_freq = phys.freq[phys.trigger_idx + 1] + trigger_freq = phys.freq[phys.trigger_idx] + + # Limit padding based on beginning and end of physio recording. + run_time_before = np.minimum(time_before, (trigger_onset - min_onset) / trigger_freq) + run_time_after = np.minimum(time_after, (max_offset - trigger_offset) / trigger_freq) + for freq in unique_frequencies: - to_subtract = int(time_before * freq) - # Get onset and offset for the requested frequency - if freq != trigger_freq: - # no clue if this is right - onset = int(trigger_onset * trigger_freq / freq) - to_subtract - offset = int(trigger_offset * trigger_freq / freq) - else: - onset = trigger_onset - to_subtract - offset = trigger_offset + to_subtract = int(run_time_before * freq) + to_add = int(run_time_after * freq) + + run_onset_idx = int(trigger_onset * freq / trigger_freq) - to_subtract + run_offset_idx = int(trigger_offset * freq / trigger_freq) + to_add # Split into frequency-specific object limited to onset-offset - if len(unique_frequencies) > 1: - run_fname = update_bids_name(fname, recording=str(freq) + 'Hz') - temp_phys_in = deepcopy(phys[onset:offset]) + temp_phys_in = deepcopy(phys[run_onset_idx:run_offset_idx]) + + if update_trigger: + # onset/offset of trigger period in terms of frequency + trigger_onset_idx = int(trigger_onset * freq / trigger_freq) + trigger_offset_idx = int(trigger_offset * freq / trigger_freq) + # replace trigger period with block of 1s and non-trigger with 0s + temp_phys_in.timeseries[temp_phys_in.trigger_idx][:] = 0 + temp_phys_in.timeseries[temp_phys_in.trigger_idx][ + trigger_onset_idx:trigger_offset_idx + ] = 1 + + dog = False + if dog: + # This won't do anything if there's only one frequency not_freq = [i for i in range(len(phys.freq)) if phys.freq[i] != freq] + not_freq = [i for i in not_freq if i != 0] # retain time + min_time = np.min(temp_phys_in.timeseries[0]) + temp_phys_in.timeseries[0] + temp_phys_in.timeseries[0] = temp_phys_in.timeseries[0] * freq / trigger_freq temp_phys_in.delete_at_index(not_freq) - # NOTE: Won't work when timeseries 0 isn't time. - # zero out time - temp_phys_in.timeseries[0] = ( - temp_phys_in.timeseries[0] - np.min(temp_phys_in.timeseries[0]) - ) - temp_phys_in.timeseries[0] = temp_phys_in.timeseries[0] - time_before - phys_in_slices[run_fname] = temp_phys_in - else: - temp_phys_in = deepcopy(phys[onset:offset]) - temp_phys_in.timeseries[0] = ( - temp_phys_in.timeseries[0] - np.min(temp_phys_in.timeseries[0]) - ) - temp_phys_in.timeseries[0] = temp_phys_in.timeseries[0] - time_before - phys_in_slices[fname] = temp_phys_in + + # NOTE: Won't work when timeseries 0 isn't time. + # zero out time + temp_phys_in.timeseries[0] = ( + temp_phys_in.timeseries[0] - np.min(temp_phys_in.timeseries[0]) + ) + # Now take out the time before the scan starts + temp_phys_in.timeseries[0] = temp_phys_in.timeseries[0] - time_before + + if len(unique_frequencies) > 1: + run_fname = update_bids_name(fname, recording=str(freq) + 'Hz') + phys_in_slices[run_fname] = temp_phys_in return phys_in_slices From cd1dea59bc1c4c3b9aafd6a150f50ba4029f594b Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Sun, 19 Jul 2020 18:12:14 -0400 Subject: [PATCH 23/28] Get trigger update and resampling working. --- phys2bids/conversion.py | 1 - phys2bids/slice4phys.py | 67 ++++++++++++++++++++++++++--------------- 2 files changed, 43 insertions(+), 25 deletions(-) diff --git a/phys2bids/conversion.py b/phys2bids/conversion.py index 9935be2a8..48fa577be 100644 --- a/phys2bids/conversion.py +++ b/phys2bids/conversion.py @@ -235,7 +235,6 @@ def synchronize_onsets(phys_df, scan_df): # Get onset of each scan in terms of the physio time series scan_df['phys_onset'] = scan_df['onset'] + offset - print(phys_df) rise = (phys_df.loc[1, 'index'] - phys_df.loc[0, 'index']) run = (phys_df.loc[1, 'onset'] - phys_df.loc[0, 'onset']) samplerate = rise / run diff --git a/phys2bids/slice4phys.py b/phys2bids/slice4phys.py index 41b737d1b..4c16f53ef 100644 --- a/phys2bids/slice4phys.py +++ b/phys2bids/slice4phys.py @@ -4,6 +4,7 @@ from copy import deepcopy import numpy as np +from scipy.signal import resample from phys2bids.bids import update_bids_name @@ -126,6 +127,9 @@ def slice_phys(phys, run_timestamps, padding=9, update_trigger=False): time after). These values will be automatically reduced in cases where the pad would extend before or after the physio acquisition. + update_trigger : bool, optional + Whether to update the trigger channel time series based on estimated scan onsets from + the BIDS dataset (True) or to leave it as-is (False). Default is False. Returns ------- @@ -143,7 +147,12 @@ def slice_phys(phys, run_timestamps, padding=9, update_trigger=False): phys.freq.append(phys.freq[phys.trigger_idx]) phys.units.append(phys.units[phys.trigger_idx]) phys.timeseries.append(phys.timeseries[phys.trigger_idx].copy()) - phys.ch_name.append('Original Trigger') + phys.ch_name.append('original trigger') + + # Fix up the trigger time series + phys.timeseries[phys.trigger_idx][:] = 0 + for ons, off in run_timestamps.values(): + phys.timeseries[phys.trigger_idx][ons:off] = 1 if not isinstance(padding, tuple): time_before, time_after = padding, padding @@ -160,6 +169,7 @@ def slice_phys(phys, run_timestamps, padding=9, update_trigger=False): trigger_freq = phys.freq[phys.trigger_idx] # Limit padding based on beginning and end of physio recording. + # We could also limit padding to prevent overlap between scans, if desired. run_time_before = np.minimum(time_before, (trigger_onset - min_onset) / trigger_freq) run_time_after = np.minimum(time_after, (max_offset - trigger_offset) / trigger_freq) @@ -173,27 +183,38 @@ def slice_phys(phys, run_timestamps, padding=9, update_trigger=False): # Split into frequency-specific object limited to onset-offset temp_phys_in = deepcopy(phys[run_onset_idx:run_offset_idx]) - if update_trigger: - # onset/offset of trigger period in terms of frequency - trigger_onset_idx = int(trigger_onset * freq / trigger_freq) - trigger_offset_idx = int(trigger_offset * freq / trigger_freq) - # replace trigger period with block of 1s and non-trigger with 0s - temp_phys_in.timeseries[temp_phys_in.trigger_idx][:] = 0 - temp_phys_in.timeseries[temp_phys_in.trigger_idx][ - trigger_onset_idx:trigger_offset_idx - ] = 1 - - dog = False - if dog: - # This won't do anything if there's only one frequency - not_freq = [i for i in range(len(phys.freq)) if phys.freq[i] != freq] - not_freq = [i for i in not_freq if i != 0] # retain time - min_time = np.min(temp_phys_in.timeseries[0]) - temp_phys_in.timeseries[0] - temp_phys_in.timeseries[0] = temp_phys_in.timeseries[0] * freq / trigger_freq - temp_phys_in.delete_at_index(not_freq) - - # NOTE: Won't work when timeseries 0 isn't time. + if temp_phys_in.freq[temp_phys_in.trigger_idx] != freq: + example_ts = phys.timeseries[phys.freq.index(freq)] + + # Determine time + new_time = (np.arange(example_ts.shape[0]) / temp_phys_in.timeseries[ + phys.freq.index(freq)].freq + ) + new_time += np.min(temp_phys_in.timeseries[0]) + temp_phys_in.timeseries[0] = new_time + temp_phys_in.freq[0] = freq + + # Resample trigger + new_trigger = temp_phys_in.timeseries[temp_phys_in.trigger_idx] + new_trigger = resample(new_trigger, example_ts.shape[0], window=10) + temp_phys_in.timeseries[temp_phys_in.trigger_idx] = new_trigger + temp_phys_in.freq[temp_phys_in.trigger_idx] = freq + + if len(unique_frequencies) > 1: + run_fname = update_bids_name(fname, recording=str(freq) + 'Hz') + + # Drop other frequency channels + channel_idx = np.arange(temp_phys_in.ch_amount) + nonfreq_channels = [i for i in channel_idx if temp_phys_in.freq[i] != freq] + freq_channels = [i for i in channel_idx if temp_phys_in.freq[i] == freq] + temp_phys_in = temp_phys_in.delete_at_index(nonfreq_channels) + + # Update trigger channel index around dropped channels + new_trigger_idx = freq_channels.index(temp_phys_in.trigger_idx) + temp_phys_in.trigger_idx = new_trigger_idx + else: + run_fname = fname + # zero out time temp_phys_in.timeseries[0] = ( temp_phys_in.timeseries[0] - np.min(temp_phys_in.timeseries[0]) @@ -201,8 +222,6 @@ def slice_phys(phys, run_timestamps, padding=9, update_trigger=False): # Now take out the time before the scan starts temp_phys_in.timeseries[0] = temp_phys_in.timeseries[0] - time_before - if len(unique_frequencies) > 1: - run_fname = update_bids_name(fname, recording=str(freq) + 'Hz') phys_in_slices[run_fname] = temp_phys_in return phys_in_slices From 233bb11257d983ee80d4e99adca96af3c15cee80 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Sun, 19 Jul 2020 18:23:03 -0400 Subject: [PATCH 24/28] Update requirements. --- requirements.txt | 1 + setup.cfg | 3 +++ 2 files changed, 4 insertions(+) diff --git a/requirements.txt b/requirements.txt index df43293c6..63ff2c8bf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ numpy>=1.9.3, !=*rc* matplotlib>=3.1.1 ,!=3.3.0rc1 PyYAML!=*rc* +scipy pybids pandas diff --git a/setup.cfg b/setup.cfg index 1c921e44c..05642ebf2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -25,6 +25,9 @@ install_requires = numpy >=1.9.3, !=*rc* matplotlib >=3.1.1, !=3.3.0rc1 PyYAML !=*rc* + scipy + pybids + pandas tests_require = pytest >=3.6 test_suite = pytest From 56f0ad090d4cd7ec796208fcf594086ce6da661d Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Sat, 10 Oct 2020 11:06:09 -0400 Subject: [PATCH 25/28] Run black on files. --- phys2bids/bids.py | 338 +++++++++++++++++++++++++--------------- phys2bids/conversion.py | 175 +++++++++++++-------- phys2bids/physio_obj.py | 219 ++++++++++++++++---------- 3 files changed, 457 insertions(+), 275 deletions(-) diff --git a/phys2bids/bids.py b/phys2bids/bids.py index 6ee6c6c1d..50b6c3b63 100644 --- a/phys2bids/bids.py +++ b/phys2bids/bids.py @@ -12,45 +12,90 @@ LGR = logging.getLogger(__name__) UNIT_ALIASES = { - # kelvin: thermodynamic temperature - 'kelvin': 'K', 'kelvins': 'K', - # mole: amount of substance - 'mol': 'mol', 'mole': 'mol', - # newton: force, weight - 'newton': 'N', 'newtons': 'N', - # pascal: pressure, stress - 'pascal': 'Pa', 'pascals': 'Pa', 'pa': 'Pa', - # volt: voltage (electrical potential), emf - 'volt': 'V', 'volts': 'V', - # degree Celsius: temperature relative to 273.15 K - '°c': '°C', '°celsius': '°C', 'celsius': '°C', - # ampere: electric current - 'ampere': 'A', 'amp': 'A', 'amps': 'A', - # second: time and hertzs: frequency - # siemens: electric conductance (e.g. EDA) - 'siemens': 'S', - # second: time and hertzs - '1/hz': 's', '1/hertz': 's', 'hz': 'Hz', - '1/s': 'Hz', '1/second': 'Hz', '1/seconds': 'Hz', - '1/sec': 'Hz', '1/secs': 'Hz', 'hertz': 'Hz', - 'second': 's', 'seconds': 's', 'sec': 's', - 'secs': 's', - # All the aliases with one letter (to avoid issues) - 'k': 'K', 'n': 'N', 'v': 'V', 'c': '°C', 'a': 'A', 's': 's', + # kelvin: thermodynamic temperature + "kelvin": "K", + "kelvins": "K", + # mole: amount of substance + "mol": "mol", + "mole": "mol", + # newton: force, weight + "newton": "N", + "newtons": "N", + # pascal: pressure, stress + "pascal": "Pa", + "pascals": "Pa", + "pa": "Pa", + # volt: voltage (electrical potential), emf + "volt": "V", + "volts": "V", + # degree Celsius: temperature relative to 273.15 K + "°c": "°C", + "°celsius": "°C", + "celsius": "°C", + # ampere: electric current + "ampere": "A", + "amp": "A", + "amps": "A", + # second: time and hertzs: frequency + # siemens: electric conductance (e.g. EDA) + "siemens": "S", + # second: time and hertzs + "1/hz": "s", + "1/hertz": "s", + "hz": "Hz", + "1/s": "Hz", + "1/second": "Hz", + "1/seconds": "Hz", + "1/sec": "Hz", + "1/secs": "Hz", + "hertz": "Hz", + "second": "s", + "seconds": "s", + "sec": "s", + "secs": "s", + # All the aliases with one letter (to avoid issues) + "k": "K", + "n": "N", + "v": "V", + "c": "°C", + "a": "A", + "s": "s", } # Init dictionary of aliases for multipliers. Entries are still lowercase PREFIX_ALIASES = { - # Multiples - skip "mega" and only up to "tera" - 'da': 'da', 'deca': 'da', 'h': 'h', 'hecto': 'h', - 'k': 'k', 'kilo': 'k', 'g': 'G', 'giga': 'G', 't': 'T', - 'tera': 'T', - # Submultipliers - 'd': 'd', 'deci': 'd', 'c': 'c', 'centi': 'c', - 'milli': 'm', 'm': 'm', 'µ': 'µ', 'micro': 'µ', - 'n': 'n', 'nano': 'n', 'p': 'p', 'pico': 'p', - 'f': 'f', 'femto': 'f', 'a': 'a', 'atto': 'a', - 'z': 'z', 'zepto': 'z', 'y': 'y', 'yocto': 'y', + # Multiples - skip "mega" and only up to "tera" + "da": "da", + "deca": "da", + "h": "h", + "hecto": "h", + "k": "k", + "kilo": "k", + "g": "G", + "giga": "G", + "t": "T", + "tera": "T", + # Submultipliers + "d": "d", + "deci": "d", + "c": "c", + "centi": "c", + "milli": "m", + "m": "m", + "µ": "µ", + "micro": "µ", + "n": "n", + "nano": "n", + "p": "p", + "pico": "p", + "f": "f", + "femto": "f", + "a": "a", + "atto": "a", + "z": "z", + "zepto": "z", + "y": "y", + "yocto": "y", } @@ -75,60 +120,79 @@ def update_bids_name(basename, **kwargs): """ # This is hardcoded, but would be nice to use the yaml-fied BIDS entity # table when that's up and running. - ENTITY_ORDER = ['sub', 'ses', 'task', 'acq', 'ce', 'rec', 'dir', 'run', - 'mod', 'echo', 'fa', 'inv', 'mt', 'part', 'ch', - 'recording', 'proc', 'space', 'split'] + ENTITY_ORDER = [ + "sub", + "ses", + "task", + "acq", + "ce", + "rec", + "dir", + "run", + "mod", + "echo", + "fa", + "inv", + "mt", + "part", + "ch", + "recording", + "proc", + "space", + "split", + ] outdir = os.path.dirname(basename) outname = os.path.basename(basename) # Determine scan suffix (should always be physio) - suffix = outname.split('_')[-1].split('.')[0] - extension = '.' + '.'.join(outname.split('_')[-1].split('.')[1:]) + suffix = outname.split("_")[-1].split(".")[0] + extension = "." + ".".join(outname.split("_")[-1].split(".")[1:]) filetype = suffix + extension for key, val in kwargs.items(): - if key == 'suffix': - if not val.startswith('_'): - val = '_' + val + if key == "suffix": + if not val.startswith("_"): + val = "_" + val - if not val.endswith('.'): - val = val + '.' + if not val.endswith("."): + val = val + "." - outname = outname.replace('_' + suffix + '.', val) - elif key == 'extension': + outname = outname.replace("_" + suffix + ".", val) + elif key == "extension": # add leading . if not '' - if val and not val.startswith('.'): - val = '.' + val + if val and not val.startswith("."): + val = "." + val outname = outname.replace(extension, val) else: # entities if key not in ENTITY_ORDER: - raise ValueError('Key {} not understood.'.format(key)) + raise ValueError("Key {} not understood.".format(key)) if val is None: # remove entity from filename if kwarg val is None - regex = '_{}-[0-9a-zA-Z]+'.format(key) - outname = re.sub(regex, '', outname) - elif '_{}-{}'.format(key, val) in basename: - LGR.warning('Key-value pair {}/{} already found in ' - 'basename {}. Skipping.'.format(key, val, basename)) - elif '_{}-'.format(key) in basename: - LGR.warning('Key {} already found in basename {}. ' - 'Overwriting.'.format(key, basename)) - regex = '_{}-[0-9a-zA-Z]+'.format(key) - outname = re.sub(regex, '_{}-{}'.format(key, val), outname) + regex = "_{}-[0-9a-zA-Z]+".format(key) + outname = re.sub(regex, "", outname) + elif "_{}-{}".format(key, val) in basename: + LGR.warning( + "Key-value pair {}/{} already found in " + "basename {}. Skipping.".format(key, val, basename) + ) + elif "_{}-".format(key) in basename: + LGR.warning( + "Key {} already found in basename {}. " + "Overwriting.".format(key, basename) + ) + regex = "_{}-[0-9a-zA-Z]+".format(key) + outname = re.sub(regex, "_{}-{}".format(key, val), outname) else: loc = ENTITY_ORDER.index(key) entities_to_check = ENTITY_ORDER[loc:] - entities_to_check = ['_{}-'.format(etc) for etc in entities_to_check] - entities_to_check.append('_{}'.format(filetype)) + entities_to_check = ["_{}-".format(etc) for etc in entities_to_check] + entities_to_check.append("_{}".format(filetype)) for etc in entities_to_check: if etc in outname: - outname = outname.replace( - etc, - '_{}-{}{}'.format(key, val, etc) - ) + outname = outname.replace(etc, "_{}-{}{}".format(key, val, etc)) break outname = os.path.join(outdir, outname) return outname @@ -162,26 +226,29 @@ def bidsify_units(orig_unit): # check that u_key is part of unit if unit.endswith(u_key): new_unit = UNIT_ALIASES[u_key] - unit = unit[:-len(u_key)] - if unit != '': + unit = unit[: -len(u_key)] + if unit != "": # for every prefix alias - prefix = PREFIX_ALIASES.get(unit, '') - if prefix == '': - LGR.warning(f'The given unit prefix {unit} does not ' - 'have aliases, passing it as is') - prefix = orig_unit[:len(unit)] + prefix = PREFIX_ALIASES.get(unit, "") + if prefix == "": + LGR.warning( + f"The given unit prefix {unit} does not " + "have aliases, passing it as is" + ) + prefix = orig_unit[: len(unit)] return prefix + new_unit else: return new_unit # If we conclude the loop without returning, it means the unit doesn't have aliases - LGR.warning(f'The given unit {orig_unit} does not have aliases, ' - f'passing it as is') + LGR.warning( + f"The given unit {orig_unit} does not have aliases, " f"passing it as is" + ) return orig_unit -def use_heuristic(heur_file, sub, ses, filename, outdir, run='', record_label=''): +def use_heuristic(heur_file, sub, ses, filename, outdir, run="", record_label=""): """ Import and use the heuristic specified by the user to rename the file. @@ -214,47 +281,57 @@ def use_heuristic(heur_file, sub, ses, filename, outdir, run='', record_label='' utils.check_file_exists(heur_file) # Initialise a dictionary of bids_keys that has already "recording" - bids_keys = {'sub': '', 'ses': '', 'task': '', 'acq': '', 'ce': '', - 'dir': '', 'rec': '', 'run': run, 'recording': record_label} + bids_keys = { + "sub": "", + "ses": "", + "task": "", + "acq": "", + "ce": "", + "dir": "", + "rec": "", + "run": run, + "recording": record_label, + } # Start filling bids_keys dictionary and path with subject and session - if sub.startswith('sub-'): - bids_keys['sub'] = sub[4:] + if sub.startswith("sub-"): + bids_keys["sub"] = sub[4:] fldr = os.path.join(outdir, sub) else: - bids_keys['sub'] = sub - fldr = os.path.join(outdir, f'sub-{sub}') + bids_keys["sub"] = sub + fldr = os.path.join(outdir, f"sub-{sub}") if ses: - if ses.startswith('ses-'): - bids_keys['ses'] = ses[4:] + if ses.startswith("ses-"): + bids_keys["ses"] = ses[4:] fldr = os.path.join(fldr, ses) else: - bids_keys['ses'] = ses - fldr = os.path.join(fldr, f'ses-{ses}') + bids_keys["ses"] = ses + fldr = os.path.join(fldr, f"ses-{ses}") # Load heuristic and use it to fill dictionary heur = utils.load_heuristic(heur_file) bids_keys.update(heur.heur(Path(filename).stem, run)) # If bids_keys['task'] is still empty, stop the program - if not bids_keys['task']: - LGR.warning(f'The heuristic {heur_file} could not deal with' - f'{Path(filename).stem}') + if not bids_keys["task"]: + LGR.warning( + f"The heuristic {heur_file} could not deal with" f"{Path(filename).stem}" + ) raise KeyError('No "task" attribute found') # Compose name by looping in the bids_keys dictionary # and adding nonempty keys - name = '' + name = "" for key in bids_keys: - if bids_keys[key] != '': - name = f'{name}{key}-{bids_keys[key]}_' + if bids_keys[key] != "": + name = f"{name}{key}-{bids_keys[key]}_" # Finish path, create it, add filename, export - fldr = os.path.join(fldr, 'func') + fldr = os.path.join(fldr, "func") utils.path_exists_or_make_it(fldr) - heurpath = os.path.join(fldr, f'{name}physio') + heurpath = os.path.join(fldr, f"{name}physio") return heurpath @@ -276,43 +353,45 @@ def participants_file(outdir, yml, sub): Subject ID. """ - LGR.info('Updating participants.tsv ...') - file_path = os.path.join(outdir, 'participants.tsv') + LGR.info("Updating participants.tsv ...") + file_path = os.path.join(outdir, "participants.tsv") if not os.path.exists(file_path): - LGR.warning('phys2bids could not find participants.tsv') + LGR.warning("phys2bids could not find participants.tsv") # Read yaml info if file exists - if '.yml' in yml and os.path.exists(yml): - LGR.info('Using yaml data to populate participants.tsv') + if ".yml" in yml and os.path.exists(yml): + LGR.info("Using yaml data to populate participants.tsv") with open(yml) as f: yaml_data = yaml.load(f, Loader=yaml.FullLoader) - p_id = f'sub-{sub}' - p_age = yaml_data['participant']['age'] - p_sex = yaml_data['participant']['sex'] - p_handedness = yaml_data['participant']['handedness'] + p_id = f"sub-{sub}" + p_age = yaml_data["participant"]["age"] + p_sex = yaml_data["participant"]["sex"] + p_handedness = yaml_data["participant"]["handedness"] else: - LGR.info('No yaml file was provided. Using phys2bids data to ' - 'populate participants.tsv') + LGR.info( + "No yaml file was provided. Using phys2bids data to " + "populate participants.tsv" + ) # Fill in with data from phys2bids - p_id = f'sub-{sub}' - p_age = 'n/a' - p_sex = 'n/a' - p_handedness = 'n/a' + p_id = f"sub-{sub}" + p_age = "n/a" + p_sex = "n/a" + p_handedness = "n/a" # Write to participants.tsv file - header = ['participant_id', 'age', 'sex', 'handedness'] + header = ["participant_id", "age", "sex", "handedness"] utils.append_list_as_row(file_path, header) participants_data = [p_id, p_age, p_sex, p_handedness] utils.append_list_as_row(file_path, participants_data) else: # If participants.tsv exists only update when subject is not there - LGR.info('phys2bids found participants.tsv. Updating if needed...') + LGR.info("phys2bids found participants.tsv. Updating if needed...") # Find participant_id column in header - pf = open(file_path, 'r') + pf = open(file_path, "r") header = pf.readline().split("\t") header_length = len(header) pf.close() - p_id_idx = header.index('participant_id') + p_id_idx = header.index("participant_id") # Check if subject is already in the file sub_exists = False @@ -324,9 +403,9 @@ def participants_file(outdir, yml, sub): break # Only append to file if subject is not in the file if not sub_exists: - LGR.info(f'Appending subjet sub-{sub} to participants.tsv ...') - participants_data = ['n/a'] * header_length - participants_data[p_id_idx] = f'sub-{sub}' + LGR.info(f"Appending subjet sub-{sub} to participants.tsv ...") + participants_data = ["n/a"] * header_length + participants_data[p_id_idx] = f"sub-{sub}" utils.append_list_as_row(file_path, participants_data) @@ -343,13 +422,18 @@ def dataset_description_file(outdir): """ # dictionary that will be written for the basic dataset description version - data_dict = {"Name": os.path.splitext(os.path.basename(outdir))[0], - "BIDSVersion": "1.4.0", "DatasetType": "raw"} - file_path = os.path.join(outdir, 'dataset_description.json') + data_dict = { + "Name": os.path.splitext(os.path.basename(outdir))[0], + "BIDSVersion": "1.4.0", + "DatasetType": "raw", + } + file_path = os.path.join(outdir, "dataset_description.json") # check if dataset_description.json exists, if it doesn't create it if not os.path.exists(file_path): - LGR.warning('phys2bids could not find dataset_description.json,' - 'generating it with provided info') + LGR.warning( + "phys2bids could not find dataset_description.json," + "generating it with provided info" + ) utils.writejson(file_path, data_dict) @@ -365,9 +449,11 @@ def readme_file(outdir): Full path to the output directory. """ - file_path = os.path.join(outdir, 'README.md') + file_path = os.path.join(outdir, "README.md") if not os.path.exists(file_path): - text = 'Empty README, please fill in describing the dataset in more detail.' - LGR.warning('phys2bids could not find README,' - 'generating it EMPTY, please fill in the necessary info') - utils.writefile(file_path, '', text) + text = "Empty README, please fill in describing the dataset in more detail." + LGR.warning( + "phys2bids could not find README," + "generating it EMPTY, please fill in the necessary info" + ) + utils.writefile(file_path, "", text) diff --git a/phys2bids/conversion.py b/phys2bids/conversion.py index 48fa577be..becc387d8 100644 --- a/phys2bids/conversion.py +++ b/phys2bids/conversion.py @@ -47,34 +47,37 @@ def load_scan_data(layout, sub, ses=None): # Collect acquisition times # NOTE: Will be replaced with scans file when heudiconv makes the change - img_files = layout.get(datatype='func', suffix='bold', - extension=['.nii.gz', '.nii'], - subject=sub, session=ses) + img_files = layout.get( + datatype="func", + suffix="bold", + extension=[".nii.gz", ".nii"], + subject=sub, + session=ses, + ) df = pd.DataFrame( - columns=['original_filename', 'acq_time'], + columns=["original_filename", "acq_time"], ) for i, img_file in enumerate(img_files): - df.loc[i, 'original_filename'] = img_file.path - df.loc[i, 'acq_time'] = img_file.get_metadata()['AcquisitionTime'] + df.loc[i, "original_filename"] = img_file.path + df.loc[i, "acq_time"] = img_file.get_metadata()["AcquisitionTime"] # Get generic filenames (without within-acquisition entities like echo) - df['filename'] = df['original_filename'].apply(update_bids_name, - echo=None, fa=None, - inv=None, mt=None, part=None, - ch=None) + df["filename"] = df["original_filename"].apply( + update_bids_name, echo=None, fa=None, inv=None, mt=None, part=None, ch=None + ) # Get "first" scan from multi-file acquisitions - df['acq_time'] = pd.to_datetime(df['acq_time']) - df = df.sort_values(by='acq_time') - df = df.drop_duplicates(subset='filename', keep='first', ignore_index=True) + df["acq_time"] = pd.to_datetime(df["acq_time"]) + df = df.sort_values(by="acq_time") + df = df.drop_duplicates(subset="filename", keep="first", ignore_index=True) # Now back to general-purpose code df = determine_scan_durations(layout, df, sub=sub, ses=ses) - df = df.dropna(subset=['duration']) # limit to relevant scans + df = df.dropna(subset=["duration"]) # limit to relevant scans # Convert scan times to relative onsets (first scan is at 0 seconds) - df['onset'] = (df['acq_time'] - df['acq_time'].min()) - df['onset'] = df['onset'].dt.total_seconds() + df["onset"] = df["acq_time"] - df["acq_time"].min() + df["onset"] = df["onset"].dt.total_seconds() return df @@ -102,19 +105,23 @@ def determine_scan_durations(layout, scan_df, sub, ses=None): Updated DataFrame with new "duration" column. Calculated durations are in seconds. """ - func_files = layout.get(datatype='func', suffix='bold', - extension=['.nii.gz', '.nii'], - subject=sub, session=ses) - scan_df['duration'] = None + func_files = layout.get( + datatype="func", + suffix="bold", + extension=[".nii.gz", ".nii"], + subject=sub, + session=ses, + ) + scan_df["duration"] = None for func_file in func_files: filename = func_file.path - if filename in scan_df['original_filename'].values: + if filename in scan_df["original_filename"].values: n_vols = nib.load(filename).shape[3] - tr = func_file.get_metadata()['RepetitionTime'] + tr = func_file.get_metadata()["RepetitionTime"] duration = n_vols * tr - scan_df.loc[scan_df['original_filename'] == filename, 'duration'] = duration + scan_df.loc[scan_df["original_filename"] == filename, "duration"] = duration else: - LGR.info('Skipping {}'.format(op.basename(filename))) + LGR.info("Skipping {}".format(op.basename(filename))) return scan_df @@ -139,7 +146,7 @@ def extract_physio_onsets(trigger_timeseries, freq, threshold=0.5): onset (in seconds), index (onsets in index of time series array), and duration (in seconds). """ - samplerate = 1. / freq + samplerate = 1.0 / freq scan_idx = np.where(trigger_timeseries > 0)[0] # Get groups of consecutive numbers in index groups = [] @@ -152,11 +159,11 @@ def extract_physio_onsets(trigger_timeseries, freq, threshold=0.5): durations = np.array([g[-1] - g[0] for g in groups]) durations_in_sec = durations * samplerate df = pd.DataFrame( - columns=['onset'], + columns=["onset"], data=onsets_in_sec, ) - df['index'] = onsets - df['duration'] = durations_in_sec + df["index"] = onsets + df["duration"] = durations_in_sec return df @@ -190,14 +197,14 @@ def synchronize_onsets(phys_df, scan_df): seconds and in indices of the physio trigger channel, as well as scan duration in units of the physio trigger channel. """ - phys_df = phys_df.sort_values(by=['onset']) - scan_df = scan_df.sort_values(by=['onset']) + phys_df = phys_df.sort_values(by=["onset"]) + scan_df = scan_df.sort_values(by=["onset"]) # Get difference between each physio trigger onset and each scan onset onset_diffs = np.zeros((scan_df.shape[0], phys_df.shape[0])) for i, i_scan in scan_df.iterrows(): for j, j_phys in phys_df.iterrows(): - onset_diff = j_phys['onset'] - i_scan['onset'] + onset_diff = j_phys["onset"] - i_scan["onset"] onset_diffs[i, j] = onset_diff # Find the delay that gives the smallest difference between scan onsets @@ -230,17 +237,19 @@ def synchronize_onsets(phys_df, scan_df): min_val = min(min_diffs_tmp) min_diffs += min_val offset += min_val - LGR.info('Scan onsets should be adjusted forward by {} seconds to best ' - 'match physio onsets.'.format(offset)) + LGR.info( + "Scan onsets should be adjusted forward by {} seconds to best " + "match physio onsets.".format(offset) + ) # Get onset of each scan in terms of the physio time series - scan_df['phys_onset'] = scan_df['onset'] + offset - rise = (phys_df.loc[1, 'index'] - phys_df.loc[0, 'index']) - run = (phys_df.loc[1, 'onset'] - phys_df.loc[0, 'onset']) + scan_df["phys_onset"] = scan_df["onset"] + offset + rise = phys_df.loc[1, "index"] - phys_df.loc[0, "index"] + run = phys_df.loc[1, "onset"] - phys_df.loc[0, "onset"] samplerate = rise / run - scan_df['index_onset'] = (scan_df['phys_onset'] * samplerate).astype(int) - scan_df['index_duration'] = (scan_df['duration'] * samplerate).astype(int) - scan_df['index_offset'] = scan_df['index_onset'] + scan_df['index_duration'] + scan_df["index_onset"] = (scan_df["phys_onset"] * samplerate).astype(int) + scan_df["index_duration"] = (scan_df["duration"] * samplerate).astype(int) + scan_df["index_offset"] = scan_df["index_onset"] + scan_df["index_duration"] return scan_df @@ -271,11 +280,19 @@ def plot_sync(scan_df, physio_df): fig, axes = plt.subplots(nrows=2, figsize=(20, 6), sharex=True) # get max (onset time + duration) rounded up to nearest 1000 - max_ = int(1000 * np.ceil(max(( - (physio_df['onset'] + physio_df['duration']).max(), - (scan_df['onset'] + scan_df['duration']).max(), - (scan_df['phys_onset'] + scan_df['duration']).max() - )) / 1000)) + max_ = int( + 1000 + * np.ceil( + max( + ( + (physio_df["onset"] + physio_df["duration"]).max(), + (scan_df["onset"] + scan_df["duration"]).max(), + (scan_df["phys_onset"] + scan_df["duration"]).max(), + ) + ) + / 1000 + ) + ) # get x-axis values scalar = 10 @@ -287,44 +304,70 @@ def plot_sync(scan_df, physio_df): func_timeseries = np.zeros(x.shape) for i, row in scan_df.iterrows(): func_timeseries[ - int(row['onset'] * scalar):int((row['onset'] + row['duration']) * scalar) + int(row["onset"] * scalar) : int((row["onset"] + row["duration"]) * scalar) ] = 1 for i, row in physio_df.iterrows(): physio_timeseries[ - int(row['onset'] * scalar):int((row['onset'] + row['duration']) * scalar) + int(row["onset"] * scalar) : int((row["onset"] + row["duration"]) * scalar) ] = 0.5 - axes[0].fill_between(x, func_timeseries, where=func_timeseries >= 0, - interpolate=True, color='red', alpha=0.3, - label='Functional scans') - axes[0].fill_between(x, physio_timeseries, where=physio_timeseries >= 0, - interpolate=True, color='blue', alpha=0.3, - label='Physio triggers') + axes[0].fill_between( + x, + func_timeseries, + where=func_timeseries >= 0, + interpolate=True, + color="red", + alpha=0.3, + label="Functional scans", + ) + axes[0].fill_between( + x, + physio_timeseries, + where=physio_timeseries >= 0, + interpolate=True, + color="blue", + alpha=0.3, + label="Physio triggers", + ) # now plot the adjusted onsets (and original durations) in the bottom axis physio_timeseries = np.zeros(x.shape) func_timeseries = np.zeros(x.shape) for i, row in scan_df.iterrows(): func_timeseries[ - int(row['phys_onset'] * scalar):int((row['phys_onset'] + row['duration']) * scalar) + int(row["phys_onset"] * scalar) : int( + (row["phys_onset"] + row["duration"]) * scalar + ) ] = 1 for i, row in physio_df.iterrows(): physio_timeseries[ - int(row['onset'] * scalar):int((row['onset'] + row['duration']) * scalar) + int(row["onset"] * scalar) : int((row["onset"] + row["duration"]) * scalar) ] = 0.5 - axes[1].fill_between(x, func_timeseries, where=func_timeseries >= 0, - interpolate=True, color='red', alpha=0.3, - label='Functional scans') - axes[1].fill_between(x, physio_timeseries, where=physio_timeseries >= 0, - interpolate=True, color='blue', alpha=0.3, - label='Physio triggers') + axes[1].fill_between( + x, + func_timeseries, + where=func_timeseries >= 0, + interpolate=True, + color="red", + alpha=0.3, + label="Functional scans", + ) + axes[1].fill_between( + x, + physio_timeseries, + where=physio_timeseries >= 0, + interpolate=True, + color="blue", + alpha=0.3, + label="Physio triggers", + ) axes[0].set_xlim((min(x), max(x))) axes[0].set_ylim((0, None)) - axes[1].set_xlabel('Time (s)') + axes[1].set_xlabel("Time (s)") axes[0].legend() return fig, axes @@ -394,15 +437,17 @@ def workflow(physio, bids_dir, sub, ses=None, padding=9, update_trigger=False): # we should do something better with this figure, but it's nice to have for QC fig, axes = plot_sync(scan_df, physio_df) - fig.savefig('synchronization_results.png') + fig.savefig("synchronization_results.png") run_dict = {} # could probably be replaced with apply() followed by to_dict() for _, row in scan_df.iterrows(): - base_fname = update_bids_name(row['filename'], suffix='physio', extension='') - split_times = (row['index_onset'], row['index_offset']) + base_fname = update_bids_name(row["filename"], suffix="physio", extension="") + split_times = (row["index_onset"], row["index_offset"]) run_dict[base_fname] = split_times - phys_dict = slice_phys(physio, run_dict, padding=padding, update_trigger=update_trigger) + phys_dict = slice_phys( + physio, run_dict, padding=padding, update_trigger=update_trigger + ) outputs = [] for k, v in phys_dict.items(): output = BlueprintOutput.init_from_blueprint(v) diff --git a/phys2bids/physio_obj.py b/phys2bids/physio_obj.py index 04712905f..e1f8ef57c 100644 --- a/phys2bids/physio_obj.py +++ b/phys2bids/physio_obj.py @@ -40,7 +40,7 @@ def is_valid(var, var_type, list_type=None): If var is not of var_type """ if not isinstance(var, var_type): - raise AttributeError(f'The given variable is not a {var_type}') + raise AttributeError(f"The given variable is not a {var_type}") if var_type is list and list_type is not None: for element in var: @@ -104,7 +104,7 @@ def are_equal(self, other): def _deal_with_dict_value_error(self, other): # Check if "self" has a 'timeseries' key. If not, return False. try: - self['timeseries'] + self["timeseries"] except KeyError: return False except TypeError: @@ -113,16 +113,18 @@ def _deal_with_dict_value_error(self, other): # Check that the two objects have the same keys. # If not, return False, otherwise loop through the timeseries key. if self.keys() == other.keys(): - alltrue_timeseries = [False] * len(self['timeseries']) + alltrue_timeseries = [False] * len(self["timeseries"]) alltrue_keys = [False] * len(self) for j, key in enumerate(self.keys()): - if key == 'timeseries': - for i in range(len(self['timeseries'])): - alltrue_timeseries[i] = (self['timeseries'][i].all() - == other['timeseries'][i].all()) + if key == "timeseries": + for i in range(len(self["timeseries"])): + alltrue_timeseries[i] = ( + self["timeseries"][i].all() + == other["timeseries"][i].all() + ) alltrue_keys[j] = all(alltrue_timeseries) else: - alltrue_keys[j] = (self[key] == other[key]) + alltrue_keys[j] = self[key] == other[key] return all(alltrue_keys) else: return False @@ -155,7 +157,7 @@ def _deal_with_dict_value_error(self, other): return _deal_with_dict_value_error(self, other) -class BlueprintInput(): +class BlueprintInput: """ Main input object for phys2bids. @@ -227,15 +229,24 @@ class BlueprintInput(): - Actual number of channels +1 <= ch_amount """ - def __init__(self, timeseries, freq, ch_name, units, trigger_idx, - num_timepoints_found=None, thr=None, time_offset=0): + def __init__( + self, + timeseries, + freq, + ch_name, + units, + trigger_idx, + num_timepoints_found=None, + thr=None, + time_offset=0, + ): """Initialise BlueprintInput (see class docstring).""" self.timeseries = is_valid(timeseries, list, list_type=np.ndarray) - self.freq = has_size(is_valid(freq, list, - list_type=(int, float)), - self.ch_amount, 0.0) - self.ch_name = has_size(ch_name, self.ch_amount, 'unknown') - self.units = has_size(units, self.ch_amount, '[]') + self.freq = has_size( + is_valid(freq, list, list_type=(int, float)), self.ch_amount, 0.0 + ) + self.ch_name = has_size(ch_name, self.ch_amount, "unknown") + self.units = has_size(units, self.ch_amount, "[]") self.trigger_idx = is_valid(trigger_idx, int) self.num_timepoints_found = num_timepoints_found self.thr = thr @@ -308,33 +319,43 @@ def __getitem__(self, idx): # Check that the indexes are not out of bounds if idx.start >= trigger_length or idx.stop > trigger_length: - raise IndexError(f'Slice ({idx.start}, {idx.stop}) is out of ' - f'bounds for channel {self.trigger_idx} ' - f'with size {trigger_length}') + raise IndexError( + f"Slice ({idx.start}, {idx.stop}) is out of " + f"bounds for channel {self.trigger_idx} " + f"with size {trigger_length}" + ) # Operate on each channel on its own for n, channel in enumerate(self.timeseries): - idx_dict = {'start': idx.start, 'stop': idx.stop, 'step': idx.step} + idx_dict = {"start": idx.start, "stop": idx.stop, "step": idx.step} # Adapt the slicing indexes to the right requency - for i in ['start', 'stop', 'step']: + for i in ["start", "stop", "step"]: if idx_dict[i]: - idx_dict[i] = int(np.floor(self.freq[n] - / self.freq[self.trigger_idx] - * idx_dict[i])) + idx_dict[i] = int( + np.floor( + self.freq[n] / self.freq[self.trigger_idx] * idx_dict[i] + ) + ) # Correct the slicing stop if necessary - if idx_dict['start'] == idx_dict['stop'] or return_instant: - idx_dict['stop'] = idx_dict['start'] + 1 + if idx_dict["start"] == idx_dict["stop"] or return_instant: + idx_dict["stop"] = idx_dict["start"] + 1 elif trigger_length == idx.stop: - idx_dict['stop'] = len(channel) + idx_dict["stop"] = len(channel) - new_idx = slice(idx_dict['start'], idx_dict['stop'], idx_dict['step']) + new_idx = slice(idx_dict["start"], idx_dict["stop"], idx_dict["step"]) sliced_timeseries[n] = channel[new_idx] - return BlueprintInput(sliced_timeseries, self.freq, self.ch_name, - self.units, self.trigger_idx, - self.num_timepoints_found, self.thr, - self.time_offset) + return BlueprintInput( + sliced_timeseries, + self.freq, + self.ch_name, + self.units, + self.trigger_idx, + self.num_timepoints_found, + self.thr, + self.time_offset, + ) def __eq__(self, other): """ @@ -368,13 +389,16 @@ def rename_channels(self, new_names): self.ch_name: list of str Changes content to new_name. """ - if 'time' in new_names: - del new_names[new_names.index('time')] + if "time" in new_names: + del new_names[new_names.index("time")] - new_names = ['time', ] + new_names + new_names = [ + "time", + ] + new_names - self.ch_name = has_size(is_valid(new_names, list, list_type=str), - self.ch_amount, 'unknown') + self.ch_name = has_size( + is_valid(new_names, list, list_type=str), self.ch_amount, "unknown" + ) def return_index(self, idx): """ @@ -391,8 +415,13 @@ def return_index(self, idx): Tuple containing the proper list entry of all the properties of the object with index `idx` """ - return (self.timeseries[idx], self.ch_amount, self.freq[idx], - self.ch_name[idx], self.units[idx]) + return ( + self.timeseries[idx], + self.ch_amount, + self.freq[idx], + self.ch_name[idx], + self.units[idx], + ) def delete_at_index(self, idx): """ @@ -423,8 +452,10 @@ def delete_at_index(self, idx): del self.units[idx] if self.trigger_idx == idx: - LGR.warning('Removing trigger channel - are you sure you are doing' - 'the right thing?') + LGR.warning( + "Removing trigger channel - are you sure you are doing" + "the right thing?" + ) self.trigger_idx = 0 def check_trigger_amount(self, thr=None, num_timepoints_expected=0, tr=0): @@ -454,56 +485,67 @@ def check_trigger_amount(self, thr=None, num_timepoints_expected=0, tr=0): The property `timeseries` is shifted with the 0 being the time of first trigger. """ - LGR.info('Counting trigger points') + LGR.info("Counting trigger points") # Use the trigger channel to find the TRs, # comparing it to a given threshold. trigger = self.timeseries[self.trigger_idx] - LGR.info(f'The trigger is in channel {self.trigger_idx}') + LGR.info(f"The trigger is in channel {self.trigger_idx}") flag = 0 if thr is None: thr = np.mean(trigger) + 2 * np.std(trigger) flag = 1 timepoints = trigger > thr - num_timepoints_found = len([is_true for is_true, _ in groupby(timepoints, - lambda x: x != 0) if is_true]) + num_timepoints_found = len( + [is_true for is_true, _ in groupby(timepoints, lambda x: x != 0) if is_true] + ) if flag == 1: - LGR.info(f'The number of timepoints according to the std_thr method ' - f'is {num_timepoints_found}. The computed threshold is {thr:.4f}') + LGR.info( + f"The number of timepoints according to the std_thr method " + f"is {num_timepoints_found}. The computed threshold is {thr:.4f}" + ) else: - LGR.info(f'The number of timepoints found with the manual threshold of {thr:.4f} ' - f'is {num_timepoints_found}') + LGR.info( + f"The number of timepoints found with the manual threshold of {thr:.4f} " + f"is {num_timepoints_found}" + ) time_offset = self.timeseries[0][timepoints.argmax()] if num_timepoints_expected: - LGR.info('Checking number of timepoints') + LGR.info("Checking number of timepoints") if num_timepoints_found > num_timepoints_expected: - timepoints_extra = (num_timepoints_found - - num_timepoints_expected) - LGR.warning(f'Found {timepoints_extra} timepoints' - ' more than expected!\n' - 'Assuming extra timepoints are at the end ' - '(try again with a more liberal thr)') + timepoints_extra = num_timepoints_found - num_timepoints_expected + LGR.warning( + f"Found {timepoints_extra} timepoints" + " more than expected!\n" + "Assuming extra timepoints are at the end " + "(try again with a more liberal thr)" + ) elif num_timepoints_found < num_timepoints_expected: - timepoints_missing = (num_timepoints_expected - - num_timepoints_found) - LGR.warning(f'Found {timepoints_missing} timepoints' - ' less than expected!') + timepoints_missing = num_timepoints_expected - num_timepoints_found + LGR.warning( + f"Found {timepoints_missing} timepoints" " less than expected!" + ) if tr: - LGR.warning('Correcting time offset, assuming missing ' - 'timepoints are at the beginning (try again ' - 'with a more conservative thr)') - time_offset -= (timepoints_missing * tr) + LGR.warning( + "Correcting time offset, assuming missing " + "timepoints are at the beginning (try again " + "with a more conservative thr)" + ) + time_offset -= timepoints_missing * tr else: - LGR.warning('Can\'t correct time offset - you should ' - 'specify the TR') + LGR.warning( + "Can't correct time offset - you should " "specify the TR" + ) else: - LGR.info('Found just the right amount of timepoints!') + LGR.info("Found just the right amount of timepoints!") else: - LGR.warning('The necessary options to find the amount of timepoints ' - 'were not provided.') + LGR.warning( + "The necessary options to find the amount of timepoints " + "were not provided." + ) self.thr = thr self.time_offset = time_offset self.timeseries[0] -= time_offset @@ -525,17 +567,20 @@ def print_info(self, filename): Returns to stdout (e.g. on screen) channels, their names and their sampling rate. """ - info = (f'\n------------------------------------------------' - f'\nFile {filename} contains:\n') + info = ( + f"\n------------------------------------------------" + f"\nFile {filename} contains:\n" + ) for ch in range(1, self.ch_amount): - info = info + (f'{ch:02d}. {self.ch_name[ch]};' - f' sampled at {self.freq[ch]} Hz\n') - info = info + '------------------------------------------------\n' + info = info + ( + f"{ch:02d}. {self.ch_name[ch]};" f" sampled at {self.freq[ch]} Hz\n" + ) + info = info + "------------------------------------------------\n" LGR.info(info) -class BlueprintOutput(): +class BlueprintOutput: """ Main output object for phys2bids. @@ -576,12 +621,12 @@ class BlueprintOutput(): method to populate from input blueprint instead of init """ - def __init__(self, timeseries, freq, ch_name, units, start_time, filename=''): + def __init__(self, timeseries, freq, ch_name, units, start_time, filename=""): """Initialise BlueprintOutput (see class docstring).""" self.timeseries = is_valid(timeseries, np.ndarray) self.freq = is_valid(freq, (int, float)) - self.ch_name = has_size(ch_name, self.ch_amount, 'unknown') - self.units = has_size(units, self.ch_amount, '[]') + self.ch_name = has_size(ch_name, self.ch_amount, "unknown") + self.units = has_size(units, self.ch_amount, "[]") self.start_time = start_time self.filename = is_valid(filename, str) @@ -627,8 +672,14 @@ def return_index(self, idx): Tuple containing the proper list entry of all the properties of the object with index `idx` """ - return (self.timeseries[:, idx], self.ch_amount, self.freq, - self.ch_name[idx], self.units[idx], self.start_time) + return ( + self.timeseries[:, idx], + self.ch_amount, + self.freq, + self.ch_name[idx], + self.units[idx], + self.start_time, + ) def delete_at_index(self, idx): """ @@ -682,7 +733,7 @@ def init_from_blueprint(cls, blueprint): return cls(timeseries, freq, ch_name, units, start_time) def save(self): - np.savetxt(self.filename + '.tsv.gz', - self.timeseries, fmt='%.8e', delimiter='\t') - save_json(self.filename + '.json', self.freq, - self.start_time, self.ch_name) + np.savetxt( + self.filename + ".tsv.gz", self.timeseries, fmt="%.8e", delimiter="\t" + ) + save_json(self.filename + ".json", self.freq, self.start_time, self.ch_name) From bb8166f58151d99ebc8005e3411331e5ab2ed84e Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Sat, 10 Oct 2020 11:07:36 -0400 Subject: [PATCH 26/28] Move test of function I'd renamed. --- phys2bids/tests/test_phys2bids.py | 20 -------------------- phys2bids/tests/test_utils.py | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/phys2bids/tests/test_phys2bids.py b/phys2bids/tests/test_phys2bids.py index 3593a5828..ed7cd3bc0 100644 --- a/phys2bids/tests/test_phys2bids.py +++ b/phys2bids/tests/test_phys2bids.py @@ -24,26 +24,6 @@ def test_print_summary(tmpdir): assert os.path.isfile(str(test_outfile_log)) -def test_print_json(tmpdir): - test_outfile = tmpdir.join('json_test') - test_outfile_json = tmpdir.join('json_test.json') - test_samp_freq = 0.1 - test_time_offset = -0.5 - test_ch_name = 'foo' - - phys2bids.print_json(str(test_outfile), test_samp_freq, test_time_offset, test_ch_name) - - assert os.path.isfile(test_outfile_json) - - test_json_data = dict(SamplingFrequency=0.1, - StartTime=0.5, - Columns='foo') - with open(test_outfile_json, 'r') as src: - loaded_data = json.load(src) - - assert test_json_data == loaded_data - - def test_raise_exception(samefreq_full_acq_file): test_filename = 'input.txt' diff --git a/phys2bids/tests/test_utils.py b/phys2bids/tests/test_utils.py index 125f117ef..4f8d03129 100644 --- a/phys2bids/tests/test_utils.py +++ b/phys2bids/tests/test_utils.py @@ -86,6 +86,26 @@ def test_writefile(tmpdir): utils.writefile(test_old_path, ext, test_text) +def test_save_json(tmpdir): + test_outfile = tmpdir.join('json_test') + test_outfile_json = tmpdir.join('json_test.json') + test_samp_freq = 0.1 + test_time_offset = -0.5 + test_ch_name = 'foo' + + utils.save_json(str(test_outfile), test_samp_freq, test_time_offset, test_ch_name) + + assert os.path.isfile(test_outfile_json) + + test_json_data = dict(SamplingFrequency=0.1, + StartTime=0.5, + Columns='foo') + with open(test_outfile_json, 'r') as src: + loaded_data = json.load(src) + + assert test_json_data == loaded_data + + # Tests writejson def test_writejson(tmpdir): test_json_filename = tmpdir.join('foo') From 428dd9fae02d2f363dbf400a443b876ed23a3a81 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Sat, 10 Oct 2020 11:47:24 -0400 Subject: [PATCH 27/28] Fix style issues. Docstring rules are way too aggressive. Also, E203 should be ignored. It's wrong. --- phys2bids/bids.py | 3 +-- phys2bids/conversion.py | 25 +++++++++++++------------ phys2bids/physio_obj.py | 7 +++++++ 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/phys2bids/bids.py b/phys2bids/bids.py index 50b6c3b63..f5632d55e 100644 --- a/phys2bids/bids.py +++ b/phys2bids/bids.py @@ -101,8 +101,7 @@ def update_bids_name(basename, **kwargs): """ - Add entities, suffix, and/or extension to a BIDS filename while retaining - BIDS compatibility. + Add elements to a BIDS filename while retaining BIDS compatibility. Parameters ---------- diff --git a/phys2bids/conversion.py b/phys2bids/conversion.py index becc387d8..bb74c0fb7 100644 --- a/phys2bids/conversion.py +++ b/phys2bids/conversion.py @@ -1,6 +1,5 @@ """ -A small module (to be broken up) with functions for synchronizing multi-run -physio files with pre-converted BIDS imaging data. +Functions for synchronizing multi-run physio files with pre-converted BIDS imaging data. """ import os.path as op from operator import itemgetter @@ -83,6 +82,8 @@ def load_scan_data(layout, sub, ses=None): def determine_scan_durations(layout, scan_df, sub, ses=None): """ + Determine scan durations. + Extract scan durations by loading fMRI files/metadata and multiplying TR by number of volumes. This can be used to determine the endpoints for the physio files. @@ -127,8 +128,7 @@ def determine_scan_durations(layout, scan_df, sub, ses=None): def extract_physio_onsets(trigger_timeseries, freq, threshold=0.5): """ - Collect onsets from physio file, both in terms of seconds and time series - indices. + Collect onsets from physio file, both in terms of seconds and time series indices. Parameters ---------- @@ -169,9 +169,9 @@ def extract_physio_onsets(trigger_timeseries, freq, threshold=0.5): def synchronize_onsets(phys_df, scan_df): """ - Find matching scans and physio trigger periods from separate DataFrames, - using time differences within each DataFrame. + Find matching scans and physio trigger periods from separate DataFrames. + Uses time differences within each DataFrame. There can be fewer physios than scans (task failed to trigger physio) or fewer scans than physios (aborted scans are not retained in BIDS dataset). @@ -304,12 +304,12 @@ def plot_sync(scan_df, physio_df): func_timeseries = np.zeros(x.shape) for i, row in scan_df.iterrows(): func_timeseries[ - int(row["onset"] * scalar) : int((row["onset"] + row["duration"]) * scalar) + int(row["onset"] * scalar): int((row["onset"] + row["duration"]) * scalar) ] = 1 for i, row in physio_df.iterrows(): physio_timeseries[ - int(row["onset"] * scalar) : int((row["onset"] + row["duration"]) * scalar) + int(row["onset"] * scalar): int((row["onset"] + row["duration"]) * scalar) ] = 0.5 axes[0].fill_between( @@ -336,14 +336,14 @@ def plot_sync(scan_df, physio_df): func_timeseries = np.zeros(x.shape) for i, row in scan_df.iterrows(): func_timeseries[ - int(row["phys_onset"] * scalar) : int( + int(row["phys_onset"] * scalar): int( (row["phys_onset"] + row["duration"]) * scalar ) ] = 1 for i, row in physio_df.iterrows(): physio_timeseries[ - int(row["onset"] * scalar) : int((row["onset"] + row["duration"]) * scalar) + int(row["onset"] * scalar): int((row["onset"] + row["duration"]) * scalar) ] = 0.5 axes[1].fill_between( @@ -374,8 +374,9 @@ def plot_sync(scan_df, physio_df): def workflow(physio, bids_dir, sub, ses=None, padding=9, update_trigger=False): """ - A potential workflow for running physio/scan onset synchronization and - BIDSification. This workflow writes out physio files to a BIDS dataset. + Run physio/scan onset synchronization and BIDSification. + + This workflow writes out physio files to a BIDS dataset. Parameters ---------- diff --git a/phys2bids/physio_obj.py b/phys2bids/physio_obj.py index e1f8ef57c..227c13d00 100644 --- a/phys2bids/physio_obj.py +++ b/phys2bids/physio_obj.py @@ -733,6 +733,13 @@ def init_from_blueprint(cls, blueprint): return cls(timeseries, freq, ch_name, units, start_time) def save(self): + """ + Save object to BIDS-format pair of files. + + Notes + ----- + Saves a tsv.gz file and a json file. + """ np.savetxt( self.filename + ".tsv.gz", self.timeseries, fmt="%.8e", delimiter="\t" ) From bc7698a0e3e45b886789c3dff1abdd390c6aafbc Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Sat, 10 Oct 2020 12:44:08 -0400 Subject: [PATCH 28/28] Update conversion.py --- phys2bids/conversion.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/phys2bids/conversion.py b/phys2bids/conversion.py index bb74c0fb7..354a28422 100644 --- a/phys2bids/conversion.py +++ b/phys2bids/conversion.py @@ -1,6 +1,4 @@ -""" -Functions for synchronizing multi-run physio files with pre-converted BIDS imaging data. -""" +"""Functions for synchronizing multi-run physio files with BIDS imaging data.""" import os.path as op from operator import itemgetter from itertools import groupby