diff --git a/phys2bids/bids.py b/phys2bids/bids.py index ccaa8d386..801dc05be 100644 --- a/phys2bids/bids.py +++ b/phys2bids/bids.py @@ -1,6 +1,7 @@ """BIDS functions for phys2bids package.""" import logging import os +import re from csv import reader from pathlib import Path @@ -11,48 +12,191 @@ LGR = logging.getLogger(__name__) UNIT_ALIASES = { - # kelvin: thermodynamic temperature - 'kelvin': 'K', 'kelvins': 'K', - # mole: amount of substance - 'mol': 'mol', 'mole': 'mol', - # newton: force, weight - 'newton': 'N', 'newtons': 'N', - # pascal: pressure, stress - 'pascal': 'Pa', 'pascals': 'Pa', 'pa': 'Pa', - # volt: voltage (electrical potential), emf - 'volt': 'V', 'volts': 'V', - # degree Celsius: temperature relative to 273.15 K - '°c': '°C', '°celsius': '°C', 'celsius': '°C', - # ampere: electric current - 'ampere': 'A', 'amp': 'A', 'amps': 'A', - # second: time and hertzs: frequency - # siemens: electric conductance (e.g. EDA) - 'siemens': 'S', - # second: time and hertzs - '1/hz': 's', '1/hertz': 's', 'hz': 'Hz', - '1/s': 'Hz', '1/second': 'Hz', '1/seconds': 'Hz', - '1/sec': 'Hz', '1/secs': 'Hz', 'hertz': 'Hz', - 'second': 's', 'seconds': 's', 'sec': 's', - 'secs': 's', - # All the aliases with one letter (to avoid issues) - 'k': 'K', 'n': 'N', 'v': 'V', 'c': '°C', 'a': 'A', 's': 's', + # kelvin: thermodynamic temperature + "kelvin": "K", + "kelvins": "K", + # mole: amount of substance + "mol": "mol", + "mole": "mol", + # newton: force, weight + "newton": "N", + "newtons": "N", + # pascal: pressure, stress + "pascal": "Pa", + "pascals": "Pa", + "pa": "Pa", + # volt: voltage (electrical potential), emf + "volt": "V", + "volts": "V", + # degree Celsius: temperature relative to 273.15 K + "°c": "°C", + "°celsius": "°C", + "celsius": "°C", + # ampere: electric current + "ampere": "A", + "amp": "A", + "amps": "A", + # second: time and hertzs: frequency + # siemens: electric conductance (e.g. EDA) + "siemens": "S", + # second: time and hertzs + "1/hz": "s", + "1/hertz": "s", + "hz": "Hz", + "1/s": "Hz", + "1/second": "Hz", + "1/seconds": "Hz", + "1/sec": "Hz", + "1/secs": "Hz", + "hertz": "Hz", + "second": "s", + "seconds": "s", + "sec": "s", + "secs": "s", + # All the aliases with one letter (to avoid issues) + "k": "K", + "n": "N", + "v": "V", + "c": "°C", + "a": "A", + "s": "s", } # Init dictionary of aliases for multipliers. Entries are still lowercase PREFIX_ALIASES = { - # Multiples - skip "mega" and only up to "tera" - 'da': 'da', 'deca': 'da', 'h': 'h', 'hecto': 'h', - 'k': 'k', 'kilo': 'k', 'g': 'G', 'giga': 'G', 't': 'T', - 'tera': 'T', - # Submultipliers - 'd': 'd', 'deci': 'd', 'c': 'c', 'centi': 'c', - 'milli': 'm', 'm': 'm', 'µ': 'µ', 'micro': 'µ', - 'n': 'n', 'nano': 'n', 'p': 'p', 'pico': 'p', - 'f': 'f', 'femto': 'f', 'a': 'a', 'atto': 'a', - 'z': 'z', 'zepto': 'z', 'y': 'y', 'yocto': 'y', + # Multiples - skip "mega" and only up to "tera" + "da": "da", + "deca": "da", + "h": "h", + "hecto": "h", + "k": "k", + "kilo": "k", + "g": "G", + "giga": "G", + "t": "T", + "tera": "T", + # Submultipliers + "d": "d", + "deci": "d", + "c": "c", + "centi": "c", + "milli": "m", + "m": "m", + "µ": "µ", + "micro": "µ", + "n": "n", + "nano": "n", + "p": "p", + "pico": "p", + "f": "f", + "femto": "f", + "a": "a", + "atto": "a", + "z": "z", + "zepto": "z", + "y": "y", + "yocto": "y", } +def update_bids_name(basename, **kwargs): + """ + Add elements to a BIDS filename while retaining BIDS compatibility. + + Parameters + ---------- + basename : str + Name of valid BIDS file. 
+ kwargs : dict + Keyword arguments indicating entities and/or suffix and/or extension + to update/add to the BIDS filename. If a keyword's value is None, then + the entity will be removed from the filename. + + Returns + ------- + outname : str + Valid BIDS filename with updated entities/suffix/extension. + """ + # This is hardcoded, but would be nice to use the yaml-fied BIDS entity + # table when that's up and running. + ENTITY_ORDER = [ + "sub", + "ses", + "task", + "acq", + "ce", + "rec", + "dir", + "run", + "mod", + "echo", + "fa", + "inv", + "mt", + "part", + "ch", + "recording", + "proc", + "space", + "split", + ] + + outdir = os.path.dirname(basename) + outname = os.path.basename(basename) + + # Determine scan suffix (should always be physio) + suffix = outname.split("_")[-1].split(".")[0] + extension = "." + ".".join(outname.split("_")[-1].split(".")[1:]) + filetype = suffix + extension + + for key, val in kwargs.items(): + if key == "suffix": + if not val.startswith("_"): + val = "_" + val + + if not val.endswith("."): + val = val + "." + + outname = outname.replace("_" + suffix + ".", val) + elif key == "extension": + # add leading . if not '' + if val and not val.startswith("."): + val = "." + val + outname = outname.replace(extension, val) + else: + # entities + if key not in ENTITY_ORDER: + raise ValueError("Key {} not understood.".format(key)) + + if val is None: + # remove entity from filename if kwarg val is None + regex = "_{}-[0-9a-zA-Z]+".format(key) + outname = re.sub(regex, "", outname) + elif "_{}-{}".format(key, val) in basename: + LGR.warning( + "Key-value pair {}/{} already found in " + "basename {}. Skipping.".format(key, val, basename) + ) + elif "_{}-".format(key) in basename: + LGR.warning( + "Key {} already found in basename {}. " + "Overwriting.".format(key, basename) + ) + regex = "_{}-[0-9a-zA-Z]+".format(key) + outname = re.sub(regex, "_{}-{}".format(key, val), outname) + else: + loc = ENTITY_ORDER.index(key) + entities_to_check = ENTITY_ORDER[loc:] + entities_to_check = ["_{}-".format(etc) for etc in entities_to_check] + entities_to_check.append("_{}".format(filetype)) + for etc in entities_to_check: + if etc in outname: + outname = outname.replace(etc, "_{}-{}{}".format(key, val, etc)) + break + outname = os.path.join(outdir, outname) + return outname + + def bidsify_units(orig_unit): """ Read the input unit of measure and use the dictionary of aliases to bidsify its value. 
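# A quick sketch of what update_bids_name (added above) produces; the
# filenames and entity values here are hypothetical, not taken from this diff.
from phys2bids.bids import update_bids_name

# A new entity is spliced in at its position in ENTITY_ORDER
assert (update_bids_name("sub-01_task-rest_bold.nii.gz", run="02")
        == "sub-01_task-rest_run-02_bold.nii.gz")

# Swap the suffix and drop the extension, as conversion.workflow() does below
assert (update_bids_name("sub-01_task-rest_run-02_bold.nii.gz",
                         suffix="physio", extension="")
        == "sub-01_task-rest_run-02_physio")

# Passing None removes an entity
assert (update_bids_name("sub-01_task-rest_run-02_physio", run=None)
        == "sub-01_task-rest_physio")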
@@ -81,26 +225,29 @@ def bidsify_units(orig_unit): # check that u_key is part of unit if unit.endswith(u_key): new_unit = UNIT_ALIASES[u_key] - unit = unit[:-len(u_key)] - if unit != '': + unit = unit[: -len(u_key)] + if unit != "": # for every prefix alias - prefix = PREFIX_ALIASES.get(unit, '') - if prefix == '': - LGR.warning(f'The given unit prefix {unit} does not ' - 'have aliases, passing it as is') - prefix = orig_unit[:len(unit)] + prefix = PREFIX_ALIASES.get(unit, "") + if prefix == "": + LGR.warning( + f"The given unit prefix {unit} does not " + "have aliases, passing it as is" + ) + prefix = orig_unit[: len(unit)] return prefix + new_unit else: return new_unit # If we conclude the loop without returning, it means the unit doesn't have aliases - LGR.warning(f'The given unit {orig_unit} does not have aliases, ' - f'passing it as is') + LGR.warning( + f"The given unit {orig_unit} does not have aliases, " f"passing it as is" + ) return orig_unit -def use_heuristic(heur_file, sub, ses, filename, outdir, run='', record_label=''): +def use_heuristic(heur_file, sub, ses, filename, outdir, run="", record_label=""): """ Import and use the heuristic specified by the user to rename the file. @@ -133,47 +280,57 @@ def use_heuristic(heur_file, sub, ses, filename, outdir, run='', record_label='' utils.check_file_exists(heur_file) # Initialise a dictionary of bids_keys that has already "recording" - bids_keys = {'sub': '', 'ses': '', 'task': '', 'acq': '', 'ce': '', - 'dir': '', 'rec': '', 'run': run, 'recording': record_label} + bids_keys = { + "sub": "", + "ses": "", + "task": "", + "acq": "", + "ce": "", + "dir": "", + "rec": "", + "run": run, + "recording": record_label, + } # Start filling bids_keys dictionary and path with subject and session - if sub.startswith('sub-'): - bids_keys['sub'] = sub[4:] + if sub.startswith("sub-"): + bids_keys["sub"] = sub[4:] fldr = os.path.join(outdir, sub) else: - bids_keys['sub'] = sub - fldr = os.path.join(outdir, f'sub-{sub}') + bids_keys["sub"] = sub + fldr = os.path.join(outdir, f"sub-{sub}") if ses: - if ses.startswith('ses-'): - bids_keys['ses'] = ses[4:] + if ses.startswith("ses-"): + bids_keys["ses"] = ses[4:] fldr = os.path.join(fldr, ses) else: - bids_keys['ses'] = ses - fldr = os.path.join(fldr, f'ses-{ses}') + bids_keys["ses"] = ses + fldr = os.path.join(fldr, f"ses-{ses}") # Load heuristic and use it to fill dictionary heur = utils.load_heuristic(heur_file) bids_keys.update(heur.heur(Path(filename).stem, run)) # If bids_keys['task'] is still empty, stop the program - if not bids_keys['task']: - LGR.warning(f'The heuristic {heur_file} could not deal with' - f'{Path(filename).stem}') + if not bids_keys["task"]: + LGR.warning( + f"The heuristic {heur_file} could not deal with" f"{Path(filename).stem}" + ) raise KeyError('No "task" attribute found') # Compose name by looping in the bids_keys dictionary # and adding nonempty keys - name = '' + name = "" for key in bids_keys: - if bids_keys[key] != '': - name = f'{name}{key}-{bids_keys[key]}_' + if bids_keys[key] != "": + name = f"{name}{key}-{bids_keys[key]}_" # Finish path, create it, add filename, export - fldr = os.path.join(fldr, 'func') + fldr = os.path.join(fldr, "func") os.makedirs(fldr, exist_ok=True) - heurpath = os.path.join(fldr, f'{name}physio') + heurpath = os.path.join(fldr, f"{name}physio") return heurpath @@ -195,43 +352,45 @@ def participants_file(outdir, yml, sub): Subject ID. 
""" - LGR.info('Updating participants.tsv ...') - file_path = os.path.join(outdir, 'participants.tsv') + LGR.info("Updating participants.tsv ...") + file_path = os.path.join(outdir, "participants.tsv") if not os.path.exists(file_path): - LGR.warning('phys2bids could not find participants.tsv') + LGR.warning("phys2bids could not find participants.tsv") # Read yaml info if file exists - if '.yml' in yml and os.path.exists(yml): - LGR.info('Using yaml data to populate participants.tsv') + if ".yml" in yml and os.path.exists(yml): + LGR.info("Using yaml data to populate participants.tsv") with open(yml) as f: yaml_data = yaml.load(f, Loader=yaml.FullLoader) - p_id = f'sub-{sub}' - p_age = yaml_data['participant']['age'] - p_sex = yaml_data['participant']['sex'] - p_handedness = yaml_data['participant']['handedness'] + p_id = f"sub-{sub}" + p_age = yaml_data["participant"]["age"] + p_sex = yaml_data["participant"]["sex"] + p_handedness = yaml_data["participant"]["handedness"] else: - LGR.info('No yaml file was provided. Using phys2bids data to ' - 'populate participants.tsv') + LGR.info( + "No yaml file was provided. Using phys2bids data to " + "populate participants.tsv" + ) # Fill in with data from phys2bids - p_id = f'sub-{sub}' - p_age = 'n/a' - p_sex = 'n/a' - p_handedness = 'n/a' + p_id = f"sub-{sub}" + p_age = "n/a" + p_sex = "n/a" + p_handedness = "n/a" # Write to participants.tsv file - header = ['participant_id', 'age', 'sex', 'handedness'] + header = ["participant_id", "age", "sex", "handedness"] utils.append_list_as_row(file_path, header) participants_data = [p_id, p_age, p_sex, p_handedness] utils.append_list_as_row(file_path, participants_data) else: # If participants.tsv exists only update when subject is not there - LGR.info('phys2bids found participants.tsv. Updating if needed...') + LGR.info("phys2bids found participants.tsv. 
Updating if needed...") # Find participant_id column in header - pf = open(file_path, 'r') + pf = open(file_path, "r") header = pf.readline().split("\t") header_length = len(header) pf.close() - p_id_idx = header.index('participant_id') + p_id_idx = header.index("participant_id") # Check if subject is already in the file sub_exists = False @@ -243,9 +402,9 @@ def participants_file(outdir, yml, sub): break # Only append to file if subject is not in the file if not sub_exists: - LGR.info(f'Appending subjet sub-{sub} to participants.tsv ...') - participants_data = ['n/a'] * header_length - participants_data[p_id_idx] = f'sub-{sub}' + LGR.info(f"Appending subjet sub-{sub} to participants.tsv ...") + participants_data = ["n/a"] * header_length + participants_data[p_id_idx] = f"sub-{sub}" utils.append_list_as_row(file_path, participants_data) @@ -262,13 +421,18 @@ def dataset_description_file(outdir): """ # dictionary that will be written for the basic dataset description version - data_dict = {"Name": os.path.splitext(os.path.basename(outdir))[0], - "BIDSVersion": "1.4.0", "DatasetType": "raw"} - file_path = os.path.join(outdir, 'dataset_description.json') + data_dict = { + "Name": os.path.splitext(os.path.basename(outdir))[0], + "BIDSVersion": "1.4.0", + "DatasetType": "raw", + } + file_path = os.path.join(outdir, "dataset_description.json") # check if dataset_description.json exists, if it doesn't create it if not os.path.exists(file_path): - LGR.warning('phys2bids could not find dataset_description.json,' - 'generating it with provided info') + LGR.warning( + "phys2bids could not find dataset_description.json," + "generating it with provided info" + ) utils.write_json(file_path, data_dict) @@ -284,9 +448,11 @@ def readme_file(outdir): Full path to the output directory. """ - file_path = os.path.join(outdir, 'README') + file_path = os.path.join(outdir, "README.md") if not os.path.exists(file_path): - text = 'Empty README, please fill in describing the dataset in more detail.' - LGR.warning('phys2bids could not find README,' - 'generating it EMPTY, please fill in the necessary info') - utils.write_file(file_path, '', text) + text = "Empty README, please fill in describing the dataset in more detail." + LGR.warning( + "phys2bids could not find README," + "generating it EMPTY, please fill in the necessary info" + ) + utils.write_file(file_path, "", text) diff --git a/phys2bids/conversion.py b/phys2bids/conversion.py new file mode 100644 index 000000000..354a28422 --- /dev/null +++ b/phys2bids/conversion.py @@ -0,0 +1,457 @@ +"""Functions for synchronizing multi-run physio files with BIDS imaging data.""" +import os.path as op +from operator import itemgetter +from itertools import groupby +import logging +import matplotlib.pyplot as plt + +from bids import BIDSLayout +import pandas as pd +import numpy as np +import nibabel as nib + +from .bids import update_bids_name +from .slice4phys import slice_phys +from .physio_obj import BlueprintOutput + +LGR = logging.getLogger(__name__) + + +def load_scan_data(layout, sub, ses=None): + """ + Extract subject- and session-specific scan onsets and durations from BIDSLayout. + + Parameters + ---------- + layout : BIDSLayout + sub : str + ses : str or None, optional + + Returns + ------- + df : pandas.DataFrame + DataFrame with the following columns: 'filename', 'original_filename', + 'acq_time', 'duration', 'onset'. + 'filename' is the name of the BIDS file minus within-acquisition entities. + 'original_filename' is the actual, existing BIDS file. 
+ 'acq_time' is the acquisition time in datetime format. + 'duration' is the scan duration in seconds. + 'onset' is the scan onset in seconds, starting with zero at the first scan. + """ + # This is the strategy we'll use in the future. Commented out for now. + # scans_file = layout.get(extension='.tsv', suffix='scans', subject=sub, session=ses) + # df = pd.read_table(scans_file) + + # Collect acquisition times + # NOTE: Will be replaced with scans file when heudiconv makes the change + img_files = layout.get( + datatype="func", + suffix="bold", + extension=[".nii.gz", ".nii"], + subject=sub, + session=ses, + ) + df = pd.DataFrame( + columns=["original_filename", "acq_time"], + ) + for i, img_file in enumerate(img_files): + df.loc[i, "original_filename"] = img_file.path + df.loc[i, "acq_time"] = img_file.get_metadata()["AcquisitionTime"] + + # Get generic filenames (without within-acquisition entities like echo) + df["filename"] = df["original_filename"].apply( + update_bids_name, echo=None, fa=None, inv=None, mt=None, part=None, ch=None + ) + + # Get "first" scan from multi-file acquisitions + df["acq_time"] = pd.to_datetime(df["acq_time"]) + df = df.sort_values(by="acq_time") + df = df.drop_duplicates(subset="filename", keep="first", ignore_index=True) + + # Now back to general-purpose code + df = determine_scan_durations(layout, df, sub=sub, ses=ses) + df = df.dropna(subset=["duration"]) # limit to relevant scans + + # Convert scan times to relative onsets (first scan is at 0 seconds) + df["onset"] = df["acq_time"] - df["acq_time"].min() + df["onset"] = df["onset"].dt.total_seconds() + return df + + +def determine_scan_durations(layout, scan_df, sub, ses=None): + """ + Determine scan durations. + + Extract scan durations by loading fMRI files/metadata and + multiplying TR by number of volumes. This can be used to determine the + endpoints for the physio files. + + Parameters + ---------- + layout : bids.layout.BIDSLayout + Dataset layout. Used to identify functional scans and load them to + determine scan durations. + scan_df : pandas.DataFrame + Scans DataFrame containing functional scan filenames and onset times. + sub : str + Subject ID. + ses : str or None, optional + Session ID. If None, then no session. + + Returns + ------- + scan_df : pandas.DataFrame + Updated DataFrame with new "duration" column. Calculated durations are + in seconds. + """ + func_files = layout.get( + datatype="func", + suffix="bold", + extension=[".nii.gz", ".nii"], + subject=sub, + session=ses, + ) + scan_df["duration"] = None + for func_file in func_files: + filename = func_file.path + if filename in scan_df["original_filename"].values: + n_vols = nib.load(filename).shape[3] + tr = func_file.get_metadata()["RepetitionTime"] + duration = n_vols * tr + scan_df.loc[scan_df["original_filename"] == filename, "duration"] = duration + else: + LGR.info("Skipping {}".format(op.basename(filename))) + return scan_df + + +def extract_physio_onsets(trigger_timeseries, freq, threshold=0.5): + """ + Collect onsets from physio file, both in terms of seconds and time series indices. + + Parameters + ---------- + trigger_timeseries : 1D numpy.ndarray + Trigger time series. + freq : float + Frequency of trigger time series, in Hertz. + threshold : float, optional + Threshold to apply to binarize trigger time series. + + Returns + ------- + df : pandas.DataFrame + DataFrame with one row for each trigger period and three columns: + onset (in seconds), index (onsets in index of time series array), + and duration (in seconds). 
+ """ + samplerate = 1.0 / freq + scan_idx = np.where(trigger_timeseries > 0)[0] + # Get groups of consecutive numbers in index + groups = [] + for k, g in groupby(enumerate(scan_idx), lambda x: x[0] - x[1]): + groups.append(list(map(itemgetter(1), g))) + + # Extract onsets + onsets = np.array([g[0] for g in groups]) + onsets_in_sec = onsets * samplerate + durations = np.array([g[-1] - g[0] for g in groups]) + durations_in_sec = durations * samplerate + df = pd.DataFrame( + columns=["onset"], + data=onsets_in_sec, + ) + df["index"] = onsets + df["duration"] = durations_in_sec + return df + + +def synchronize_onsets(phys_df, scan_df): + """ + Find matching scans and physio trigger periods from separate DataFrames. + + Uses time differences within each DataFrame. + There can be fewer physios than scans (task failed to trigger physio) + or fewer scans than physios (aborted scans are not retained in BIDS dataset). + + Onsets are in seconds. The baseline (i.e., absolute timing) doesn't matter. + Relative timing is all that matters. + + Parameters + ---------- + phys_df : pandas.DataFrame + DataFrame with onsets of physio trigger periods, in seconds. The + baseline does not matter, so it is reasonable for the onsets to start + with zero. The following columns are required: 'onset', 'index'. + scan_df : pandas.DataFrame + DataFrame with onsets and names of functional scans from BIDS dataset, + in seconds. The baseline does not matter, so it is reasonable for the + onsets to start with zero. The following columns are required: 'onset', + 'duration'. + + Returns + ------- + scan_df : pandas.DataFrame + Updated scan DataFrame, now with columns for predicted physio onsets in + seconds and in indices of the physio trigger channel, as well as scan + duration in units of the physio trigger channel. + """ + phys_df = phys_df.sort_values(by=["onset"]) + scan_df = scan_df.sort_values(by=["onset"]) + + # Get difference between each physio trigger onset and each scan onset + onset_diffs = np.zeros((scan_df.shape[0], phys_df.shape[0])) + for i, i_scan in scan_df.iterrows(): + for j, j_phys in phys_df.iterrows(): + onset_diff = j_phys["onset"] - i_scan["onset"] + onset_diffs[i, j] = onset_diff + + # Find the delay that gives the smallest difference between scan onsets + # and physio onsets + selected = (None, None) + thresh = 1000 + for i_scan in range(onset_diffs.shape[0]): + for j_phys in range(onset_diffs.shape[1]): + test_offset = onset_diffs[i_scan, j_phys] + diffs_from_phys_onset = onset_diffs - test_offset + diffs_from_abs = np.abs(diffs_from_phys_onset) + min_diff_row_idx = np.argmin(diffs_from_abs, axis=0) + min_diff_col_idx = np.arange(len(min_diff_row_idx)) + min_diffs = diffs_from_abs[min_diff_row_idx, min_diff_col_idx] + min_diff_sum = np.sum(min_diffs) + if min_diff_sum < thresh: + selected = (i_scan, j_phys) + thresh = min_diff_sum + + offset = onset_diffs[selected[0], selected[1]] + + # Isolate close, but negative relative onsets, to ensure scan onsets are + # always before or at physio triggers. 
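+    # (Adding the most negative "close" difference to the offset shifts the
+    # alignment so that every closely matched physio trigger lands at or after
+    # its scan's adjusted onset.)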
+    close_thresh = 2  # threshold for "close" onsets, in seconds
+    diffs_from_phys_onset = onset_diffs - offset
+    min_diff_row_idx = np.argmin(np.abs(diffs_from_phys_onset), axis=0)
+    min_diff_col_idx = np.arange(len(min_diff_row_idx))
+    min_diffs = diffs_from_phys_onset[min_diff_row_idx, min_diff_col_idx]
+    min_diffs_tmp = min_diffs[abs(min_diffs) <= close_thresh]
+    min_val = min(min_diffs_tmp)
+    min_diffs += min_val
+    offset += min_val
+    LGR.info(
+        "Scan onsets should be adjusted forward by {} seconds to best "
+        "match physio onsets.".format(offset)
+    )
+
+    # Get onset of each scan in terms of the physio time series
+    scan_df["phys_onset"] = scan_df["onset"] + offset
+    rise = phys_df.loc[1, "index"] - phys_df.loc[0, "index"]
+    run = phys_df.loc[1, "onset"] - phys_df.loc[0, "onset"]
+    samplerate = rise / run
+    scan_df["index_onset"] = (scan_df["phys_onset"] * samplerate).astype(int)
+    scan_df["index_duration"] = (scan_df["duration"] * samplerate).astype(int)
+    scan_df["index_offset"] = scan_df["index_onset"] + scan_df["index_duration"]
+    return scan_df
+
+
+def plot_sync(scan_df, physio_df):
+    """
+    Plot unsynchronized and synchronized scan and physio onsets and durations.
+
+    Parameters
+    ----------
+    scan_df : pandas.DataFrame
+        DataFrame with timing associated with scan files. Must have the
+        following columns: onset (scan onsets in seconds), duration (scan
+        durations in seconds), and phys_onset (scan onsets after being matched
+        with physio onsets, in seconds).
+    physio_df : pandas.DataFrame
+        DataFrame with timing associated with physio trigger periods. Must have
+        the following columns: onset (trigger period onsets in seconds) and
+        duration (trigger period durations in seconds).
+
+    Returns
+    -------
+    fig : matplotlib.pyplot.Figure
+        Figure from plot.
+    axes : list of two matplotlib.pyplot.Axes objects
+        Axes objects for top and bottom subplots of figure. Top subplot has
+        unsynchronized onsets and bottom has synchronized ones. 
+    """
+    fig, axes = plt.subplots(nrows=2, figsize=(20, 6), sharex=True)
+
+    # get max (onset time + duration) rounded up to nearest 1000
+    max_ = int(
+        1000
+        * np.ceil(
+            max(
+                (
+                    (physio_df["onset"] + physio_df["duration"]).max(),
+                    (scan_df["onset"] + scan_df["duration"]).max(),
+                    (scan_df["phys_onset"] + scan_df["duration"]).max(),
+                )
+            )
+            / 1000
+        )
+    )
+
+    # get x-axis values
+    scalar = 10
+    x = np.linspace(0, max_, (max_ * scalar) + 1)
+
+    # first plot the onsets and durations of the raw scan and physio runs in
+    # the top axis
+    physio_timeseries = np.zeros(x.shape)
+    func_timeseries = np.zeros(x.shape)
+    for i, row in scan_df.iterrows():
+        func_timeseries[
+            int(row["onset"] * scalar): int((row["onset"] + row["duration"]) * scalar)
+        ] = 1
+
+    for i, row in physio_df.iterrows():
+        physio_timeseries[
+            int(row["onset"] * scalar): int((row["onset"] + row["duration"]) * scalar)
+        ] = 0.5
+
+    axes[0].fill_between(
+        x,
+        func_timeseries,
+        where=func_timeseries >= 0,
+        interpolate=True,
+        color="red",
+        alpha=0.3,
+        label="Functional scans",
+    )
+    axes[0].fill_between(
+        x,
+        physio_timeseries,
+        where=physio_timeseries >= 0,
+        interpolate=True,
+        color="blue",
+        alpha=0.3,
+        label="Physio triggers",
+    )
+
+    # now plot the adjusted onsets (and original durations) in the bottom axis
+    physio_timeseries = np.zeros(x.shape)
+    func_timeseries = np.zeros(x.shape)
+    for i, row in scan_df.iterrows():
+        func_timeseries[
+            int(row["phys_onset"] * scalar): int(
+                (row["phys_onset"] + row["duration"]) * scalar
+            )
+        ] = 1
+
+    for i, row in physio_df.iterrows():
+        physio_timeseries[
+            int(row["onset"] * scalar): int((row["onset"] + row["duration"]) * scalar)
+        ] = 0.5
+
+    axes[1].fill_between(
+        x,
+        func_timeseries,
+        where=func_timeseries >= 0,
+        interpolate=True,
+        color="red",
+        alpha=0.3,
+        label="Functional scans",
+    )
+    axes[1].fill_between(
+        x,
+        physio_timeseries,
+        where=physio_timeseries >= 0,
+        interpolate=True,
+        color="blue",
+        alpha=0.3,
+        label="Physio triggers",
+    )
+
+    axes[0].set_xlim((min(x), max(x)))
+    axes[0].set_ylim((0, None))
+    axes[1].set_xlabel("Time (s)")
+    axes[0].legend()
+    return fig, axes
+
+
+def workflow(physio, bids_dir, sub, ses=None, padding=9, update_trigger=False):
+    """
+    Run physio/scan onset synchronization and BIDSification.
+
+    This workflow writes out physio files to a BIDS dataset.
+
+    Parameters
+    ----------
+    physio : BlueprintInput
+        Object *must* contain multiple physio trigger periods associated with scans.
+    bids_dir : str
+        Path to BIDS dataset.
+    sub : str
+        Subject ID. Used to search the BIDS dataset for relevant scans.
+    ses : str or None, optional
+        Session ID. Used to search the BIDS dataset for relevant scans in
+        longitudinal studies. Default is None.
+    padding : float or tuple, optional
+        Amount of time before and after run to keep in physio time series, in seconds.
+        May be a single value (in which case the time before and after is the same) or
+        a two-item tuple (in which case the first item is time before and the second is
+        time after).
+        These values will be automatically reduced in cases where the pad would extend
+        before or after the physio acquisition.
+    update_trigger : bool, optional
+        Whether to update the trigger channel time series based on estimated scan onsets from
+        the BIDS dataset (True) or to leave it as-is (False). Default is False.
+
+    Returns
+    -------
+    out : list of BlueprintOutput
+
+    Notes
+    -----
+    The workflow works as follows:
+    1. Extract trigger period onsets from physio file.
+    2. 
Extract scan times from BIDS dataset, to get the following information: + - Scan name + - Onset with subsecond resolution as close to trigger pulse as possible + - Duration (from TR * data dimension) + 3. Calculate difference in time between each scan onset and each physio + trigger onset. + 4. Use differences to identify similar values across as many physio/scan + pairs as possible. Physio may be missing if trigger failed. + Physio may be delayed if task was manually triggered after scan began, + or if there's a bug in the task script. Scan may be missing if it wasn't + converted (e.g., a scan stopped early and re-run). + 5. Infer physio periods from scan periods, once physio and scan onsets + are matched. + 6. Assign scan names to physio periods, infer times for other scan times + in cases where trigger failed, and ignore trigger periods associated with + scans that weren't kept in the BIDS dataset. + 7. Split physio file based on scan-specific onsets and offsets. + 8. Write out scan-associated physio files. + 9. Generate and return scan/physio onsets figure for manual QC. + """ + layout = BIDSLayout(bids_dir) + scan_df = load_scan_data(layout, sub=sub, ses=ses) + + trigger_timeseries = physio.timeseries[physio.trigger_idx] + freq = physio.freq[physio.trigger_idx] + physio_df = extract_physio_onsets(trigger_timeseries, freq=freq) + scan_df = synchronize_onsets(physio_df, scan_df) + + # we should do something better with this figure, but it's nice to have for QC + fig, axes = plot_sync(scan_df, physio_df) + fig.savefig("synchronization_results.png") + + run_dict = {} + # could probably be replaced with apply() followed by to_dict() + for _, row in scan_df.iterrows(): + base_fname = update_bids_name(row["filename"], suffix="physio", extension="") + split_times = (row["index_onset"], row["index_offset"]) + run_dict[base_fname] = split_times + phys_dict = slice_phys( + physio, run_dict, padding=padding, update_trigger=update_trigger + ) + outputs = [] + for k, v in phys_dict.items(): + output = BlueprintOutput.init_from_blueprint(v) + output.filename = k + outputs.append(output) + # Let's not actually save files until we're more confident. + # output.save() + return outputs diff --git a/phys2bids/phys2bids.py b/phys2bids/phys2bids.py index c1b4fd46c..8d66a9780 100644 --- a/phys2bids/phys2bids.py +++ b/phys2bids/phys2bids.py @@ -87,34 +87,6 @@ def print_summary(filename, ntp_expected, ntp_found, samp_freq, time_offset, out utils.write_file(outfile, '.log', summary) -def print_json(outfile, samp_freq, time_offset, ch_name): - """ - Print the json required by BIDS format. - - Parameters - ---------- - outfile: str or path - Fullpath to output file. - samp_freq: float - Frequency of sampling for the output file. - time_offset: float - Difference between beginning of file and first TR. - ch_name: list of str - List of channel names, as specified by BIDS format. - - Notes - ----- - Outcome: - outfile: .json file - File containing information for BIDS. 
- """ - start_time = -time_offset - summary = dict(SamplingFrequency=samp_freq, - StartTime=round(start_time, 4), - Columns=ch_name) - utils.write_json(outfile, summary, indent=4, sort_keys=False) - - @due.dcite( Doi('10.5281/zenodo.3470091'), path='phys2bids', @@ -430,8 +402,8 @@ def phys2bids(filename, info=False, indir='.', outdir='.', heur_file=None, LGR.info(f'Exporting files for run {run} freq {uniq_freq}') np.savetxt(phys_out[key].filename + '.tsv.gz', phys_out[key].timeseries, fmt='%.8e', delimiter='\t') - print_json(phys_out[key].filename, phys_out[key].freq, - phys_out[key].start_time, phys_out[key].ch_name) + utils.save_json(phys_out[key].filename, phys_out[key].freq, + phys_out[key].start_time, phys_out[key].ch_name) print_summary(filename, num_timepoints_expected, phys_in[run].num_timepoints_found, uniq_freq, phys_out[key].start_time, diff --git a/phys2bids/physio_obj.py b/phys2bids/physio_obj.py index 3466aa626..ca5d97408 100644 --- a/phys2bids/physio_obj.py +++ b/phys2bids/physio_obj.py @@ -9,6 +9,8 @@ import numpy as np +from .utils import save_json + LGR = logging.getLogger(__name__) @@ -39,7 +41,7 @@ def is_valid(var, var_type, list_type=None): If var is not of var_type """ if not isinstance(var, var_type): - raise AttributeError(f'The given variable is not a {var_type}') + raise AttributeError(f"The given variable is not a {var_type}") if var_type is list and list_type is not None: for element in var: @@ -103,7 +105,7 @@ def are_equal(self, other): def _deal_with_dict_value_error(self, other): # Check if "self" has a 'timeseries' key. If not, return False. try: - self['timeseries'] + self["timeseries"] except KeyError: return False except TypeError: @@ -112,16 +114,18 @@ def _deal_with_dict_value_error(self, other): # Check that the two objects have the same keys. # If not, return False, otherwise loop through the timeseries key. if self.keys() == other.keys(): - alltrue_timeseries = [False] * len(self['timeseries']) + alltrue_timeseries = [False] * len(self["timeseries"]) alltrue_keys = [False] * len(self) for j, key in enumerate(self.keys()): - if key == 'timeseries': - for i in range(len(self['timeseries'])): - alltrue_timeseries[i] = (self['timeseries'][i].all() - == other['timeseries'][i].all()) + if key == "timeseries": + for i in range(len(self["timeseries"])): + alltrue_timeseries[i] = ( + self["timeseries"][i].all() + == other["timeseries"][i].all() + ) alltrue_keys[j] = all(alltrue_timeseries) else: - alltrue_keys[j] = (self[key] == other[key]) + alltrue_keys[j] = self[key] == other[key] return all(alltrue_keys) else: return False @@ -154,7 +158,7 @@ def _deal_with_dict_value_error(self, other): return _deal_with_dict_value_error(self, other) -class BlueprintInput(): +class BlueprintInput: """ Main input object for phys2bids. 
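# A small sketch of building and slicing a BlueprintInput; all values are
# hypothetical. By phys2bids convention, channel 0 is time and the trigger
# lives at trigger_idx; slices are expressed in trigger-channel samples.
import numpy as np
from phys2bids.physio_obj import BlueprintInput

freq = 40.0  # Hz
time = np.arange(0, 60, 1 / freq)  # 60 s recording
trigger = np.zeros(time.shape)
trigger[400:2000] = 5.0  # trigger high from 10 s to 50 s

phys = BlueprintInput(
    timeseries=[time, trigger],
    freq=[freq, freq],
    ch_name=["time", "trigger"],
    units=["s", "V"],
    trigger_idx=1,
)
first_run = phys[400:2000]  # a new BlueprintInput limited to that window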
@@ -226,15 +230,24 @@ class BlueprintInput(): - Actual number of channels +1 <= ch_amount """ - def __init__(self, timeseries, freq, ch_name, units, trigger_idx, - num_timepoints_found=None, thr=None, time_offset=0): + def __init__( + self, + timeseries, + freq, + ch_name, + units, + trigger_idx, + num_timepoints_found=None, + thr=None, + time_offset=0, + ): """Initialise BlueprintInput (see class docstring).""" self.timeseries = deepcopy(is_valid(timeseries, list, list_type=np.ndarray)) self.freq = deepcopy(has_size(is_valid(freq, list, list_type=(int, float)), self.ch_amount, 0.0)) - self.ch_name = deepcopy(has_size(ch_name, self.ch_amount, 'unknown')) - self.units = deepcopy(has_size(units, self.ch_amount, '[]')) + self.ch_name = deepcopy(has_size(ch_name, self.ch_amount, "unknown")) + self.units = deepcopy(has_size(units, self.ch_amount, "[]")) self.trigger_idx = deepcopy(is_valid(trigger_idx, int)) self.num_timepoints_found = deepcopy(num_timepoints_found) self.thr = deepcopy(thr) @@ -307,33 +320,43 @@ def __getitem__(self, idx): # Check that the indexes are not out of bounds if idx.start >= trigger_length or idx.stop > trigger_length: - raise IndexError(f'Slice ({idx.start}, {idx.stop}) is out of ' - f'bounds for channel {self.trigger_idx} ' - f'with size {trigger_length}') + raise IndexError( + f"Slice ({idx.start}, {idx.stop}) is out of " + f"bounds for channel {self.trigger_idx} " + f"with size {trigger_length}" + ) # Operate on each channel on its own for n, channel in enumerate(self.timeseries): - idx_dict = {'start': idx.start, 'stop': idx.stop, 'step': idx.step} + idx_dict = {"start": idx.start, "stop": idx.stop, "step": idx.step} # Adapt the slicing indexes to the right requency - for i in ['start', 'stop', 'step']: + for i in ["start", "stop", "step"]: if idx_dict[i]: - idx_dict[i] = int(np.floor(self.freq[n] - / self.freq[self.trigger_idx] - * idx_dict[i])) + idx_dict[i] = int( + np.floor( + self.freq[n] / self.freq[self.trigger_idx] * idx_dict[i] + ) + ) # Correct the slicing stop if necessary - if idx_dict['start'] == idx_dict['stop'] or return_instant: - idx_dict['stop'] = idx_dict['start'] + 1 + if idx_dict["start"] == idx_dict["stop"] or return_instant: + idx_dict["stop"] = idx_dict["start"] + 1 elif trigger_length == idx.stop: - idx_dict['stop'] = len(channel) + idx_dict["stop"] = len(channel) - new_idx = slice(idx_dict['start'], idx_dict['stop'], idx_dict['step']) + new_idx = slice(idx_dict["start"], idx_dict["stop"], idx_dict["step"]) sliced_timeseries[n] = channel[new_idx] - return BlueprintInput(sliced_timeseries, self.freq, self.ch_name, - self.units, self.trigger_idx, - self.num_timepoints_found, self.thr, - self.time_offset) + return BlueprintInput( + sliced_timeseries, + self.freq, + self.ch_name, + self.units, + self.trigger_idx, + self.num_timepoints_found, + self.thr, + self.time_offset, + ) def __eq__(self, other): """ @@ -367,13 +390,16 @@ def rename_channels(self, new_names): self.ch_name: list of str Changes content to new_name. 
""" - if 'time' in new_names: - del new_names[new_names.index('time')] + if "time" in new_names: + del new_names[new_names.index("time")] - new_names = ['time', ] + new_names + new_names = [ + "time", + ] + new_names - self.ch_name = has_size(is_valid(new_names, list, list_type=str), - self.ch_amount, 'unknown') + self.ch_name = has_size( + is_valid(new_names, list, list_type=str), self.ch_amount, "unknown" + ) def return_index(self, idx): """ @@ -390,8 +416,13 @@ def return_index(self, idx): Tuple containing the proper list entry of all the properties of the object with index `idx` """ - return (self.timeseries[idx], self.ch_amount, self.freq[idx], - self.ch_name[idx], self.units[idx]) + return ( + self.timeseries[idx], + self.ch_amount, + self.freq[idx], + self.ch_name[idx], + self.units[idx], + ) def delete_at_index(self, idx): """ @@ -422,8 +453,10 @@ def delete_at_index(self, idx): del self.units[idx] if self.trigger_idx == idx: - LGR.warning('Removing trigger channel - are you sure you are doing' - 'the right thing?') + LGR.warning( + "Removing trigger channel - are you sure you are doing" + "the right thing?" + ) self.trigger_idx = 0 def check_trigger_amount(self, thr=None, num_timepoints_expected=0, tr=0): @@ -453,7 +486,7 @@ def check_trigger_amount(self, thr=None, num_timepoints_expected=0, tr=0): The property `timeseries` is shifted with the 0 being the time of first trigger. """ - LGR.info('Counting trigger points') + LGR.info("Counting trigger points") # Use the trigger channel to find the TRs, # comparing it to a given threshold. trigger = self.timeseries[self.trigger_idx] @@ -473,46 +506,55 @@ def check_trigger_amount(self, thr=None, num_timepoints_expected=0, tr=0): thr = np.mean(trigger) + 2 * np.std(trigger) flag = 1 timepoints = trigger > thr - num_timepoints_found = len([is_true for is_true, _ in groupby(timepoints, - lambda x: x != 0) if is_true]) + num_timepoints_found = len( + [is_true for is_true, _ in groupby(timepoints, lambda x: x != 0) if is_true] + ) if flag == 1: - LGR.info(f'The number of timepoints according to the std_thr method ' - f'is {num_timepoints_found}. The computed threshold is {thr:.4f}') + LGR.info( + f"The number of timepoints according to the std_thr method " + f"is {num_timepoints_found}. The computed threshold is {thr:.4f}" + ) else: LGR.info(f'The number of timepoints found with the manual threshold of {thr:.4f} ' f'is {num_timepoints_found}') time_offset = time[timepoints.argmax()] if num_timepoints_expected: - LGR.info('Checking number of timepoints') + LGR.info("Checking number of timepoints") if num_timepoints_found > num_timepoints_expected: - timepoints_extra = (num_timepoints_found - - num_timepoints_expected) - LGR.warning(f'Found {timepoints_extra} timepoints' - ' more than expected!\n' - 'Assuming extra timepoints are at the end ' - '(try again with a more liberal thr)') + timepoints_extra = num_timepoints_found - num_timepoints_expected + LGR.warning( + f"Found {timepoints_extra} timepoints" + " more than expected!\n" + "Assuming extra timepoints are at the end " + "(try again with a more liberal thr)" + ) elif num_timepoints_found < num_timepoints_expected: - timepoints_missing = (num_timepoints_expected - - num_timepoints_found) - LGR.warning(f'Found {timepoints_missing} timepoints' - ' less than expected!') + timepoints_missing = num_timepoints_expected - num_timepoints_found + LGR.warning( + f"Found {timepoints_missing} timepoints" " less than expected!" 
+ ) if tr: - LGR.warning('Correcting time offset, assuming missing ' - 'timepoints are at the beginning (try again ' - 'with a more conservative thr)') - time_offset -= (timepoints_missing * tr) + LGR.warning( + "Correcting time offset, assuming missing " + "timepoints are at the beginning (try again " + "with a more conservative thr)" + ) + time_offset -= timepoints_missing * tr else: - LGR.warning('Can\'t correct time offset - you should ' - 'specify the TR') + LGR.warning( + "Can't correct time offset - you should " "specify the TR" + ) else: - LGR.info('Found just the right amount of timepoints!') + LGR.info("Found just the right amount of timepoints!") else: - LGR.warning('The necessary options to find the amount of timepoints ' - 'were not provided.') + LGR.warning( + "The necessary options to find the amount of timepoints " + "were not provided." + ) self.thr = thr self.time_offset = time_offset self.timeseries[0] = self.timeseries[0] - time_offset @@ -534,17 +576,20 @@ def print_info(self, filename): Returns to stdout (e.g. on screen) channels, their names and their sampling rate. """ - info = (f'\n------------------------------------------------' - f'\nFile {filename} contains:\n') + info = ( + f"\n------------------------------------------------" + f"\nFile {filename} contains:\n" + ) for ch in range(1, self.ch_amount): - info = info + (f'{ch:02d}. {self.ch_name[ch]};' - f' sampled at {self.freq[ch]} Hz\n') - info = info + '------------------------------------------------\n' + info = info + ( + f"{ch:02d}. {self.ch_name[ch]};" f" sampled at {self.freq[ch]} Hz\n" + ) + info = info + "------------------------------------------------\n" LGR.info(info) -class BlueprintOutput(): +class BlueprintOutput: """ Main output object for phys2bids. @@ -585,7 +630,7 @@ class BlueprintOutput(): method to populate from input blueprint instead of init """ - def __init__(self, timeseries, freq, ch_name, units, start_time, filename=''): + def __init__(self, timeseries, freq, ch_name, units, start_time, filename=""): """Initialise BlueprintOutput (see class docstring).""" self.timeseries = deepcopy(is_valid(timeseries, np.ndarray)) self.freq = deepcopy(is_valid(freq, (int, float))) @@ -636,8 +681,14 @@ def return_index(self, idx): Tuple containing the proper list entry of all the properties of the object with index `idx` """ - return (self.timeseries[:, idx], self.ch_amount, self.freq, - self.ch_name[idx], self.units[idx], self.start_time) + return ( + self.timeseries[:, idx], + self.ch_amount, + self.freq, + self.ch_name[idx], + self.units[idx], + self.start_time, + ) def delete_at_index(self, idx): """ @@ -689,3 +740,16 @@ def init_from_blueprint(cls, blueprint): units = blueprint.units start_time = timeseries[0, 0] return cls(timeseries, freq, ch_name, units, start_time) + + def save(self): + """ + Save object to BIDS-format pair of files. + + Notes + ----- + Saves a tsv.gz file and a json file. 
+        """
+        np.savetxt(
+            self.filename + ".tsv.gz", self.timeseries, fmt="%.8e", delimiter="\t"
+        )
+        save_json(self.filename + ".json", self.freq, self.start_time, self.ch_name)
diff --git a/phys2bids/slice4phys.py b/phys2bids/slice4phys.py
index 7d0305c58..345e21804 100644
--- a/phys2bids/slice4phys.py
+++ b/phys2bids/slice4phys.py
@@ -4,6 +4,9 @@
 from copy import deepcopy
 
 import numpy as np
+from scipy.signal import resample
+
+from phys2bids.bids import update_bids_name
 
 LGR = logging.getLogger(__name__)
 
@@ -106,6 +109,123 @@ def find_runs(phys_in, ntp_list, tr_list, thr=None, padding=9):
     return run_timestamps
 
 
+def slice_phys(phys, run_timestamps, padding=9, update_trigger=False):
+    """
+    Slice a physio object based on run-/file-wise onsets and offsets.
+
+    Parameters
+    ----------
+    phys : BlueprintInput
+    run_timestamps : dict
+        Each key is a run-wise filename and value is a tuple of (onset, offset),
+        where onset and offset are integers corresponding to index values of
+        the trigger channel.
+    padding : float or tuple, optional
+        Amount of time before and after run to keep in physio time series, in seconds.
+        May be a single value (in which case the time before and after is the same) or
+        a two-item tuple (in which case the first item is time before and the second is
+        time after).
+        These values will be automatically reduced in cases where the pad would extend
+        before or after the physio acquisition.
+    update_trigger : bool, optional
+        Whether to update the trigger channel time series based on estimated scan onsets from
+        the BIDS dataset (True) or to leave it as-is (False). Default is False.
+
+    Returns
+    -------
+    phys_in_slices : dict
+        Each key is a run-wise filename (possibly further split by frequency)
+        and each value is a BlueprintInput object.
+
+    Notes
+    -----
+    The goal of this function is to abstract out the general slicing procedure
+    from slice4phys.
+    """
+    phys = deepcopy(phys)
+    if update_trigger:
+        phys.freq.append(phys.freq[phys.trigger_idx])
+        phys.units.append(phys.units[phys.trigger_idx])
+        phys.timeseries.append(phys.timeseries[phys.trigger_idx].copy())
+        phys.ch_name.append('original trigger')
+
+        # Fix up the trigger time series
+        phys.timeseries[phys.trigger_idx][:] = 0
+        for ons, off in run_timestamps.values():
+            phys.timeseries[phys.trigger_idx][ons:off] = 1
+
+    if not isinstance(padding, tuple):
+        time_before, time_after = padding, padding
+    else:
+        time_before, time_after = padding
+
+    phys_in_slices = {}
+    for i_run, fname in enumerate(run_timestamps.keys()):
+        run_attributes = run_timestamps[fname]  # tmp variable to collect run's info
+        trigger_onset, trigger_offset = run_attributes
+        min_onset, max_offset = 0, phys.timeseries[phys.trigger_idx].shape[0]
+
+        unique_frequencies = np.unique(phys.freq)
+        trigger_freq = phys.freq[phys.trigger_idx]
+
+        # Limit padding based on beginning and end of physio recording.
+        # We could also limit padding to prevent overlap between scans, if desired. 
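+        # (np.minimum clamps each pad to the seconds actually available
+        # between the run and the edges of the recording, converting trigger
+        # samples to seconds via the trigger channel's frequency.)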
+        run_time_before = np.minimum(time_before, (trigger_onset - min_onset) / trigger_freq)
+        run_time_after = np.minimum(time_after, (max_offset - trigger_offset) / trigger_freq)
+
+        for freq in unique_frequencies:
+            to_subtract = int(run_time_before * freq)
+            to_add = int(run_time_after * freq)
+
+            run_onset_idx = int(trigger_onset * freq / trigger_freq) - to_subtract
+            run_offset_idx = int(trigger_offset * freq / trigger_freq) + to_add
+
+            # Split into frequency-specific object limited to onset-offset
+            temp_phys_in = deepcopy(phys[run_onset_idx:run_offset_idx])
+
+            if temp_phys_in.freq[temp_phys_in.trigger_idx] != freq:
+                example_ts = phys.timeseries[phys.freq.index(freq)]
+
+                # Determine time
+                new_time = np.arange(example_ts.shape[0]) / freq
+                new_time += np.min(temp_phys_in.timeseries[0])
+                temp_phys_in.timeseries[0] = new_time
+                temp_phys_in.freq[0] = freq
+
+                # Resample trigger
+                new_trigger = temp_phys_in.timeseries[temp_phys_in.trigger_idx]
+                new_trigger = resample(new_trigger, example_ts.shape[0], window=10)
+                temp_phys_in.timeseries[temp_phys_in.trigger_idx] = new_trigger
+                temp_phys_in.freq[temp_phys_in.trigger_idx] = freq
+
+            if len(unique_frequencies) > 1:
+                run_fname = update_bids_name(fname, recording=str(freq) + 'Hz')
+
+                # Drop other frequency channels
+                channel_idx = np.arange(temp_phys_in.ch_amount)
+                nonfreq_channels = [i for i in channel_idx if temp_phys_in.freq[i] != freq]
+                freq_channels = [i for i in channel_idx if temp_phys_in.freq[i] == freq]
+                temp_phys_in = temp_phys_in.delete_at_index(nonfreq_channels)
+
+                # Update trigger channel index around dropped channels
+                new_trigger_idx = freq_channels.index(temp_phys_in.trigger_idx)
+                temp_phys_in.trigger_idx = new_trigger_idx
+            else:
+                run_fname = fname
+
+            # zero out time
+            temp_phys_in.timeseries[0] = (
+                temp_phys_in.timeseries[0] - np.min(temp_phys_in.timeseries[0])
+            )
+            # Now take out the time before the scan starts
+            temp_phys_in.timeseries[0] = temp_phys_in.timeseries[0] - time_before
+
+            phys_in_slices[run_fname] = temp_phys_in
+    return phys_in_slices
+
+
 def slice4phys(phys_in, ntp_list, tr_list, thr, padding=9):
     """
     Slice runs for phys2bids. 
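# Continuing the BlueprintInput sketch from earlier: slicing the hypothetical
# `phys` object into one padded run. Each (onset, offset) pair indexes the
# trigger channel, as synchronize_onsets would produce.
from phys2bids.slice4phys import slice_phys

run_timestamps = {"sub-01_task-rest_run-01_physio": (400, 2000)}
sliced = slice_phys(phys, run_timestamps, padding=5, update_trigger=False)
for fname, run_phys in sliced.items():
    # the time channel now starts 5 s (the padding) before the run's onset
    print(fname, run_phys.timeseries[0][0])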
@@ -114,13 +234,10 @@
     ---------
     phys_in: BlueprintInput object
         Object returned by BlueprintInput class
-    ntp_list: list
-        a list of integers given by the user as `ntp` input
+    nsec_list: list
+        a list of floats given by the user as `nsec` input
         Default: [0, ]
-    tr_list: list
-        a list of float given by the user as `tr` input
-        Default: [1,]
-    padding: int
+    padding: int, optional
         extra time at beginning and end of timeseries, expressed in seconds (s)
         Default: 9
diff --git a/phys2bids/tests/test_phys2bids.py b/phys2bids/tests/test_phys2bids.py
index 5161e8e85..b3893d774 100644
--- a/phys2bids/tests/test_phys2bids.py
+++ b/phys2bids/tests/test_phys2bids.py
@@ -24,26 +24,6 @@ def test_print_summary(tmpdir):
     assert os.path.isfile(str(test_outfile_log))
 
 
-def test_print_json(tmpdir):
-    test_outfile = tmpdir.join('json_test')
-    test_outfile_json = tmpdir.join('json_test.json')
-    test_samp_freq = 0.1
-    test_time_offset = -0.5
-    test_ch_name = 'foo'
-
-    phys2bids.print_json(str(test_outfile), test_samp_freq, test_time_offset, test_ch_name)
-
-    assert os.path.isfile(test_outfile_json)
-
-    test_json_data = dict(SamplingFrequency=0.1,
-                          StartTime=0.5,
-                          Columns='foo')
-    with open(test_outfile_json, 'r') as src:
-        loaded_data = json.load(src)
-
-    assert test_json_data == loaded_data
-
-
 def test_raise_exception(samefreq_full_acq_file):
     test_path, test_filename = os.path.split(samefreq_full_acq_file)
     with raises(Exception) as errorinfo:
diff --git a/phys2bids/tests/test_utils.py b/phys2bids/tests/test_utils.py
index 4844ed562..b4dd79df7 100644
--- a/phys2bids/tests/test_utils.py
+++ b/phys2bids/tests/test_utils.py
@@ -56,7 +56,27 @@ def test_write_file(tmpdir):
     utils.write_file(test_old_path, ext, test_text)
 
 
-# Tests write_json
+def test_save_json(tmpdir):
+    test_outfile = tmpdir.join('json_test')
+    test_outfile_json = tmpdir.join('json_test.json')
+    test_samp_freq = 0.1
+    test_time_offset = -0.5
+    test_ch_name = 'foo'
+
+    utils.save_json(str(test_outfile), test_samp_freq, test_time_offset, test_ch_name)
+
+    assert os.path.isfile(test_outfile_json)
+
+    test_json_data = dict(SamplingFrequency=0.1,
+                          StartTime=0.5,
+                          Columns='foo')
+    with open(test_outfile_json, 'r') as src:
+        loaded_data = json.load(src)
+
+    assert test_json_data == loaded_data
+
+
+# Tests write_json
 def test_write_json(tmpdir):
     test_json_filename = tmpdir.join('foo')
     test_json_data = dict(SamplingFrequency=42,
diff --git a/phys2bids/utils.py b/phys2bids/utils.py
index af82123d2..93bdf61dc 100644
--- a/phys2bids/utils.py
+++ b/phys2bids/utils.py
@@ -164,6 +164,34 @@ def write_file(filename, ext, text):
     print(text, file=text_file)
 
 
+def save_json(outfile, samp_freq, time_offset, ch_name):
+    """
+    Save the json required by BIDS format.
+
+    Parameters
+    ----------
+    outfile: str or path
+        Fullpath to output file.
+    samp_freq: float
+        Frequency of sampling for the output file.
+    time_offset: float
+        Difference between beginning of file and first TR.
+    ch_name: list of str
+        List of channel names, as specified by BIDS format.
+
+    Notes
+    -----
+    Outcome:
+    outfile: .json file
+        File containing information for BIDS.
+    """
+    start_time = -time_offset
+    summary = dict(SamplingFrequency=samp_freq,
+                   StartTime=round(start_time, 4),
+                   Columns=ch_name)
+    write_json(outfile, summary, indent=4, sort_keys=False)
+
+
 def write_json(filename, data, **kwargs):
     """
     Output a json file with the given data inside. 
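# Mirroring test_save_json above, with hypothetical values: StartTime in the
# sidecar is the negated time offset, and write_json appends the .json extension.
from phys2bids import utils

utils.save_json("sub-01_task-rest_physio", samp_freq=40.0, time_offset=0.5,
                ch_name=["time", "trigger"])
# sub-01_task-rest_physio.json now contains:
# {"SamplingFrequency": 40.0, "StartTime": -0.5, "Columns": ["time", "trigger"]}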
diff --git a/setup.cfg b/setup.cfg index cafcfd943..acb8dd3f6 100644 --- a/setup.cfg +++ b/setup.cfg @@ -25,6 +25,9 @@ install_requires = numpy >=1.9.3, !=*rc* matplotlib >=3.1.1, !=3.3.0rc1 PyYAML >=5.1, !=*rc* + scipy + pybids + pandas tests_require = pytest >=5.3 test_suite = pytest
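# End-to-end sketch of the new conversion workflow; the BIDS path and subject
# are hypothetical, and `physio` is assumed to be a multi-run BlueprintInput
# returned by one of phys2bids' file readers.
from phys2bids.conversion import workflow

outputs = workflow(physio, bids_dir="/data/bids", sub="01", ses="01", padding=9)
for out in outputs:
    # one BlueprintOutput per matched scan; BlueprintOutput.save() is still
    # commented out inside workflow(), so nothing is written to disk yet
    print(out.filename, out.freq, out.start_time)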