ENH: Enable models for sparsely sampled fMRI series #414
base: master
```diff
@@ -85,6 +85,27 @@ def load_variables(layout, types=None, levels=None, skip_empty=True,
     return dataset


+def _get_timing_info(img_md):
+    if 'RepetitionTime' in img_md:
+        tr = img_md['RepetitionTime']
+        if 'DelayTime' in img_md:
+            ta = tr - img_md['DelayTime']
+        elif 'SliceTiming' in img_md:
+            slicetimes = sorted(img_md['SliceTiming'])
+            # a, b ... z
+            # z = final slice onset, b - a = slice duration
+            ta = np.round(slicetimes[-1] + slicetimes[1] - slicetimes[0], 3)
+            # If the "silence" is <1ms, consider acquisition continuous
+            if np.abs(tr - ta) < 1e-3:
+                ta = tr
+        else:
+            ta = tr
+    elif 'VolumeTiming' in img_md:
+        return NotImplemented
+
+    return tr, ta
+
+
 def _load_time_variables(layout, dataset=None, columns=None, scan_length=None,
                          drop_na=True, events=True, physio=True, stim=True,
                          regressors=True, skip_empty=True, scope='all',
```

Review comment on `return NotImplemented`: Should this be `raise NotImplementedError`?
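For context, here is a hand-worked sketch of the `SliceTiming` branch on invented metadata (a 2 s TR with all slices packed into the first second), showing how `ta` comes out:

```python
import numpy as np

# Invented sidecar metadata for a sparse acquisition
img_md = {
    'RepetitionTime': 2.0,                   # 2 s between volume onsets
    'SliceTiming': [0.0, 0.25, 0.5, 0.75],   # slices acquired in the first second
}

tr = img_md['RepetitionTime']
slicetimes = sorted(img_md['SliceTiming'])
# Final slice onset plus one slice duration = acquisition time (TA)
ta = np.round(slicetimes[-1] + slicetimes[1] - slicetimes[0], 3)
print(tr, ta)  # 2.0 1.0 -> 1 s of acquisition, 1 s of "silence" per TR
```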
```diff
@@ -146,8 +167,9 @@ def _load_time_variables(layout, dataset=None, columns=None, scan_length=None,
         if 'run' in entities:
             entities['run'] = int(entities['run'])

-        tr = layout.get_metadata(img_f, suffix='bold', scope=scope,
-                                 full_search=True)['RepetitionTime']
+        img_md = layout.get_metadata(img_f, suffix='bold', scope=scope,
+                                     full_search=True)
+        tr, ta = _get_timing_info(img_md)

         # Get duration of run: first try to get it directly from the image
         # header; if that fails, try to get NumberOfVolumes from the
```
```diff
@@ -167,7 +189,8 @@ def _load_time_variables(layout, dataset=None, columns=None, scan_length=None,
             raise ValueError(msg)

         run = dataset.get_or_create_node('run', entities, image_file=img_f,
-                                         duration=duration, repetition_time=tr)
+                                         duration=duration, repetition_time=tr,
+                                         acquisition_time=ta)
         run_info = run.get_info()

         # Process event files
```
```diff
@@ -311,6 +311,51 @@ def get_duration(self):
         ''' Return the total duration of the Variable's run(s). '''
         return sum([r.duration for r in self.run_info])

+    def get_sampling_rate(self, target_sr, rounding='up'):
+        ''' Choose sampling rate that evenly bins acquisition and repetition time
+
+        Args:
+            target_sr (float): Approximate sampling rate to target (in Hz)
+            rounding (str): Round the sampling interval 'up' (default) or 'down' if
+                ``1 / target_sr`` doesn't evenly divide acquisition and repetition
+                times
+
+        Returns:
+            sampling_rate (float)
+
+        '''
+        assert rounding in ('up', 'down')
+
+        # Convention: lowercase in float seconds/Hz, CAPS in int ms
+        trs = {ri.tr for ri in self.run_info}
+        tas = {ri.ta for ri in self.run_info}
+        assert len(trs) == 1
+        assert len(tas) == 1
+
+        TR = int(np.round(1000. * trs.pop()))
+        TA = int(np.round(1000. * tas.pop()))
+        # Nothing to do for continuous acquisition
+        if TR == TA:
+            return target_sr
+
+        TARGET_DT = int(np.round(1000. / target_sr))
+
+        # Interval must evenly divide into TR and TA
+        DT = gcd(TR, TA)
+        # Find nearest divisor of DT to TARGET_DT
+        if DT > TARGET_DT:
+            div, mod = divmod(DT, TARGET_DT)
+            if mod == 0:
+                DT = TARGET_DT
+            elif rounding == 'up':
+                DT = DT // div
+            else:
+                DT = DT // (div + 1)
+
+        # Back to Hz
+        return 1000. / DT
+
+
     def to_dense(self, sampling_rate):
         ''' Convert the current sparse column to a dense representation.
         Returns: A DenseRunVariable.
```
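To make the interval selection concrete, here is a hand-traced sketch of the gcd logic with invented timings (TR = 2 s, TA = 1.5 s, in integer milliseconds):

```python
from math import gcd

TR, TA = 2000, 1500        # invented: 2 s TR, 1.5 s TA, in integer ms
DT = gcd(TR, TA)           # 500 ms: the largest interval dividing both evenly

# target_sr = 10 Hz -> TARGET_DT = 100 ms, which divides 500 ms exactly
div, mod = divmod(DT, 100)   # (5, 0): mod == 0, so DT becomes 100 ms -> 10.0 Hz

# target_sr = 3 Hz -> TARGET_DT = 333 ms, which does not divide 500 ms
div, mod = divmod(DT, 333)   # (1, 167)
print(1000. / (DT // div))        # rounding='up':   500 ms interval -> 2.0 Hz
print(1000. / (DT // (div + 1)))  # rounding='down': 250 ms interval -> 4.0 Hz
```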
```diff
@@ -425,7 +470,7 @@ def _build_entity_index(self, run_info, sampling_rate):
         self.timestamps = pd.concat(_timestamps, axis=0, sort=True)
         return pd.concat(index, axis=0, sort=True).reset_index(drop=True)

-    def resample(self, sampling_rate, inplace=False, kind='linear'):
+    def resample(self, sampling_rate, integration_window=None, inplace=False, kind='linear'):
         '''Resample the Variable to the specified sampling rate.

         Parameters
```
@@ -442,7 +487,7 @@ def resample(self, sampling_rate, inplace=False, kind='linear'): | |
''' | ||
if not inplace: | ||
var = self.clone() | ||
var.resample(sampling_rate, True, kind) | ||
var.resample(sampling_rate, integration_window, True, kind) | ||
return var | ||
|
||
if sampling_rate == self.sampling_rate: | ||
|
```diff
@@ -453,18 +498,36 @@

         self.index = self._build_entity_index(self.run_info, sampling_rate)

-        x = np.arange(n)
         num = len(self.index)

-        from scipy.interpolate import interp1d
-        f = interp1d(x, self.values.values.ravel(), kind=kind)
-        x_new = np.linspace(0, n - 1, num=num)
-        self.values = pd.DataFrame(f(x_new))
+        if integration_window is not None:
+            from scipy.sparse import lil_matrix
+            old_times = np.arange(n) / old_sr
+            new_times = np.arange(num) / sampling_rate
+            integrator = lil_matrix((num, n), dtype=np.uint8)
+            count = None
+            for i, new_time in enumerate(new_times):
+                cols = (old_times >= new_time) & (old_times < new_time + integration_window)
+                # This could be determined analytically, but dodging off-by-one errors
+                if count is None:
+                    count = np.sum(cols)
+                integrator[i, cols] = 1
+
+            old_vals = self.values.values
+            self.values = pd.DataFrame(integrator.tocsr().dot(old_vals) / count)
+        else:
+            from scipy.interpolate import interp1d
+            x = np.arange(n)
+            f = interp1d(x, self.values.values.ravel(), kind=kind)
+            x_new = np.linspace(0, n - 1, num=num)
+            self.values = pd.DataFrame(f(x_new))

         assert len(self.values) == len(self.index)

         self.sampling_rate = sampling_rate

-    def to_df(self, condition=True, entities=True, timing=True, sampling_rate=None):
+    def to_df(self, condition=True, entities=True, timing=True, sampling_rate=None,
+              integration_window=None):
         '''Convert to a DataFrame, with columns for name and entities.

         Parameters
```

Review comment on `num = len(self.index)`: This is on me I think, but we could probably use less confusing names than `n` and `num`.

> Yeah, I can try to rewrite; I was mostly aiming for a minimal diff.

Review comment on the `integration_window` branch: Rather than determine which approach to use based on whether `integration_window` is `None` [...]. In general, I wonder if it makes sense to implement the resampling operations ourselves, or if we should just do it via pandas (or something like traces, which wraps pandas). I would expect that resampling in pandas is probably going to be more efficient than rolling our own solution, while relieving us of the burden of extensive testing.

Review comment on the `cols = ...` line: Should the integration window be centered on `new_time`?
```diff
@@ -480,7 +543,8 @@ def to_df(self, condition=True, entities=True, timing=True, sampling_rate=None):
             sampled uniformly). If False, omits them.
         '''
         if sampling_rate not in (None, self.sampling_rate):
-            return self.resample(sampling_rate).to_df(condition, entities)
+            return self.resample(sampling_rate,
+                                 integration_window=integration_window).to_df(condition, entities)

         df = super(DenseRunVariable, self).to_df(condition, entities)
```
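A hypothetical call under this change (the variable name and values are invented): pull a sparsely acquired dense variable into a DataFrame on a coarser grid by averaging over the acquisition window rather than interpolating through the silent period:

```python
# Hypothetical: resample to 10 Hz, averaging over a 1 s acquisition window
df = dense_var.to_df(sampling_rate=10, integration_window=1.0)
```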
Review comment: Is it worth testing for non-uniform slice onsets (and raising a "not supported" error)? I don't know if that ever happens in practice, but if it does, we should probably fail...

> That feels like a validation problem. While we can put those checks in ad hoc, I think it would make sense to either fail on load for validation problems, or to be able to insert a little boilerplate check like `validate(slicetimes, 'SliceTiming')`.
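For illustration, a minimal sketch of what such a boilerplate check might look like (this helper is hypothetical, not existing pybids API):

```python
import numpy as np

def validate_slice_timing(slicetimes, tol=1e-3):
    """Hypothetical check: fail fast on non-uniform slice onsets."""
    gaps = np.diff(sorted(slicetimes))
    if len(gaps) and np.ptp(gaps) > tol:
        raise ValueError("Non-uniform SliceTiming is not supported")
```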