Merge branch 'dvc' into main

dmalt · Jul 18, 2022 · 16e0c73 · 16e0c73
2 parents 4c8e99b + 7bed7d5
commit 16e0c73
Show file tree

Hide file tree

Showing 103 changed files with 4,569 additions and 40 deletions.
diff --git a/.dvc/.gitignore b/.dvc/.gitignore
@@ -0,0 +1,3 @@
+/config.local
+/tmp
+/cache
diff --git a/.dvc/config b/.dvc/config
@@ -0,0 +1,7 @@
+[core]
+    remote = gdrive_remote
+    autostage = true
+[cache]
+    type = "hardlink,symlink"
+['remote "gdrive_remote"']
+    url = gdrive://1ZGbeFsXyb8NDOQICV2Ul6Eut0KMP0LMJ
diff --git a/.dvcignore b/.dvcignore
@@ -0,0 +1,3 @@
+# Add patterns of files dvc should ignore, which could improve
+# the performance. Learn more at
+# https://dvc.org/doc/user-guide/dvcignore
diff --git a/.gitignore b/.gitignore
@@ -1 +1,2 @@
 __pycache__
+speech_meg.egg-info
diff --git a/environment.yml b/environment.yml
diff --git a/speech/__init__.py → py.typed b/speech/__init__.py → py.typed
diff --git a/rawdata/.gitignore b/rawdata/.gitignore
@@ -0,0 +1,4 @@
+/sub-test
+/sub-01
+/sub-02
+/sub-emptyroom
diff --git a/rawdata/code/analysis/.gitignore b/rawdata/code/analysis/.gitignore
@@ -0,0 +1,2 @@
+/meg_env_audio_mfcc_ica_mi.html
+/meg_env_audio_mfcc_ica_mi.html_
diff --git a/rawdata/code/analysis/main.py b/rawdata/code/analysis/main.py
@@ -0,0 +1,41 @@
+import numpy as np  # type: ignore
+from joblib import Parallel, delayed  # type: ignore
+from mne.io import read_raw_fif  # type: ignore
+from sklearn.feature_selection import mutual_info_regression  # type: ignore
+from speech import config as cfg  # type: ignore
+from tqdm.contrib.itertools import product  # type: ignore
+
+# Mutual information between MEG envelope channels and audio MFCC components,
+# computed per frequency band, per audio shift and per time window. One array
+# of shape (n_sensors, n_shifts, n_windows) is saved for every band.
+mfccs_ica = np.load(cfg.mfccs_ica_path)
+# Candidate audio shifts in SECONDS. This array is never overwritten: the
+# conversion to samples depends on each recording's sampling rate and is
+# stored separately inside the band loop (previously the result was written
+# back into `shifts`, so every band after the first rescaled the offsets again).
+shifts = np.linspace(cfg.tmin, cfg.tmax, cfg.n_shifts)
+
+for band in cfg.bands:
+    print(f"Processing {band} band...")
+    raw = read_raw_fif(cfg.meg_env[band], verbose="ERROR")
+    sfreq = raw.info["sfreq"]
+    # Drop the first cfg.skip_samp samples of every channel.
+    # NOTE(review): the MFCC series are not trimmed here -- presumably they are
+    # already aligned with the trimmed MEG data; confirm against the producer
+    # of cfg.mfccs_ica_path.
+    raw_data = raw.get_data()[:, cfg.skip_samp :]
+    # Standardise each channel: zero mean, unit standard deviation.
+    raw_data -= raw_data.mean(axis=1, keepdims=True)
+    raw_data /= raw_data.std(axis=1, keepdims=True)
+    n_sen, n_samp = raw_data.shape
+    win_len_samp = int(cfg.win_len_sec * sfreq)
+    # Start sample of every analysis window; the last window may be shorter
+    # than win_len_samp because slicing past the end is truncated.
+    winds = np.arange(0, n_samp, win_len_samp)
+    mi = np.zeros((n_sen, cfg.n_shifts, len(winds)))
+    # Shifts expressed in samples for this band's sampling rate.
+    shifts_samp = (shifts * sfreq).astype(int)
+
+    def process_iter(m, i_shift, i_sen, i_win):
+        """Add the MI of one (MFCC component, shift, sensor, window) to `mi`.
+
+        m        -- one MFCC component time series (one row of `mfccs_ica`)
+        i_shift  -- index into `shifts_samp`
+        i_sen    -- sensor (row) index into `raw_data`
+        i_win    -- window index into `winds`
+
+        The result is accumulated in place, so `mi[i_sen, i_shift, i_win]`
+        ends up as the sum over all MFCC components.
+        """
+        start = winds[i_win]
+        stop = start + win_len_samp
+        # Circularly shift the audio feature, then cut the current window.
+        mfcc_shift = np.roll(m, shifts_samp[i_shift])[start:stop]
+        # scikit-learn expects a 2-D feature matrix: (n_samples, 1).
+        sen_data = raw_data[i_sen, start:stop][:, np.newaxis]
+        # With a single feature column the estimator returns a length-1
+        # array; take the element explicitly instead of relying on implicit
+        # array-to-scalar conversion.
+        mi[i_sen, i_shift, i_win] += mutual_info_regression(
+            sen_data, mfcc_shift
+        )[0]
+
+    # Threads share `mi` (require="sharedmem") and update it in place.
+    # NOTE(review): the same cell is incremented once per MFCC component and
+    # `+=` is not atomic; the outermost loop over components makes collisions
+    # unlikely, but this is not guarded by a lock.
+    Parallel(n_jobs=8, require="sharedmem", prefer="threads")(
+        delayed(process_iter)(m, i, j, w)
+        for m, i, j, w in product(
+            mfccs_ica, range(cfg.n_shifts), range(n_sen), range(len(winds))
+        )
+    )
+
+    np.save(cfg.mfcc_mi_paths[band], mi)
diff --git a/rawdata/code/analysis/meg_env_audio_mfcc_ica_mi.html.dvc b/rawdata/code/analysis/meg_env_audio_mfcc_ica_mi.html.dvc
@@ -0,0 +1,4 @@
+outs:
+- md5: ca4d854a19bb82cdc79f2e56620ec1e2
+  size: 18564683
+  path: meg_env_audio_mfcc_ica_mi.html
diff --git a/rawdata/code/analysis/meg_env_audio_mfcc_ica_mi.html_.dvc b/rawdata/code/analysis/meg_env_audio_mfcc_ica_mi.html_.dvc
@@ -0,0 +1,4 @@
+outs:
+- md5: 30da2570f3ba5d2fbbfd918f498e02b1
+  size: 16170583
+  path: meg_env_audio_mfcc_ica_mi.html_
diff --git a/rawdata/code/analysis/plot_mi.py b/rawdata/code/analysis/plot_mi.py
@@ -0,0 +1,58 @@
+import matplotlib.pyplot as plt  # type: ignore
+import mne  # type: ignore
+import numpy as np
+from mne import Report
+from mne.io import read_raw_fif  # type: ignore
+from mne.viz import plot_topomap  # type: ignore
+from speech import config as cfg  # type: ignore
+
+# Build an HTML report with, for every frequency band, one pair of topomaps
+# (magnetometers / gradiometers) per audio shift plus a curve of the maximum
+# MI over sensors as a function of the shift.
+report = Report()
+
+# Use the same shift grid that the MI computation used to build the saved
+# arrays (cfg.tmin, cfg.tmax, cfg.n_shifts), so that captions, axis ticks and
+# the second array dimension always agree with the data.
+n_shifts = cfg.n_shifts
+shifts = np.linspace(cfg.tmin, cfg.tmax, n_shifts)
+
+# Sensor layouts are taken from the alpha-band recording and reused for all
+# bands.
+raw = read_raw_fif(cfg.meg_env['alpha'])
+idx_grad = mne.pick_types(raw.info, meg="grad")
+idx_mag = mne.pick_types(raw.info, meg="mag")
+info_grad = raw.copy().pick_channels([raw.ch_names[c] for c in idx_grad]).info
+info_mag = raw.copy().pick_channels([raw.ch_names[c] for c in idx_mag]).info
+
+for band in cfg.bands:
+
+    d = np.load(cfg.mfcc_mi_paths[band])
+    if d.ndim == 3:
+        # Collapse the last axis into a t-like statistic:
+        # mean / std * sqrt(number of entries along that axis).
+        d = d.mean(axis=2) / d.std(axis=2) * np.sqrt(d.shape[2])
+
+    figs = []
+    captions = []
+    for time_idx in range(n_shifts):
+        fig, ax = plt.subplots(1, 2)
+        ax[0].set_title("magnetometers")
+        ax[1].set_title("gradiometers")
+        # Colour limits are left to plot_topomap's defaults (a fixed
+        # vmax=0.025 was tried previously).
+        d_mag = d[idx_mag, time_idx]
+        im, c = plot_topomap(d_mag, info_mag, show=False, axes=ax[0])
+        plt.colorbar(im, ax=ax[0])
+        d_grad = d[idx_grad, time_idx]
+        im, c = plot_topomap(d_grad, info_grad, show=False, axes=ax[1])
+        plt.colorbar(im, ax=ax[1])
+        fig.set_figwidth(20)
+        figs.append(fig)
+        shift_sec = round(shifts[time_idx], 2)
+        captions.append(f"audio shift = {shift_sec} sec")
+
+    report.add_figure(
+        fig=figs, title=f"{band=} {cfg.bands[band]}", caption=captions
+    )
+    # Summary curve: maximum over sensors for every shift.
+    fig = plt.figure()
+    fig.set_figwidth(20)
+    plt.plot(shifts, d.max(axis=0))
+    plt.grid()
+    plt.xlabel("Audio shift, sec")
+    plt.ylabel("MI, max over sensors")
+    plt.xticks(shifts)
+    report.add_figure(fig, title='', caption="Max MEG env-audio mfccs MI")
+
+report.save("meg_env_audio_mfcc_ica_mi.html", overwrite=True)
diff --git a/rawdata/code/configs/011-annotate_premaxfilt.yaml b/rawdata/code/configs/011-annotate_premaxfilt.yaml
@@ -0,0 +1,19 @@
+defaults:
+  - schema
+  - paths@paths
+  - hydra
+  - _self_
+  - subject: test
+
+deriv_paths: ${paths.011-annotate_premaxfilt}
+
+input:
+  raw: ${paths.root_data.raw}
+output:
+  bad_ch: ${deriv_paths.bad_ch}
+  annots: ${deriv_paths.annots}
+
+
+lowpass: 100
+highpass: null
+n_channels: 50
diff --git a/rawdata/code/configs/021-apply_maxfilter.yaml b/rawdata/code/configs/021-apply_maxfilter.yaml
@@ -0,0 +1,19 @@
+defaults:
+  - schema
+  - paths@paths
+  - hydra
+  - _self_
+  - subject: test
+
+deriv_paths: ${paths.021-apply_maxfilter}
+
+input:
+  raw: ${paths.root_data.raw}
+  ct: ${paths.root_data.ct}
+  cal: ${paths.root_data.cal}
+  bad_ch: ${paths.011-annotate_premaxfilt.bad_ch}
+  annots: ${paths.011-annotate_premaxfilt.annots}
+output:
+  maxfilt_raw: ${deriv_paths.maxfilt_raw}
+
+t_window: auto
diff --git a/rawdata/code/configs/031-annotate_postmaxfilt.yaml b/rawdata/code/configs/031-annotate_postmaxfilt.yaml
@@ -0,0 +1,22 @@
+defaults:
+  - schema
+  - paths@paths
+  - hydra
+  - _self_
+  - subject: test
+
+deriv_paths: ${paths.031-annotate_postmaxfilt}
+
+input:
+  raw: ${paths.021-apply_maxfilter.maxfilt_raw}
+  annots: ${paths.011-annotate_premaxfilt.annots}
+
+output:
+  annots: ${deriv_paths.annots}
+
+
+mode: EDIT
+
+lowpass: 100
+highpass: null
+n_channels: 50
diff --git a/rawdata/code/configs/032-annotate_speech.yaml b/rawdata/code/configs/032-annotate_speech.yaml
@@ -0,0 +1,15 @@
+defaults:
+  - schema
+  - paths@paths
+  - hydra
+  - _self_
+  - subject: test
+
+deriv_paths: ${paths.032-annotate_speech}
+
+input:
+  raw: ${paths.021-apply_maxfilter.maxfilt_raw}
+output:
+  annots: ${deriv_paths.annots}
+
+mode: EDIT
diff --git a/rawdata/code/configs/033-annotate_covert.yaml b/rawdata/code/configs/033-annotate_covert.yaml
@@ -0,0 +1,17 @@
+defaults:
+  - schema
+  - paths@paths
+  - hydra
+  - _self_
+  - subject: test
+
+deriv_paths: ${paths.033-annotate_covert}
+
+input:
+  raw: ${paths.021-apply_maxfilter.maxfilt_raw}
+output:
+  annots: ${deriv_paths.annots}
+
+
+check: true
+decim: 20
diff --git a/rawdata/code/configs/041-compute_ica.yaml b/rawdata/code/configs/041-compute_ica.yaml
@@ -0,0 +1,25 @@
+defaults:
+  - schema
+  - paths@paths
+  - hydra
+  - _self_
+  - subject: test
+
+deriv_paths: ${paths.041-compute_ica}
+
+input:
+  raw: ${paths.021-apply_maxfilter.maxfilt_raw}
+  annots: ${paths.031-annotate_postmaxfilt.annots}
+output:
+  solution: ${deriv_paths.solution}
+  report: ${deriv_paths.report}
+
+ica_init:
+  n_components: 0.99
+  random_state: 28
+ica_fit:
+  decim: 3
+  annot_rej: True
+filt:
+  l_freq: 1
+  h_freq: null
diff --git a/rawdata/code/configs/051-inspect_ica.yaml b/rawdata/code/configs/051-inspect_ica.yaml
@@ -0,0 +1,19 @@
+defaults:
+  - schema
+  - paths@paths
+  - hydra
+  - _self_
+  - subject: test
+
+deriv_paths: ${paths.051-inspect_ica}
+
+input:
+  raw: ${paths.021-apply_maxfilter.maxfilt_raw}
+  ica: ${paths.041-compute_ica.solution}
+  annots: ${paths.031-annotate_postmaxfilt.annots}
+output:
+  bad_ics: ${deriv_paths.bad_ics}
+
+filt:
+  l_freq: 1
+  h_freq: null
diff --git a/rawdata/code/configs/061-apply_ica.yaml b/rawdata/code/configs/061-apply_ica.yaml
@@ -0,0 +1,15 @@
+defaults:
+  - schema
+  - paths@paths
+  - hydra
+  - _self_
+  - subject: test
+
+deriv_paths: ${paths.061-apply_ica}
+
+input:
+  raw: ${paths.021-apply_maxfilter.maxfilt_raw}
+  bad_ics: ${paths.051-inspect_ica.bad_ics}
+  ica: ${paths.041-compute_ica.solution}
+output:
+  raw: ${deriv_paths.raw}
diff --git a/rawdata/code/configs/071-annotate_muscles.yaml b/rawdata/code/configs/071-annotate_muscles.yaml
@@ -0,0 +1,24 @@
+defaults:
+  - schema
+  - paths@paths
+  - hydra
+  - _self_
+  - subject: test
+
+deriv_paths: ${paths.071-annotate_muscles}
+
+input:
+  raw: ${paths.061-apply_ica.raw}
+output:
+  annots: ${deriv_paths.annots}
+
+# Either rerun mne.preprocessing.annotate_muscle_zscore() (mode: NEW) or edit
+# the annotations saved by the previous run (mode: EDIT).
+# If the output.annots file doesn't exist, we fall back to the NEW mode.
+mode: EDIT
+
+annotate_muscle_params:
+  threshold: 5
+  filter_freq: [110, 200]
+  min_length_good: 1
+  ch_type: mag
diff --git a/rawdata/code/configs/081-align_audio.yaml b/rawdata/code/configs/081-align_audio.yaml
@@ -0,0 +1,22 @@
+defaults:
+  - schema
+  - paths@paths
+  - hydra
+  - _self_
+  - subject: test
+  - optional subject_overrides: 081-align_audio_sub-${subject}
+
+deriv_paths: ${paths.081-align_audio}
+
+input:
+  raw: ${paths.061-apply_ica.raw}
+  audio_hr: ${paths.root_data.audio}
+output:
+  aligned_audio: ${deriv_paths.aligned_audio}
+  report: ${deriv_paths.report}
+
+audio_ch: MISC008
+audio_dsamp_freq: 22050
+
+correction_samp: 1
+report_segments_sec: [[10, 20]]
diff --git a/rawdata/code/configs/091-resample.yaml b/rawdata/code/configs/091-resample.yaml
@@ -0,0 +1,15 @@
+defaults:
+  - schema
+  - paths@paths
+  - hydra
+  - _self_
+  - subject: test
+
+deriv_paths: ${paths.091-resample}
+
+input:
+  raw: ${paths.061-apply_ica.raw}
+output:
+  raw: ${deriv_paths.raw}
+
+sfreq: 500
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		/meg_env_audio_mfcc_ica_mi.html
		/meg_env_audio_mfcc_ica_mi.html_