From 157921e26a0f3f56a0b86a241d443c92b1ced594 Mon Sep 17 00:00:00 2001
From: Dmitrii Altukhov
Date: Fri, 15 Jul 2022 12:09:16 +0300
Subject: [PATCH] add interface to the dataset

- add code to load the data
- add setup.py
- add py.typed marker
- ignore egg
---
 .gitignore    |  1 +
 py.typed      |  0
 setup.py      |  9 ++++++
 speech_meg.py | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 94 insertions(+)
 create mode 100644 py.typed
 create mode 100644 setup.py
 create mode 100644 speech_meg.py

diff --git a/.gitignore b/.gitignore
index bee8a64..c624398 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 __pycache__
+speech_meg.egg-info
diff --git a/py.typed b/py.typed
new file mode 100644
index 0000000..e69de29
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..8206968
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,9 @@
+from setuptools import setup
+
+setup(
+    name="speech_meg",
+    version="0.0.1",
+    author="dmitrii altukhov",
+    author_email="dm.altukhov@ya.ru",
+    py_modules=["speech_meg"],
+)
diff --git a/speech_meg.py b/speech_meg.py
new file mode 100644
index 0000000..1a02622
--- /dev/null
+++ b/speech_meg.py
@@ -0,0 +1,84 @@
+"""Loading interface for the speech MEG dataset."""
+from __future__ import annotations
+
+from dataclasses import dataclass
+from pathlib import Path
+
+import hydra
+import librosa as lb  # type: ignore
+import mne  # type: ignore
+import numpy as np
+import numpy.typing as npt
+from ndp.signal import Signal, Signal1D
+from ndp.signal.annotations import Annotation, Annotations
+
+BIDS_ROOT = Path(__file__).parent / "rawdata"
+
+
+@dataclass
+class Info:
+    """Meg speech dataset info"""
+
+    mne_info: mne.Info
+
+
+def read_subject(subject: str) -> tuple[Signal[npt._32Bit], Signal1D[npt._32Bit], Info]:
+    """Load the MEG signal, the aligned audio and the recording info for one subject.
+
+    File locations come from the hydra config named ``paths``, composed with
+    ``bids_root`` set to ``BIDS_ROOT`` and ``subject`` added as an override.
+    """
+    with hydra.initialize(config_path="rawdata/code/configs"):
+        # a Path interpolates into the f-string as its plain string form
+        overrides = [f"bids_root={BIDS_ROOT}", f"+subject={subject}"]
+        paths = hydra.compose(config_name="paths", overrides=overrides)
+        raw_path = paths["091-resample"].raw
+        audio_path = paths["081-align_audio"].aligned_audio
+        annotations_path = paths["101-merge_annotations"].annots
+        return _read_dataset(raw_path, audio_path, annotations_path)
+
+
+def _read_dataset(
+    raw_path: str, audio_path: str, annotations_path: str
+) -> tuple[Signal[npt._32Bit], Signal1D[npt._32Bit], Info]:
+    """Read MEG and audio, attach the MEG annotations to the audio, compare durations."""
+    X, info = _read_raw(raw_path, annotations_path)
+    Y = _read_wav(audio_path)
+    Y.annotations = X.annotations
+    assert abs(X.duration - Y.duration) < 0.01, "inconsistent durations for audio and MEG"
+    return X, Y, info
+
+
+def _read_wav(path: str, sr: int | None = None) -> Signal1D[npt._32Bit]:
+    """Load audio with librosa, forwarding ``sr`` to ``lb.load`` unchanged."""
+    data, sr_final = lb.load(path, sr=sr)  # pyright: ignore
+    # append a new trailing axis to the loaded samples before wrapping them
+    return Signal1D(data[:, np.newaxis], sr_final, [])
+
+
+def _read_raw(raw_path: str, annot_path: str | None) -> tuple[Signal[npt._32Bit], Info]:
+    """Read a FIF recording and return its MEG channels as float32 plus the MNE info.
+
+    When ``annot_path`` is given, annotations read from it are set on the recording.
+    """
+    raw = mne.io.read_raw_fif(raw_path, verbose="ERROR", preload=True)
+    if annot_path is not None:
+        annots = mne.read_annotations(annot_path)
+        raw.set_annotations(annots)
+    # transpose the array returned by get_data before wrapping it in a Signal
+    X_data = raw.get_data(picks="meg").astype("float32").T
+    return Signal(X_data, raw.info["sfreq"], _annotations_from_raw(raw)), Info(raw.info)
+
+
+def _annotations_from_raw(raw: mne.io.BaseRaw) -> Annotations:
+    """Convert the annotations of ``raw`` to a list of ``Annotation`` objects.
+
+    Each onset is reduced by ``raw.first_samp / raw.info["sfreq"]``.
+    """
+    if not hasattr(raw, "annotations"):
+        return []
+    annots = raw.annotations
+    # the offset is identical for every annotation, so compute it once
+    offset = raw.first_samp / raw.info["sfreq"]
+    triples = zip(annots.onset, annots.duration, annots.description)
+    return [Annotation(onset - offset, duration, kind) for onset, duration, kind in triples]