Skip to content

Commit

Permalink
Merge pull request #59 from yjmantilla/exemplar_file_tests
Browse files Browse the repository at this point in the history
Exemplar file tests
  • Loading branch information
yjmantilla authored Sep 11, 2022
2 parents 62306cd + cb87293 commit 0a762cf
Show file tree
Hide file tree
Showing 8 changed files with 160 additions and 53 deletions.
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ flask
uvicorn
fastapi-jsonrpc
versioneer
pybv >= 0.7.3
https://api.github.com/repos/mne-tools/mne-bids/zipball/main
git+https://github.com/yjmantilla/bidscoin.git@sovabids
-e .
1 change: 1 addition & 0 deletions requirements-user.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
bids_validator
pybv >= 0.7.3
https://api.github.com/repos/mne-tools/mne-bids/zipball/main
.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
requests
pybv
pybv >= 0.7.3
pyyaml
pandas
fastapi-jsonrpc
150 changes: 122 additions & 28 deletions sovabids/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import mne
import numpy as np
from mne_bids.write import _write_raw_brainvision
import fileinput


def lemon_prepare():
"""Download and prepare a few files of the LEMON dataset.
Expand Down Expand Up @@ -121,23 +123,24 @@ def lemon_bidscoin_prepare(src_path):
print('already done, skipping...')
print('finish')

def make_dummy_dataset(PATTERN='T%task%/S%session%/sub%subject%_%acquisition%_%run%',
def make_dummy_dataset(EXAMPLE,
PATTERN='T%task%/S%session%/sub%subject%_%acquisition%_%run%',
DATASET = 'DUMMY',
NSUBS = 2,
NSESSIONS = 2,
NTASKS = 2,
NACQS = 2,
NRUNS = 2,
NCHANNELS = 2,
SFREQ = 200,
STOP = 10,
NUMEVENTS = 10,
PREFIXES = {'subject':'SU','session':'SE','task':'TA','acquisition':'AC','run':'RU'},
ROOT=None):
ROOT=None,
):
"""Create a dummy dataset given some parameters.
Parameters
----------
EXAMPLE : str,PathLike|list , required
Path of the file to replicate as each file in the dummy dataset.
If a list, it is assumed each item is a file. All of these items are replicated.
PATTERN : str, optional
The pattern in placeholder notation using the following fields:
%dataset%, %task%, %session%, %subject%, %run%, %acquisition%
Expand All @@ -153,20 +156,13 @@ def make_dummy_dataset(PATTERN='T%task%/S%session%/sub%subject%_%acquisition%_%r
Number of acquisitions.
NRUNS : int, optional
Number of runs.
NCHANNELS : int, optional
Number of channels.
SFREQ : float, optional
Samplinf frequency of the data.
STOP : float, optional
Time duration of the data in seconds.
NUMEVENTS : int, optional
Number of events along the duration.
PREFIXES : dict, optional
Dictionary with the following keys:'subject', 'session', 'task' and 'acquisition'.
The values are the corresponding prefix. RUN is not present because it has to be a number.
ROOT : str, optional
Path where the files will be generated.
If None, the _data subdir will be used.
"""

if ROOT is None:
Expand All @@ -176,8 +172,6 @@ def make_dummy_dataset(PATTERN='T%task%/S%session%/sub%subject%_%acquisition%_%r
data_dir = ROOT
os.makedirs(data_dir,exist_ok=True)



sub_zeros = get_num_digits(NSUBS)
subs = [ PREFIXES['subject']+ str(x).zfill(sub_zeros) for x in range(NSUBS)]

Expand All @@ -193,17 +187,6 @@ def make_dummy_dataset(PATTERN='T%task%/S%session%/sub%subject%_%acquisition%_%r
acq_zeros = get_num_digits(NACQS)
acquisitions = [ PREFIXES['acquisition']+str(x).zfill(acq_zeros) for x in range(NACQS)]

# Create some dummy metadata
n_channels = NCHANNELS
sampling_freq = SFREQ # in Hertz
info = mne.create_info(n_channels, sfreq=sampling_freq)

times = np.linspace(0, STOP, STOP*sampling_freq, endpoint=False)
data = np.zeros((NCHANNELS,times.shape[0]))

raw = mne.io.RawArray(data, info)
raw.set_channel_types({x:'eeg' for x in raw.ch_names})
new_events = mne.make_fixed_length_events(raw, duration=STOP//NUMEVENTS)

for task in tasks:
for session in sessions:
Expand All @@ -218,5 +201,116 @@ def make_dummy_dataset(PATTERN='T%task%/S%session%/sub%subject%_%acquisition%_%r
dummy = dummy.replace('%acquisition%',acq)
path = [data_dir] +dummy.split('/')
fpath = os.path.join(*path)
_write_raw_brainvision(raw,fpath,new_events,overwrite=True)
dirpath = os.path.join(*path[:-1])
os.makedirs(dirpath,exist_ok=True)
if isinstance(EXAMPLE,list):
for ff in EXAMPLE:
fname, ext = os.path.splitext(ff)
shutil.copyfile(ff, fpath+ext)
if 'vmrk' in ext or 'vhdr' in ext:
replace_brainvision_filename(fpath+ext,path[-1])
else:
fname, ext = os.path.splitext(EXAMPLE)
shutil.copyfile(EXAMPLE, fpath+ext)

def get_dummy_raw(NCHANNELS=5,
                  SFREQ=200,
                  STOP=10,
                  NUMEVENTS=10,
                  ):
    """Create a dummy all-zeros MNE Raw object plus fixed-length events.

    Parameters
    ----------
    NCHANNELS : int, optional
        Number of channels.
    SFREQ : float, optional
        Sampling frequency of the data in Hertz.
    STOP : float, optional
        Time duration of the data in seconds.
    NUMEVENTS : int, optional
        Number of events along the duration.

    Returns
    -------
    raw : mne.io.RawArray
        Raw object with NCHANNELS channels of zeros, all typed as 'eeg'.
    new_events : numpy.ndarray
        Events array as returned by mne.make_fixed_length_events.
    """
    info = mne.create_info(NCHANNELS, sfreq=SFREQ)

    # The sample count must be an integer even when SFREQ/STOP are floats;
    # building the data directly avoids materializing an unused time vector.
    n_samples = int(STOP * SFREQ)
    data = np.zeros((NCHANNELS, n_samples))

    raw = mne.io.RawArray(data, info)
    raw.set_channel_types({ch: 'eeg' for ch in raw.ch_names})
    new_events = mne.make_fixed_length_events(raw, duration=STOP // NUMEVENTS)

    return raw, new_events

def save_dummy_vhdr(fpath, dummy_args=None):
    """Save a dummy BrainVision file triplet (.vhdr, .eeg, .vmrk).

    Parameters
    ----------
    fpath : str, required
        Path where to save the .vhdr file; companion .eeg and .vmrk
        files are written alongside it.
    dummy_args : dict, optional
        Dictionary with the arguments of the get_dummy_raw function.

    Returns
    -------
    list of str or None
        List with the paths of the .vhdr, .eeg and .vmrk files if those
        were successfully created, None otherwise.
    """
    # Avoid a mutable default argument; None means "use get_dummy_raw defaults".
    if dummy_args is None:
        dummy_args = {}

    raw, new_events = get_dummy_raw(**dummy_args)
    _write_raw_brainvision(raw, fpath, new_events, overwrite=True)
    eegpath = fpath.replace('.vhdr', '.eeg')
    vmrkpath = fpath.replace('.vhdr', '.vmrk')
    if all(os.path.isfile(x) for x in [fpath, eegpath, vmrkpath]):
        return [fpath, eegpath, vmrkpath]
    return None

def save_dummy_cnt(fpath,
                   ):
    """Save a dummy Neuroscan CNT file.

    Fetches (and locally caches) a small sample .cnt recording from the
    mne-testing-data repository via mne.datasets.fetch_dataset, then
    copies it to fpath. Requires network access on the first call.

    Parameters
    ----------
    fpath : str, required
        Path where to save the file.

    Returns
    -------
    str or None
        Path of the desired file if the file was successfully created,
        None otherwise.
    """
    # Single source of truth for the sample file name.
    fname = 'scan41_short.cnt'
    cnt_dict = {'dataset_name': 'cnt_sample',
                'archive_name': fname,
                'hash': 'md5:7ab589254e83e001e52bee31eae859db',
                'url': 'https://github.com/mne-tools/mne-testing-data/blob/master/CNT/scan41_short.cnt?raw=true',
                'folder_name': 'cnt_sample',
                }
    data_path = mne.datasets.fetch_dataset(cnt_dict)
    shutil.copyfile(os.path.join(data_path, fname), fpath)  # copyfile overwrites by default
    return fpath if os.path.isfile(fpath) else None

def replace_brainvision_filename(fpath, newname):
    """Rewrite the DataFile/MarkerFile entries of a BrainVision file in place.

    BrainVision .vhdr and .vmrk files reference their companion files by
    name, so after copying or renaming a recording those references must
    be updated to the new base name.

    Parameters
    ----------
    fpath : str
        Path of the .vhdr or .vmrk file to edit in place.
    newname : str
        New base name of the recording. Any '.eeg' or '.vmrk' extension
        is stripped; the proper extension is re-appended per entry.
    """
    # Normalize to a bare base name regardless of how the caller passed it.
    for ext in ('.eeg', '.vmrk'):
        if ext in newname:
            newname = newname.replace(ext, '')
    # fileinput with inplace=True redirects stdout into the file, so each
    # print() below emits the (possibly rewritten) line back into fpath.
    # The previous chained .format(...) calls were no-ops and are removed.
    for line in fileinput.input(fpath, inplace=True):
        if 'DataFile' in line:
            print(f'DataFile={newname}.eeg')
        elif 'MarkerFile' in line:
            print(f'MarkerFile={newname}.vmrk')
        else:
            print(line, end='')
3 changes: 3 additions & 0 deletions sovabids/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,9 @@ def apply_rules_to_single_file(file,rules,bids_path,write=False,preview=False):
bids_path = bids_path.copy()
bids_path = bids_path.update(
datatype=datatype, suffix=datatype, extension=ext)
if bids_path.datatype in ['eeg', 'ieeg']:
if ext not in ['.vhdr', '.edf', '.bdf', '.EDF']:
bids_path.update(extension='.vhdr')
##################################################################


Expand Down
30 changes: 17 additions & 13 deletions tests/test_bids.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@
from sovabids.parsers import placeholder_to_regex,_modify_entities_of_placeholder_pattern
from sovabids.rules import apply_rules,load_rules
from sovabids.dicts import deep_merge_N
from sovabids.datasets import make_dummy_dataset
from sovabids.datasets import make_dummy_dataset,save_dummy_vhdr,save_dummy_cnt
from sovabids.convert import convert_them

def dummy_dataset(pattern_type='placeholder',write=True,mode='python'):
def dummy_dataset(pattern_type='placeholder',write=True,mode='python',format='.vhdr'):

# Getting current file path and then going to _data directory
this_dir = os.path.dirname(__file__)
Expand All @@ -24,20 +24,22 @@ def dummy_dataset(pattern_type='placeholder',write=True,mode='python'):
test_root = os.path.join(data_dir,'DUMMY')
input_root = os.path.join(test_root,'DUMMY_SOURCE')
mode_str = '_' + mode
bids_path = os.path.join(test_root,'DUMMY_BIDS'+'_'+pattern_type+mode_str)
bids_path = os.path.join(test_root,'DUMMY_BIDS'+'_'+pattern_type+mode_str+'_'+format.replace('.',''))

# Make example File
if format == '.vhdr':
example_fpath = save_dummy_vhdr(os.path.join(data_dir,'dummy.vhdr'))
elif format == '.cnt':
example_fpath = save_dummy_cnt(os.path.join(data_dir,'dummy.cnt'))

# PARAMS for making the dummy dataset
DATA_PARAMS ={ 'PATTERN':'T%task%/S%session%/sub%subject%_%acquisition%_%run%',
DATA_PARAMS ={ 'EXAMPLE':example_fpath,
'PATTERN':'T%task%/S%session%/sub%subject%_%acquisition%_%run%',
'DATASET' : 'DUMMY',
'NSUBS' : 2,
'NTASKS' : 2,
'NRUNS' : 2,
'NSESSIONS' : 2,
'NCHANNELS' : 32,
'NACQS' :2,
'SFREQ' : 200,
'STOP' : 10,
'NUMEVENTS' : 10,
'ROOT' : input_root
}

Expand Down Expand Up @@ -65,7 +67,7 @@ def dummy_dataset(pattern_type='placeholder',write=True,mode='python'):
FIXED_PATTERN =DATA_PARAMS.get('PATTERN',None)

FIXED_PATTERN = _modify_entities_of_placeholder_pattern(FIXED_PATTERN,'append')
FIXED_PATTERN = FIXED_PATTERN + '.' + 'vhdr'
FIXED_PATTERN = FIXED_PATTERN + format

# Making the rules dictionary
data={
Expand All @@ -82,13 +84,13 @@ def dummy_dataset(pattern_type='placeholder',write=True,mode='python'):
},
'non-bids':
{
'eeg_extension':'.vhdr',
'eeg_extension':format,
'path_analysis':{'pattern':FIXED_PATTERN},
'code_execution':['print(\'some good code\')','print(raw.info)','print(some bad code)']
},
'channels':
{'name':{'0':'ECG_CHAN','1':'EOG_CHAN'},
'type':{'ECG_CHAN':'ECG','EOG_CHAN':'EOG'}}
{'name':{'1':'ECG_CHAN','2':'EOG_CHAN'}, #Note example vhdr and CNT have these channels
'type':{'ECG_CHAN':'ECG','EOG_CHAN':'EOG'}} # Names (keys) are after the rename of the previous line
}

if pattern_type == 'regex':
Expand Down Expand Up @@ -228,6 +230,8 @@ def dummy_dataset(pattern_type='placeholder',write=True,mode='python'):
print('okrpc')
return file_mappings
def test_dummy_dataset():
# apparently it cannot download the cnt consistently on the github actions machine
#dummy_dataset('placeholder',write=True,format='.cnt') # Test cnt conversion
dummy_dataset('placeholder',write=True)
dummy_dataset('regex',write=True)
dummy_dataset('placeholder',write=True,mode='cli')
Expand Down
7 changes: 5 additions & 2 deletions tests/test_sova2coin.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from sovabids.files import _get_files
from sovabids.settings import REPO_PATH
from sovabids.parsers import _modify_entities_of_placeholder_pattern
from sovabids.datasets import lemon_bidscoin_prepare,make_dummy_dataset
from sovabids.datasets import lemon_bidscoin_prepare,make_dummy_dataset,save_dummy_vhdr
import yaml

def test_sova2coin(dataset='dummy_bidscoin',noedit=True):
Expand Down Expand Up @@ -59,8 +59,11 @@ def test_sova2coin(dataset='dummy_bidscoin',noedit=True):
shutil.rmtree(source_path)
except:
pass

# Make example VHDR File
example_fpath = save_dummy_vhdr(os.path.join(data_dir,'dummy.vhdr'))

make_dummy_dataset(DATASET=dataset+'_input',NSUBS=3,NTASKS=2,NSESSIONS=2,NACQS=1,NRUNS=2,PATTERN=pat,ROOT=source_path)
make_dummy_dataset(EXAMPLE=example_fpath,DATASET=dataset+'_input',NSUBS=3,NTASKS=2,NSESSIONS=2,NACQS=1,NRUNS=2,PATTERN=pat,ROOT=source_path)


files = _get_files(source_path)
Expand Down
19 changes: 10 additions & 9 deletions tests/test_web_validator.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@
set -e
npm install --global npm@^7
npm install -g bids-validator
bids-validator _data/DUMMY/DUMMY_BIDS_placeholder_python
bids-validator _data/DUMMY/DUMMY_BIDS_regex_python
bids-validator _data/DUMMY/DUMMY_BIDS_placeholder_cli
bids-validator _data/DUMMY/DUMMY_BIDS_regex_cli
bids-validator _data/DUMMY/DUMMY_BIDS_placeholder_rpc
bids-validator _data/DUMMY/DUMMY_BIDS_regex_rpc
bids-validator _data/DUMMY/DUMMY_BIDS_placeholder_cli
bids-validator _data/DUMMY/DUMMY_BIDS_regex_cli
bids-validator _data/DUMMY/DUMMY_BIDS_placeholder_cli_vhdr/
# bids-validator _data/DUMMY_BIDS_placeholder_python_cnt/
# apparently it cannot download the cnt
bids-validator _data/DUMMY/DUMMY_BIDS_placeholder_python_vhdr/
bids-validator _data/DUMMY/DUMMY_BIDS_placeholder_rpc_vhdr/
bids-validator _data/DUMMY/DUMMY_BIDS_regex_cli_vhdr/
bids-validator _data/DUMMY/DUMMY_BIDS_regex_python_vhdr/
bids-validator _data/DUMMY/DUMMY_BIDS_regex_rpc_vhdr/


bids-validator _data/dummy_bidscoin_output
echo $?
#bids-validator _data/DUMMY/DUMMY_SOURCE
# bids-validator _data/DUMMY/DUMMY_SOURCE

0 comments on commit 0a762cf

Please sign in to comment.