ROVI-org · WardLT · Nov 22, 2022 · Nov 22, 2022 · Nov 22, 2022
diff --git a/batdata/extractors/maccor.py b/batdata/extractors/maccor.py
@@ -1,4 +1,5 @@
 """Extractor for MACCOR (untested!!)"""
+import itertools
 from typing import Union, List, Iterator, Tuple
 
 import pandas as pd
@@ -13,14 +14,26 @@
 class MACCORExtractor(BatteryDataExtractor):
-    """Parser for reading from Arbin-format files
+    """Parser for reading from MACCOR-format files
 
-    Expects the files to be ASCII files with a .### extension
+    Expects the files to be ASCII files with a .### extension.
+    The :meth:`group` operation will consolidate files such that all with
+    the same prefix (i.e., everything except the numerals in the extension)
+    are treated as part of the same experiment.
     """
 
     def group(self, files: Union[str, List[str]], directories: List[str] = None,
               context: dict = None) -> Iterator[Tuple[str, ...]]:
-        for file in files:
-            if file[-3:].isdigit():
-                yield file
+        if isinstance(files, str):
+            files = [files]
+
+        # Get only the MACCOR-style names
+        valid_names = filter(lambda x: x[-3:].isdigit(), files)
+
+        # Split then sort based on the prefix
+        split_filenames = sorted(name.rsplit(".", maxsplit=1) for name in valid_names)
+
+        # Return groups
+        for prefix, group in itertools.groupby(split_filenames, key=lambda x: x[0]):
+            yield tuple('.'.join(x) for x in group)
 
     def generate_dataframe(self, file: str, file_number: int = 0, start_cycle: int = 0,
                            start_time: int = 0) -> pd.DataFrame:

diff --git a/batdata/extractors/tests/test_maccor.py b/batdata/extractors/tests/test_maccor.py
@@ -1,13 +1,32 @@
 """Tests related to the MACCOR parser"""
 
 import os
+
+from pytest import fixture
+
 from batdata.extractors.maccor import MACCORExtractor
 
 test_file = os.path.join(os.path.dirname(__file__), 'files', 'maccor_example.001')
 
 
-def test_validation():
+@fixture()
+def extractor():
+    return MACCORExtractor()
+
+
+def test_validation(extractor):
     """Make sure the parser generates valid outputs"""
-    extractor = MACCORExtractor()
     data = extractor.parse_to_dataframe([test_file])
     data.validate_columns(allow_extra_columns=False)
+
+
+def test_grouping(extractor, tmp_path):
+    # Make a file structure with two sets of experiments and a nonsense file
+    for f in ['README', 'testA.002', 'testA.001', 'testB.001']:
+        (tmp_path / f).write_text('junk')
+
+    # Test the grouping
+    groups = list(extractor.identify_files(tmp_path))
+    assert len(groups) == 2
+    assert (str(tmp_path / 'testA.001'), str(tmp_path / 'testA.002')) in groups
+    assert (str(tmp_path / 'testB.001'),) in groups