def group(self, files: Union[str, List[str]], directories: List[str] = None,
          context: dict = None) -> Iterator[Tuple[str, ...]]:
    """Group MACCOR files that belong to the same experiment.

    A name is treated as a MACCOR file only when it ends in a period followed
    by three or more ASCII digits (e.g., ``run.001``). Files sharing everything
    before that final period are emitted together as one group.

    Args:
        files: A single file name or a list of file names to be grouped.
        directories: Not used by this implementation.
        context: Not used by this implementation.

    Yields:
        One tuple of file names per prefix. Groups are produced in sorted
        prefix order, and the names inside a group are ordered by the numeric
        value of their extension.
    """
    import re  # Local import so the method does not rely on a new file-level import

    if isinstance(files, str):
        files = [files]

    # Anchor the match on a real ".<digits>" suffix. Testing only the last
    # three characters would let names such as "run123" through, and a later
    # split on "." could then cut at a period inside a directory name,
    # merging unrelated files under one bogus prefix.
    pattern = re.compile(r"(.*)\.([0-9]{3,})", flags=re.DOTALL)

    keyed = []
    for name in files:
        match = pattern.fullmatch(name)
        if match is not None:
            # Keep the extension as an int so ".1000" sorts after ".999"
            keyed.append((match.group(1), int(match.group(2)), name))

    # groupby only merges adjacent items, so sort by prefix first
    keyed.sort()
    for _, members in itertools.groupby(keyed, key=lambda entry: entry[0]):
        yield tuple(entry[2] for entry in members)
def test_validation(extractor):
    """Check that parsing the example file yields only the expected columns"""
    parsed = extractor.parse_to_dataframe([test_file])
    parsed.validate_columns(allow_extra_columns=False)


def test_grouping(extractor, tmp_path):
    """Check that files sharing a prefix are grouped and stray files are skipped"""
    # Two experiments (testA with two parts, testB with one) plus a nonsense file
    filenames = ['README', 'testA.002', 'testA.001', 'testB.001']
    for filename in filenames:
        tmp_path.joinpath(filename).write_text('junk')

    # Run the file discovery and collect every group it reports
    found = list(extractor.identify_files(tmp_path))

    expected_a = (str(tmp_path / 'testA.001'), str(tmp_path / 'testA.002'))
    expected_b = (str(tmp_path / 'testB.001'),)
    assert len(found) == 2
    assert expected_a in found
    assert expected_b in found