Skip to content

Commit

Permalink
Group multiple files from same MACCOR test together (#13)
Browse files Browse the repository at this point in the history
* Make a test that will fail with the current grouper

* Improve grouping of the maccor extractor
  • Loading branch information
WardLT authored Nov 22, 2022
1 parent e11e3e9 commit 27a6712
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 6 deletions.
21 changes: 17 additions & 4 deletions batdata/extractors/maccor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Extractor for MACCOR (untested!!)"""
import itertools
from typing import Union, List, Iterator, Tuple

import pandas as pd
Expand All @@ -13,14 +14,26 @@
class MACCORExtractor(BatteryDataExtractor):
"""Parser for reading from MACCOR-format files
Expects the files to be ASCII files with a .### extension.
The :meth:`group` operation will consolidate files such that all with
the same prefix (i.e., everything except the numerals in the extension)
are treated as part of the same experiment.
"""

def group(self, files: Union[str, List[str]], directories: List[str] = None,
          context: dict = None) -> Iterator[Tuple[str, ...]]:
    """Group MACCOR files that belong to the same test.

    A name is treated as a MACCOR file only when the text after its last
    ``.`` is an all-numeric extension of at least three digits (e.g.
    ``run.001``). Names sharing everything before that extension are
    yielded together as one tuple, ordered by the numeric value of the
    extension.

    Args:
        files: A single path or a list of paths to consider.
        directories: Not used by this extractor.
        context: Not used by this extractor.

    Yields:
        One tuple of file names per distinct prefix; names that do not
        match the MACCOR naming pattern are dropped.
    """
    if isinstance(files, str):
        files = [files]

    # Keep (prefix, numeric extension, original name) for the valid names.
    # Requiring a real "." separator stops names such as "notes123" or
    # "dir.v2/run001" from being split on an unrelated dot and grouped
    # with files they have nothing to do with.
    candidates = []
    for name in files:
        prefix, dot, extension = name.rpartition(".")
        if dot and len(extension) >= 3 and extension.isdecimal():
            candidates.append((prefix, int(extension), name))

    # groupby only merges adjacent items, so sort by prefix first; the
    # integer extension keeps ".999" ahead of ".1000" within a group.
    candidates.sort()

    for _, members in itertools.groupby(candidates, key=lambda item: item[0]):
        yield tuple(name for _, _, name in members)

def generate_dataframe(self, file: str, file_number: int = 0, start_cycle: int = 0,
start_time: int = 0) -> pd.DataFrame:
Expand Down
23 changes: 21 additions & 2 deletions batdata/extractors/tests/test_maccor.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,32 @@
"""Tests related to the MACCOR parser"""

import os

from pytest import fixture

from batdata.extractors.maccor import MACCORExtractor

test_file = os.path.join(os.path.dirname(__file__), 'files', 'maccor_example.001')


@fixture()
def extractor():
    """Provide a MACCORExtractor instance to the tests that request it."""
    return MACCORExtractor()


def test_validation(extractor):
    """Make sure the parser generates valid outputs"""
    # Use the injected fixture rather than building a second extractor here
    data = extractor.parse_to_dataframe([test_file])
    data.validate_columns(allow_extra_columns=False)


def test_grouping(extractor, tmp_path):
    """Files sharing a prefix form one group; unrelated names are ignored."""
    # Lay out two experiments (one split across two files) plus a nonsense file
    names = ('README', 'testA.002', 'testA.001', 'testB.001')
    for name in names:
        tmp_path.joinpath(name).write_text('junk')

    # Collect every group the extractor reports for the directory
    found = list(extractor.identify_files(tmp_path))

    expected_a = tuple(str(tmp_path / n) for n in ('testA.001', 'testA.002'))
    expected_b = (str(tmp_path / 'testB.001'),)
    assert len(found) == 2
    assert expected_a in found
    assert expected_b in found

0 comments on commit 27a6712

Please sign in to comment.