diff --git a/e4e_data_management/data.py b/e4e_data_management/data.py index 0799207..63141a4 100644 --- a/e4e_data_management/data.py +++ b/e4e_data_management/data.py @@ -7,12 +7,15 @@ import pickle from hashlib import sha256 from pathlib import Path -from typing import Dict, Generator, List, Optional, Union, Iterable, Callable +from typing import (Callable, Dict, Generator, Iterable, List, Optional, Set, + Union) from e4e_data_management.metadata import Metadata class Manifest: + """Manifest of files + """ def __init__(self, path: Path, root: Optional[Path] = None): self.__path = path if root is None: @@ -24,6 +27,19 @@ def validate(self, files: Iterable[Path], *, method: str = 'hash') -> bool: + """Validates the files against the specified manifest + + Args: + manifest (Dict[str, Dict[str, Union[str, int]]]): Manifest to verify against + files (Iterable[Path]): Files to verify + method (str, optional): Verification method. Defaults to 'hash'. + + Raises: + NotImplementedError: Unsupported verification method + + Returns: + bool: True if valid, otherwise False + """ for file in files: file_key = file.relative_to(self.__root) if file_key not in manifest: @@ -41,10 +57,20 @@ def validate(self, return True def get_dict(self) -> Dict[str, Dict[str, Union[str, int]]]: + """Retrieves the dictionary of files and checksums + + Returns: + Dict[str, Dict[str, Union[str, int]]]: Dictionary of files, checksums and sizes + """ with open(self.__path, 'r', encoding='ascii') as handle: return json.load(handle) def generate(self, files: Iterable[Path]): + """Generates the manifest with only the specified files + + Args: + files (Iterable[Path]): Files to make manifest to + """ data = self.__compute_hashes( root=self.__root, files=files @@ -56,8 +82,13 @@ def __write(self, data: Dict[str, Dict[str, Union[str, int]]]) -> None: json.dump(data, handle, indent=4) def update(self, files: Iterable[Path]): + """Updates the manifest with the specified files + + Args: + files (Iterable[Path]): Iterable of new files + """ data = self.get_dict() - files_to_checksum = (file + files_to_checksum = (file for file in files if file.relative_to(self.__root).as_posix() not in data) new_checksums = self.__compute_hashes( @@ -131,12 +162,21 @@ def get_files(self) -> Generator[Path, None, None]: @classmethod def load(cls, path: Path) -> Mission: + """Loads mission from disk + + Args: + path (Path): Path to mission folder + + Returns: + Mission: Mission object + """ metadata = Metadata.load(path) return Mission(path=path, mission_metadata=metadata) class Dataset: """Dataset """ + # pylint: disable=too-many-instance-attributes __MANIFEST_NAME = 'manifest.json' __CONFIG_NAME = '.e4edm.pkl' @@ -147,9 +187,10 @@ def __init__(self, root: Path, day_0: dt.date): self.last_country: Optional[str] = None self.last_region: Optional[str] = None self.last_site: Optional[str] = None - self.countries: List[str] = [] - self.regions: List[str] = [] - self.sites: List[str] = [] + self.countries: Set[str] = set() + self.regions: Set[str] = set() + self.sites: Set[str] = set() + self.devices: Set[str] = set() self.missions: List[Mission] = [] self.manifest = Manifest(self.root.joinpath(self.__MANIFEST_NAME)) @@ -237,8 +278,18 @@ def add_mission(self, metadata: Metadata) -> None: mission.create() self.missions.append(mission) self.manifest.update(self.get_new_files()) + + self.countries.add(metadata.country) + self.last_country = metadata.country + self.regions.add(metadata.region) + self.last_region = metadata.region + self.sites.add(metadata.site) + self.last_site = metadata.site + self.devices.add(metadata.device) self.save() def create(self) -> None: + """Creates the folder and file structure + """ self.root.mkdir(parents=True, exist_ok=False) - self.manifest.generate(self.get_files()) \ No newline at end of file + self.manifest.generate(self.get_files()) diff --git a/tests/test_mission.py b/tests/test_mission.py index 51e5573..2236370 100644 --- a/tests/test_mission.py +++ b/tests/test_mission.py @@ -9,6 +9,76 @@ from e4e_data_management.metadata import Metadata +def test_create_multiple_missions(): + """Tests creating multiple missions + """ + with TemporaryDirectory() as root_dir: + root = Path(root_dir) + app = DataManager( + app_config_dir=root + ) + app.initialize_dataset( + date=dt.date(2023, 3, 1), + project='TEST', + location='San Diego', + directory=root + ) + + app.initialize_mission( + metadata=Metadata( + timestamp=dt.datetime.fromisoformat('2023-03-01T23:59-08:00'), + device='Device 1', + country='USA', + region='California', + site='Site 1', + mission='RUN001', + ) + ) + + app.initialize_mission( + metadata=Metadata( + timestamp=dt.datetime.fromisoformat('2023-03-02T00:00-08:00'), + device='Device 1', + country='USA', + region='California', + site='Site 2', + mission='RUN002', + ) + ) + + app.initialize_mission( + metadata=Metadata( + timestamp=dt.datetime.fromisoformat('2023-03-02T00:02-08:00'), + device='Device 1', + country='USA', + region='California', + site='Site 2', + mission='RUN003', + ) + ) + dataset_dir = root.joinpath('2023.03.TEST.San Diego') + current_files = sorted([file.relative_to(dataset_dir) for file in dataset_dir.rglob('*')]) + expected_files = sorted([ + Path('.e4edm.pkl'), + Path('manifest.json'), + Path('ED-00'), + Path('ED-00', 'RUN001'), + Path('ED-00', 'RUN001', 'metadata.json'), + Path('ED-00', 'RUN001', 'manifest.json'), + Path('ED-01'), + Path('ED-01', 'RUN002'), + Path('ED-01', 'RUN002', 'metadata.json'), + Path('ED-01', 'RUN002', 'manifest.json'), + Path('ED-01', 'RUN003'), + Path('ED-01', 'RUN003', 'metadata.json'), + Path('ED-01', 'RUN003', 'manifest.json'), + ]) + assert current_files == expected_files + + assert len(app.active_dataset.missions) == 3 + assert app.active_dataset.sites == {'Site 1', 'Site 2'} + + def test_create_mission(): """Tests creating a mission """ @@ -65,3 +135,11 @@ def test_create_mission(): config = root.joinpath('2023.03.TEST.San Diego', '.e4edm.pkl') config_entry = config.relative_to(root.joinpath('2023.03.TEST.San Diego')).as_posix() assert config_entry not in manifest + + assert 'USA' in app.active_dataset.countries + assert 'San Diego' in app.active_dataset.regions + assert 'Site 1' in app.active_dataset.sites + + assert app.active_dataset.last_country == 'USA' + assert app.active_dataset.last_region == 'San Diego' + assert app.active_dataset.last_site == 'Site 1'