diff --git a/nomenclature/cli.py b/nomenclature/cli.py index d0324495..33cdafc1 100644 --- a/nomenclature/cli.py +++ b/nomenclature/cli.py @@ -38,6 +38,12 @@ def cli_valid_yaml(path: Path): type=str, default=None, ) +@click.option( + "--validate_data", + help="Optional name for validation folder", + type=str, + default=None, +) @click.option( "--dimension", "dimensions", @@ -51,6 +57,7 @@ def cli_valid_project( definitions: str, mappings: Optional[str], required_data: Optional[str], + validate_data: Optional[str], dimensions: Optional[List[str]], ): """Assert that `path` is a valid project nomenclature @@ -60,11 +67,13 @@ def cli_valid_project( path : Path Project directory to be validated definitions : str, optional - Name of the definitions folder, defaults to "definitions" + Name of 'definitions' folder, defaults to "definitions" mappings : str, optional - Name of the mappings folder, defaults to "mappings" (if this folder exists) + Name of 'mappings' folder, defaults to "mappings" required_data: str, optional - Name of the required data folder, default to "required_data" (if folder exists) + Name of folder for 'required data' criteria, default to "required_data" + validate_data: str, optional + Name of folder for data validation criteria, default to "validate_data" dimensions : List[str], optional Dimensions to be checked, defaults to all sub-folders of `definitions` @@ -76,23 +85,25 @@ def cli_valid_project( --dimension --dimension - - Note ---- This test includes three steps: - 1. Test that all yaml files in `definitions/` and `mappings/` can be correctly read + 1. Test that all yaml files in `definitions` and `mappings` can be correctly parsed as yaml files. This is a formal check for yaml syntax only. - 2. Test that all files in `definitions/` can be correctly parsed as a + 2. Test that all files in `definitions` can be correctly parsed as a :class:`DataStructureDefinition` object comprised of individual codelists. - 3. 
Test that all model mappings in `mappings/` can be correctly parsed as a + 3. Test that all model mappings in `mappings` can be correctly parsed as a :class:`RegionProcessor` object. This includes a check that all regions mentioned in a model mapping are defined in the region codelist. + 4. Test that all required-data and data-validation files can be parsed correctly + and are consistent with the `definitions`. """ assert_valid_yaml(path) - assert_valid_structure(path, definitions, mappings, required_data, dimensions) + assert_valid_structure( + path, definitions, mappings, required_data, validate_data, dimensions + ) @cli.command("check-region-aggregation") diff --git a/nomenclature/error.py b/nomenclature/error.py index 39296ce2..63926b3f 100644 --- a/nomenclature/error.py +++ b/nomenclature/error.py @@ -1,5 +1,6 @@ import textwrap from collections import namedtuple +from typing import Optional pydantic_custom_error_config = { "RegionNameCollisionError": ( @@ -47,9 +48,11 @@ class ErrorCollector: errors: list[Exception] + description: Optional[str] = None - def __init__(self) -> None: + def __init__(self, description: str = None) -> None: self.errors = [] + self.description = description def append(self, error: Exception) -> None: self.errors.append(error) @@ -57,12 +60,14 @@ def append(self, error: Exception) -> None: def __repr__(self) -> str: error = "error" if len(self.errors) == 1 else "errors" error_list_str = "\n".join( - f"{i+1}. {error}" for i, error in enumerate(self.errors) + f"{i + 1}. 
{error}" for i, error in enumerate(self.errors) ) - return f"Collected {len(self.errors)} {error}:\n" + textwrap.indent( - error_list_str, prefix=" " - ) + message = f"Collected {len(self.errors)} {error}" + if self.description is not None: + message += f" when checking {self.description}" + + return f"{message}:\n" + textwrap.indent(error_list_str, prefix=" ") def __bool__(self) -> bool: return bool(self.errors) diff --git a/nomenclature/processor/__init__.py b/nomenclature/processor/__init__.py index 2423344c..4591f186 100644 --- a/nomenclature/processor/__init__.py +++ b/nomenclature/processor/__init__.py @@ -4,3 +4,4 @@ RegionProcessor, ) from nomenclature.processor.required_data import RequiredDataValidator # noqa +from nomenclature.processor.data_validator import DataValidator # noqa diff --git a/nomenclature/processor/data_validator.py b/nomenclature/processor/data_validator.py index 96b79d20..dfc5158c 100644 --- a/nomenclature/processor/data_validator.py +++ b/nomenclature/processor/data_validator.py @@ -3,7 +3,7 @@ import yaml -from nomenclature import DataStructureDefinition +from nomenclature.definition import DataStructureDefinition from nomenclature.error import ErrorCollector from nomenclature.processor.iamc import IamcDataFilter from nomenclature.processor import Processor @@ -33,11 +33,11 @@ def apply(self): pass def validate_with_definition(self, dsd: DataStructureDefinition) -> None: - errors = ErrorCollector() + errors = ErrorCollector(description=f"in file '{self.file}'") for data in self.criteria_items: try: data.validate_with_definition(dsd) except ValueError as value_error: errors.append(value_error) if errors: - raise ValueError(f"In file {get_relative_path(self.file)}:\n{errors}") + raise ValueError(errors) diff --git a/nomenclature/testing.py b/nomenclature/testing.py index 28069981..8a4c689f 100644 --- a/nomenclature/testing.py +++ b/nomenclature/testing.py @@ -5,7 +5,12 @@ import yaml from nomenclature.definition import 
DataStructureDefinition -from nomenclature.processor import RegionProcessor, RequiredDataValidator +from nomenclature.processor import ( + DataValidator, + RegionProcessor, + RequiredDataValidator, + Processor, +) from nomenclature.error import ErrorCollector logger = logging.getLogger(__name__) @@ -63,35 +68,38 @@ def _check_mappings( raise FileNotFoundError(f"Mappings directory not found: {path / mappings}") -def _collect_RequiredData_errors( - required_data_dir: Path, dsd: DataStructureDefinition +def _collect_processor_errors( + path: Path, + processor: type[RequiredDataValidator] | type[DataValidator], + dsd: DataStructureDefinition, ) -> None: - errors = ErrorCollector() - for file in required_data_dir.iterdir(): + errors = ErrorCollector(description=f"files for '{processor.__name__}' ({path})") + for file in path.iterdir(): try: - RequiredDataValidator.from_file(file).validate_with_definition(dsd) + processor.from_file(file).validate_with_definition(dsd) except ValueError as error: errors.append(error) if errors: - raise ValueError(f"Found error(s) in required data files:\n{errors}") + raise ValueError(errors) -def _check_RequiredData( +def _check_processor_directory( path: Path, - definitions: str = "definitions", + processor: Processor, + processor_arg: str, + folder: Optional[str] = None, + definitions: Optional[str] = "definitions", dimensions: Optional[List[str]] = None, - required_data: Optional[str] = None, ) -> None: dsd = DataStructureDefinition(path / definitions, dimensions) - if required_data is None: - if (path / "requiredData").is_dir(): - _collect_RequiredData_errors(path / "required_data", dsd) - - elif (path / required_data).is_dir(): - _collect_RequiredData_errors(path / required_data, dsd) + if folder is None: + if (path / processor_arg).is_dir(): + _collect_processor_errors(path / processor_arg, processor, dsd) + elif (path / folder).is_dir(): + _collect_processor_errors(path / folder, processor, dsd) else: raise FileNotFoundError( - 
f"Directory for required data not found at: {path / required_data}" + f"Directory for '{processor_arg}' not found at: {path / folder}" ) @@ -100,6 +108,7 @@ def assert_valid_structure( definitions: str = "definitions", mappings: Optional[str] = None, required_data: Optional[str] = None, + validate_data: Optional[str] = None, dimensions: Optional[List[str]] = None, ) -> None: """Assert that `path` can be initialized as a :class:`DataStructureDefinition` @@ -113,8 +122,11 @@ def assert_valid_structure( mappings : str, optional Name of the mappings folder, defaults to "mappings" (if this folder exists) required_data : str, optional - Name of the required data folder, defaults to "required_data" (if this folder - exists) + Name of the folder for required data, defaults to "required_data" + (if this folder exists) + validate_data : str, optional + Name of the folder for data validation criteria, defaults to "validate_data" + (if this folder exists) dimensions : List[str], optional Dimensions to be checked, defaults to all sub-folders of `definitions` @@ -122,7 +134,7 @@ def assert_valid_structure( ----- Folder structure of `path`: - A `definitions` folder is required and must be a valid - :class:`DataStructureDefinition` + :class:`DataStructureDefinition` - The `definitions` folder must contain sub-folder(s) to validate - If a `mappings` folder exists, it must be a valid :class:`RegionProcessor` @@ -139,7 +151,17 @@ def assert_valid_structure( f"`definitions` directory is empty: {path / definitions}" ) _check_mappings(path, definitions, dimensions, mappings) - _check_RequiredData(path, definitions, dimensions, required_data) + _check_processor_directory( + path, + RequiredDataValidator, + "required_data", + required_data, + definitions, + dimensions, + ) + _check_processor_directory( + path, DataValidator, "validate_data", validate_data, definitions, dimensions + ) # Todo: add function which runs `DataStructureDefinition(path).validate(scenario)` diff --git 
a/tests/data/validation/definition/region/region.yaml b/tests/data/validation/definitions/region/region.yaml similarity index 100% rename from tests/data/validation/definition/region/region.yaml rename to tests/data/validation/definitions/region/region.yaml diff --git a/tests/data/validation/definition/variable/variable.yaml b/tests/data/validation/definitions/variable/variable.yaml similarity index 100% rename from tests/data/validation/definition/variable/variable.yaml rename to tests/data/validation/definitions/variable/variable.yaml diff --git a/tests/data/validation/validate_data/fail_unknown_region.yaml b/tests/data/validation/validate_data/fail_unknown_region.yaml deleted file mode 100644 index 8987e6ae..00000000 --- a/tests/data/validation/validate_data/fail_unknown_region.yaml +++ /dev/null @@ -1,3 +0,0 @@ - - variable: Final Energy - region: Asia - diff --git a/tests/data/validation/validate_data/validation_unknown_region.yaml b/tests/data/validation/validate_data/validate_unknown_region.yaml similarity index 100% rename from tests/data/validation/validate_data/validation_unknown_region.yaml rename to tests/data/validation/validate_data/validate_unknown_region.yaml diff --git a/tests/data/validation/validate_data/validation_unknown_variable.yaml b/tests/data/validation/validate_data/validate_unknown_variable.yaml similarity index 100% rename from tests/data/validation/validate_data/validation_unknown_variable.yaml rename to tests/data/validation/validate_data/validate_unknown_variable.yaml diff --git a/tests/test_cli.py b/tests/test_cli.py index af08f259..6687140a 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -12,7 +12,6 @@ from nomenclature import cli from nomenclature.testing import assert_valid_structure, assert_valid_yaml from nomenclature.codelist import VariableCodeList -from nomenclature.cli import cli_run_workflow from conftest import TEST_DATA_DIR @@ -256,6 +255,22 @@ def test_cli_missing_mappings_fails(): assert "Mappings directory 
not found" in str(cli_result.exception) +def test_cli_validate_data_fails(): + """Assert that validating invalid yaml fails""" + cli_result = runner.invoke( + cli, + [ + "validate-project", + str(TEST_DATA_DIR / "validation"), + ], + ) + + assert cli_result.exit_code == 1 + assert "Collected 2 errors" in str(cli_result.exception) + assert "Asia" in str(cli_result.exception) + assert "Final Energy|Industry" in str(cli_result.exception) + + def test_cli_empty_definitions_dir(): """Assert that an error is raised when the `definitions` directory is empty""" @@ -365,7 +380,6 @@ def test_cli_add_missing_variables(simple_definition, tmp_path): def test_cli_run_workflow(tmp_path, simple_df): - simple_df.to_excel(tmp_path / "input.xlsx") runner.invoke( diff --git a/tests/test_data_validation.py b/tests/test_validate_data.py similarity index 92% rename from tests/test_data_validation.py rename to tests/test_validate_data.py index 8a6f8614..4d339750 100644 --- a/tests/test_data_validation.py +++ b/tests/test_validate_data.py @@ -25,7 +25,7 @@ def test_DataValidator_from_file(): obs = DataValidator.from_file(DATA_VALIDATION_TEST_DIR / "simple_validation.yaml") assert obs == exp - dsd = DataStructureDefinition(TEST_DATA_DIR / "validation" / "definition") + dsd = DataStructureDefinition(TEST_DATA_DIR / "validation" / "definitions") assert obs.validate_with_definition(dsd) is None @@ -43,17 +43,17 @@ def test_DataValidator_validate_with_definition_raises(dimension, match): # TODO Undefined unit data_validator = DataValidator.from_file( - DATA_VALIDATION_TEST_DIR / f"validation_unknown_{dimension}.yaml" + DATA_VALIDATION_TEST_DIR / f"validate_unknown_{dimension}.yaml" ) # validating against a DataStructure with all dimensions raises - dsd = DataStructureDefinition(TEST_DATA_DIR / "validation" / "definition") + dsd = DataStructureDefinition(TEST_DATA_DIR / "validation" / "definitions") with pytest.raises(ValueError, match=match): data_validator.validate_with_definition(dsd) # 
validating against a DataStructure without the offending dimension passes dsd = DataStructureDefinition( - TEST_DATA_DIR / "validation" / "definition", + TEST_DATA_DIR / "validation" / "definitions", dimensions=[dim for dim in ["region", "variable"] if dim != dimension], ) assert data_validator.validate_with_definition(dsd) is None