diff --git a/nomenclature/__init__.py b/nomenclature/__init__.py index 1f4fb850..4733c50d 100644 --- a/nomenclature/__init__.py +++ b/nomenclature/__init__.py @@ -1,5 +1,6 @@ import logging from importlib.metadata import version +from pathlib import Path import yaml @@ -46,16 +47,22 @@ def create_yaml_from_xlsx(source, target, sheet_name, col, attrs=None): ).to_yaml(target) -def parse_model_registration(model_registration_file): +def parse_model_registration( + model_registration_file: str | Path, output_directory: str | Path = Path(".") +) -> None: """Parses a model registration file and writes the definitions & mapping yaml files Parameters ---------- - source : str, path, file-like object + model_registration_file : str, path, file-like object Path to xlsx model registration file. - file_name : str - Model-identifier part of the yaml file names. + output_directory : str, path, file-like object + Directory where the model mapping and region file will be saved; + defaults to current working directory """ + if not isinstance(output_directory, Path): + output_directory = Path(output_directory) + region_aggregregation_mapping = RegionAggregationMapping.from_file( model_registration_file ) @@ -63,7 +70,9 @@ def parse_model_registration(model_registration_file): x if (x.isalnum() or x in "._- ") else "_" for x in region_aggregregation_mapping.model[0] ) - region_aggregregation_mapping.to_yaml(f"{file_model_name}_mapping.yaml") + region_aggregregation_mapping.to_yaml( + output_directory / f"{file_model_name}_mapping.yaml" + ) if native_regions := [ { region_aggregregation_mapping.model[ @@ -71,5 +80,9 @@ def parse_model_registration(model_registration_file): ]: region_aggregregation_mapping.upload_native_regions } ]: - with open(f"{file_model_name}_regions.yaml", "w") as f: - yaml.dump(native_regions, f) + with open( + (output_directory / f"{file_model_name}_regions.yaml"), + "w", + encoding="utf-8", + ) as file: + yaml.dump(native_regions, file) diff --git a/nomenclature/cli.py b/nomenclature/cli.py index 9938dac1..23a9ef1a 100644 --- a/nomenclature/cli.py +++ b/nomenclature/cli.py @@ -1,4 +1,3 @@ -import ast from pathlib import Path from typing import List, Optional @@ -12,16 +11,6 @@ cli = click.Group() -class PythonLiteralOption(click.Option): - def type_cast_value(self, ctx, value): - if value is None: - return None - try: - return ast.literal_eval(value) - except Exception: - raise click.BadParameter(value) - - @cli.command("validate-yaml") @click.argument("path", type=click.Path(exists=True, path_type=Path)) def cli_valid_yaml(path: Path): @@ -49,7 +38,8 @@ def cli_valid_yaml(path: Path): @click.option( "--dimensions", help="Optional list of dimensions", - cls=PythonLiteralOption, + type=str, + multiple=True, default=None, ) def cli_valid_project( diff --git a/nomenclature/codelist.py b/nomenclature/codelist.py index f4b00cd3..37da4965 100644 --- a/nomenclature/codelist.py +++ b/nomenclature/codelist.py @@ -54,6 +54,7 @@ def check_stray_tag(cls, v: Dict[str, Code]) -> Dict[str, Code]: return v @field_validator("mapping") + @classmethod def check_end_whitespace( cls, v: Dict[str, Code], info: ValidationInfo ) -> Dict[str, Code]: @@ -66,9 +67,13 @@ def check_end_whitespace( ) return v - def __setitem__(self, key, value): + def __setitem__(self, key: str, value: Code) -> None: if key in self.mapping: raise ValueError(f"Duplicate item in {self.name} codelist: {key}") + if not isinstance(value, Code): + raise TypeError("Codelist can only contain Code items") + if key != value.name: + raise ValueError("Key has to be equal to code name") self.mapping[key] = value def __getitem__(self, k): diff --git a/nomenclature/config.py b/nomenclature/config.py index 8295a835..92aea787 100644 --- a/nomenclature/config.py +++ b/nomenclature/config.py @@ -3,7 +3,7 @@ import yaml from git import Repo -from pydantic import BaseModel, ValidationInfo, field_validator, model_validator +from pydantic import BaseModel, Field, ValidationInfo, field_validator, model_validator class CodeListConfig(BaseModel): @@ -27,9 +27,8 @@ class Repository(BaseModel): url: str hash: str | None = None release: str | None = None - local_path: Path | None = ( - None # defined via the `repository` name in the configuration - ) + local_path: Path | None = Field(default=None, validate_default=True) + # defined via the `repository` name in the configuration @model_validator(mode="after") @classmethod @@ -110,14 +109,6 @@ def check_definitions_repository( definitions_repos = v.definitions.repos if v.definitions else {} mapping_repos = {"mappings": v.mappings.repository} if v.mappings else {} repos = {**definitions_repos, **mapping_repos} - if repos and not v.repositories: - raise ValueError( - ( - "If repositories are used for definitions or mappings, they need " - "to be defined under `repositories`" - ) - ) - for use, repository in repos.items(): if repository not in v.repositories: raise ValueError((f"Unknown repository '{repository}' in {use}.")) diff --git a/nomenclature/processor/processor.py b/nomenclature/processor/processor.py index 92267327..ac3cfb1f 100644 --- a/nomenclature/processor/processor.py +++ b/nomenclature/processor/processor.py @@ -7,4 +7,4 @@ class Processor(BaseModel, abc.ABC): @abc.abstractmethod def apply(self, df: IamDataFrame) -> IamDataFrame: - return + raise NotImplementedError diff --git a/nomenclature/processor/region.py b/nomenclature/processor/region.py index d6deb170..7df675c1 100644 --- a/nomenclature/processor/region.py +++ b/nomenclature/processor/region.py @@ -61,9 +61,6 @@ def target_native_region(self) -> str: """ return self.rename if self.rename is not None else self.name - def __eq__(self, other: "NativeRegion") -> bool: - return super().__eq__(other) - class CommonRegion(BaseModel): """Common region used for model intercomparison. @@ -92,9 +89,6 @@ def rename_dict(self): "rename_dict is only available for single constituent regions" ) - def __eq__(self, other: "CommonRegion") -> bool: - return super().__eq__(other) - class RegionAggregationMapping(BaseModel): """Hold information for region processing on a per-model basis. @@ -413,7 +407,9 @@ def __eq__(self, other: "RegionAggregationMapping") -> bool: return self.model_dump(exclude={"file"}) == other.model_dump(exclude={"file"}) def to_yaml(self, file) -> None: - dict_representation = {"model": self.model} + dict_representation = { + "model": self.model[0] if len(self.model) == 1 else self.model + } if self.native_regions: dict_representation["native_regions"] = [ {native_region.name: native_region.rename} diff --git a/nomenclature/testing.py b/nomenclature/testing.py index 66fc2b83..28069981 100644 --- a/nomenclature/testing.py +++ b/nomenclature/testing.py @@ -132,10 +132,7 @@ def assert_valid_structure( f"Definitions directory not found: {path / definitions}" ) - if dimensions == []: # if "dimensions" were specified as "[]" - raise ValueError("No dimensions to validate.") - - if dimensions is None: # if "dimensions" were not specified + if dimensions == (): # if "dimensions" were not specified dimensions = [x.stem for x in (path / definitions).iterdir() if x.is_dir()] if not dimensions: raise FileNotFoundError( diff --git a/tests/data/model_registration/model-registration-template.xlsx b/tests/data/model_registration/model-registration-template.xlsx new file mode 100644 index 00000000..d3a95d33 Binary files /dev/null and b/tests/data/model_registration/model-registration-template.xlsx differ diff --git a/tests/data/nomenclature_configs/hash_and_release.yaml b/tests/data/nomenclature_configs/hash_and_release.yaml new file mode 100644 index 00000000..8e8a0fe5 --- /dev/null +++ b/tests/data/nomenclature_configs/hash_and_release.yaml @@ -0,0 +1,12 @@ +repositories: + common-definitions: + url: https://github.com/IAMconsortium/common-definitions.git/ + hash: asdf + release: "1.0" +definitions: + region: + repository: common-definitions + country: true + variable: + repository: common-definitions + repository_dimension_path: definitions/variable diff --git a/tests/data/nomenclature_configs/unknown_repo.yaml b/tests/data/nomenclature_configs/unknown_repo.yaml new file mode 100644 index 00000000..08187c7a --- /dev/null +++ b/tests/data/nomenclature_configs/unknown_repo.yaml @@ -0,0 +1,3 @@ +definitions: + region: + repository: common-definitions diff --git a/tests/test_cli.py b/tests/test_cli.py index 9d0f4753..0f049783 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,13 +1,16 @@ import subprocess import sys -from click.testing import CliRunner -from nomenclature import cli -from nomenclature.testing import assert_valid_yaml, assert_valid_structure -import pytest +import pandas as pd import pydantic - +import pytest +from click.testing import CliRunner from conftest import TEST_DATA_DIR +from pandas.testing import assert_frame_equal +from pyam import IAMC_IDX, IamDataFrame, assert_iamframe_equal + +from nomenclature import cli +from nomenclature.testing import assert_valid_structure, assert_valid_yaml runner = CliRunner() @@ -156,7 +159,11 @@ def test_cli_custom_dimensions_runs(): "validate-project", str(TEST_DATA_DIR / "non-default_dimensions"), "--dimensions", - "['variable', 'region', 'scenario']", + "variable", + "--dimensions", + "region", + "--dimensions", + "scenario", ], ) assert result_valid.exit_code == 0 @@ -172,7 +179,7 @@ def test_cli_custom_dimensions_fails(): "validate-project", str(TEST_DATA_DIR / "non-default_dimensions"), "--dimensions", - "['variable', 'region', 'foo']", + "foo", ], ) assert result_invalid.exit_code == 1 @@ -190,7 +197,9 @@ def test_cli_empty_dimensions_run(): "validate-project", str(TEST_DATA_DIR / "non-default_dimensions_one_empty"), "--dimensions", - "['variable', 'region']", + "variable", + "--dimensions", + "region", ], ) assert result_valid.exit_code == 0 @@ -255,19 +264,51 @@ def test_cli_empty_definitions_dir(): assert "`definitions` directory is empty" in str(cli_result.exception) -def test_cli_empty_dimensions(): - """Assert that an error is raised when an empty list is given as dimensions""" - - cli_result = runner.invoke( +def test_check_region_aggregation(tmp_path): + IamDataFrame( + pd.DataFrame( + [ + ["m_a", "s_a", "region_A", "Primary Energy", "EJ/yr", 1, 2], + ["m_a", "s_a", "region_B", "Primary Energy", "EJ/yr", 3, 4], + ["m_a", "s_a", "World", "Primary Energy", "EJ/yr", 5, 6], + ], + columns=IAMC_IDX + [2005, 2010], + ) + ).to_excel(tmp_path / "data.xlsx") + runner.invoke( cli, [ - "validate-project", - str(TEST_DATA_DIR / "non-default_dimensions"), - "--dimensions", - "[]", + "check-region-aggregation", + str(tmp_path / "data.xlsx"), + "--workflow-directory", + str(TEST_DATA_DIR / "region_processing"), + "--definitions", + "dsd", + "--mappings", + "partial_aggregation", + "--processed-data", + str(tmp_path / "results.xlsx"), + "--differences", + str(tmp_path / "differences.xlsx"), ], ) - assert cli_result.exit_code == 1 - assert isinstance(cli_result.exception, ValueError) - assert "No dimensions to validate." in str(cli_result.exception) + # Check differences + exp_difference = pd.DataFrame( + [ + ["m_a", "s_a", "World", "Primary Energy", "EJ/yr", 2005, 5, 4, 20.0], + ], + columns=IAMC_IDX + ["year", "original", "aggregated", "difference (%)"], + ) + assert_frame_equal( + pd.read_excel(tmp_path / "differences.xlsx"), exp_difference, check_dtype=False + ) + + # Check aggregation result + exp_result = IamDataFrame( + pd.DataFrame( + [["m_a", "s_a", "World", "Primary Energy", "EJ/yr", 5, 6]], + columns=IAMC_IDX + [2005, 2010], + ) + ) + assert_iamframe_equal(IamDataFrame(tmp_path / "results.xlsx"), exp_result) diff --git a/tests/test_code.py b/tests/test_code.py index 2f2ee44c..c228c5e1 100644 --- a/tests/test_code.py +++ b/tests/test_code.py @@ -1,10 +1,11 @@ import pytest +from pytest import raises from nomenclature.code import Code, VariableCode, RegionCode, MetaCode def test_variable_without_unit_raises(): - with pytest.raises(ValueError, match="unit\n.*required"): + with raises(ValueError, match="unit\n.*required"): VariableCode(name="No unit") @@ -26,7 +27,7 @@ def test_variable_alias_setting(): @pytest.mark.parametrize("illegal_key", ["contains-hyphen", "also not allowed", "True"]) def test_illegal_additional_attribute(illegal_key): match = f"{illegal_key}.*'code1'.*not allowed" - with pytest.raises(ValueError, match=match): + with raises(ValueError, match=match): Code(name="code1", extra_attributes={illegal_key: True}) @@ -161,7 +162,7 @@ def test_RegionCode_iso3_code_list_fail(): "IBL, ITL, LIC, MLA, BEG, FRT, ANB, GDR, LXB, MNO, NTD, NRW, PRE, EPA, " # noqa "SWD, CEW, GTR, SOR" # noqa ) - with pytest.raises(ValueError, match=error_pattern): + with raises(ValueError, match=error_pattern): RegionCode(name="Western Europe", hierarchy="R5OECD", iso3_codes=iso3_codes) @@ -171,7 +172,7 @@ def test_RegionCode_iso3_code_str_fail(): "iso3_codes\n" " Value error, Region 'Austria' has invalid ISO3 country code\(s\): AUTT" ) - with pytest.raises(ValueError, match=error_pattern): + with raises(ValueError, match=error_pattern): RegionCode(name="Austria", hierarchy="country", iso3_codes="AUTT") @@ -182,3 +183,13 @@ def test_MetaCode_allowed_values_attribute(): ) assert meta.allowed_values == [True] + + +def test_code_with_multi_key_dict_raises(): + with raises(ValueError, match="Code is not a single name-attributes mapping"): + Code.from_dict({"name": "", "illegal second key": ""}) + + +def test_code_with_definition_and_description_raises(): + with raises(ValueError, match="Found both 'definition' and 'description'"): + Code.from_dict({"Code": {"definition": "", "description": ""}}) diff --git a/tests/test_codelist.py b/tests/test_codelist.py index bdc6767b..4fd0a5c0 100644 --- a/tests/test_codelist.py +++ b/tests/test_codelist.py @@ -1,9 +1,9 @@ -import pytest +from pytest import raises import pandas as pd import pandas.testing as pdt import logging -from nomenclature.code import Code, RegionCode, MetaCode +from nomenclature.code import Code, RegionCode, MetaCode, VariableCode from nomenclature.codelist import ( CodeList, VariableCodeList, @@ -25,6 +25,28 @@ def test_simple_codelist(): assert type(codelist["Some Variable"].bool) == bool # this is a boolean +def test_codelist_adding_duplicate_raises(): + codelist = VariableCodeList.from_directory( + "variable", TEST_DATA_DIR / "simple_codelist" + ) + with raises(ValueError, match="Duplicate item in variable codelist: Some Variable"): + codelist["Some Variable"] = "" + + +def test_codelist_adding_non_code_raises(): + codelist = CodeList(name="test") + + with raises(TypeError, match="Codelist can only contain Code items"): + codelist["Some Variable"] = "" + + +def test_codelist_name_key_mismatch(): + codelist = CodeList(name="test") + + with raises(ValueError, match="Key has to be equal to code name"): + codelist["Some Variable"] = Code(name="Some other variable") + + def test_codelist_to_yaml(): """Cast a codelist to yaml format""" code = VariableCodeList.from_directory( @@ -44,7 +66,7 @@ def test_codelist_to_yaml(): def test_duplicate_code_raises(): """Check that code conflicts across different files raises""" match = "Duplicate item in variable codelist: Some Variable" - with pytest.raises(ValueError, match=match): + with raises(ValueError, match=match): VariableCodeList.from_directory( "variable", TEST_DATA_DIR / "duplicate_code_raises" ) @@ -53,7 +75,7 @@ def test_duplicate_code_raises(): def test_duplicate_tag_raises(): """Check that code conflicts across different files raises""" match = "Duplicate item in tag codelist: Tag" - with pytest.raises(ValueError, match=match): + with raises(ValueError, match=match): VariableCodeList.from_directory( "variable", TEST_DATA_DIR / "duplicate_tag_raises" ) @@ -138,7 +160,7 @@ def test_stray_tag_fails(): """Check that typos in a tag raises expected error""" match = r"Unexpected {} in codelist: Primary Energy\|{Feul}" - with pytest.raises(ValueError, match=match): + with raises(ValueError, match=match): VariableCodeList.from_directory( "variable", TEST_DATA_DIR / "stray_tag" / "definitions" / "variable" ) @@ -148,7 +170,7 @@ def test_end_whitespace_fails(): """Check that typos in a tag raises expected error""" match = "Unexpected whitespace at the end of a scenario code: 'scenario2 '" - with pytest.raises(ValueError, match=match): + with raises(ValueError, match=match): CodeList.from_directory( "scenario", TEST_DATA_DIR / "end_whitespace" / "definitions" / "scenario" ) diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 00000000..3e72cb72 --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,25 @@ +from pathlib import Path +from pytest import raises + +from nomenclature.config import Repository, NomenclatureConfig + +from conftest import TEST_DATA_DIR + + +def test_hash_and_release_raises(): + with raises(ValueError, match="`hash` or `release` can be provided, not both"): + NomenclatureConfig.from_file( + TEST_DATA_DIR / "nomenclature_configs" / "hash_and_release.yaml" + ) + + +def test_setting_local_path_raises(): + with raises(ValueError, match="`local_path` must not be set"): + Repository(local_path=Path(".")) + + +def test_unknown_repo_raises(): + with raises(ValueError, match="Unknown repository 'common-definitions'"): + NomenclatureConfig.from_file( + TEST_DATA_DIR / "nomenclature_configs" / "unknown_repo.yaml" + ) diff --git a/tests/test_model_registration_parser.py b/tests/test_model_registration_parser.py new file mode 100644 index 00000000..7298dbec --- /dev/null +++ b/tests/test_model_registration_parser.py @@ -0,0 +1,28 @@ +import yaml + +from nomenclature import parse_model_registration + +from conftest import TEST_DATA_DIR + + +def test_parse_model_registration(tmp_path): + parse_model_registration( + TEST_DATA_DIR / "region_aggregation" / "excel_model_registration.xlsx", tmp_path + ) + + # Test model mapping + with open(tmp_path / "Model 1.1_mapping.yaml", "r") as file: + obs_model_mapping = yaml.safe_load(file) + with open( + TEST_DATA_DIR / "region_aggregation" / "excel_mapping_reference.yaml", "r" + ) as file: + exp_model_mapping = yaml.safe_load(file) + assert obs_model_mapping == exp_model_mapping + + # Test model regions + with open(tmp_path / "Model 1.1_regions.yaml", "r") as file: + obs_model_regions = yaml.safe_load(file) + exp_model_regions = [ + {"Model 1.1": ["Model 1.1|Region 1", "Region 2", "Model 1.1|Region 3"]} + ] + assert obs_model_regions == exp_model_regions