Improve test coverage (#310)
* Add output_directory option to parse_model_registration

* Remove multiple option to be in line with cli standards

* Update cli tests

* Add classmethod decorator

* Add two checks for setting CodeList items

* Validate default value for local_path

* Remove unnecessary check

* Raise NotImplementedError

* Remove unused equality operators

* Refine dict representation

* Adjust after cli changes

* Add explicit config tests

* Change pytest raise import

* Add two explicit Code tests

* Add CodeList tests

* Config test data

* Add explicit model registration parser test

* Apply suggestions from code review

Co-authored-by: Daniel Huppmann <dh@dergelbesalon.at>

---------

Co-authored-by: Daniel Huppmann <dh@dergelbesalon.at>
phackstock and danielhuppmann committed Jan 22, 2024
1 parent ba0ded0 commit a9c47dc
Showing 15 changed files with 207 additions and 73 deletions.
27 changes: 20 additions & 7 deletions nomenclature/__init__.py
@@ -1,5 +1,6 @@
import logging
from importlib.metadata import version
from pathlib import Path

import yaml

@@ -46,30 +47,42 @@ def create_yaml_from_xlsx(source, target, sheet_name, col, attrs=None):
).to_yaml(target)


def parse_model_registration(model_registration_file):
def parse_model_registration(
model_registration_file: str | Path, output_directory: str | Path = Path(".")
) -> None:
"""Parses a model registration file and writes the definitions & mapping yaml files
Parameters
----------
source : str, path, file-like object
model_registration_file : str, path, file-like object
Path to xlsx model registration file.
file_name : str
Model-identifier part of the yaml file names.
output_directory : str, path, file-like object
Directory where the model mapping and region file will be saved;
defaults to current working directory
"""
if not isinstance(output_directory, Path):
output_directory = Path(output_directory)

region_aggregregation_mapping = RegionAggregationMapping.from_file(
model_registration_file
)
file_model_name = "".join(
x if (x.isalnum() or x in "._- ") else "_"
for x in region_aggregregation_mapping.model[0]
)
region_aggregregation_mapping.to_yaml(f"{file_model_name}_mapping.yaml")
region_aggregregation_mapping.to_yaml(
output_directory / f"{file_model_name}_mapping.yaml"
)
if native_regions := [
{
region_aggregregation_mapping.model[
0
]: region_aggregregation_mapping.upload_native_regions
}
]:
with open(f"{file_model_name}_regions.yaml", "w") as f:
yaml.dump(native_regions, f)
with open(
(output_directory / f"{file_model_name}_regions.yaml"),
"w",
encoding="utf-8",
) as file:
yaml.dump(native_regions, file)
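
A minimal usage sketch of the updated function; the registration file name and output directory below are hypothetical:

    from pathlib import Path

    from nomenclature import parse_model_registration

    # hypothetical paths for illustration only
    registration_file = Path("model_registration.xlsx")
    output_dir = Path("definitions")
    output_dir.mkdir(exist_ok=True)

    # writes "<model>_mapping.yaml" and, if native regions are defined,
    # "<model>_regions.yaml" into output_dir instead of the current directory
    parse_model_registration(registration_file, output_directory=output_dir)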
14 changes: 2 additions & 12 deletions nomenclature/cli.py
@@ -1,4 +1,3 @@
import ast
from pathlib import Path
from typing import List, Optional

@@ -12,16 +11,6 @@
cli = click.Group()


class PythonLiteralOption(click.Option):
def type_cast_value(self, ctx, value):
if value is None:
return None
try:
return ast.literal_eval(value)
except Exception:
raise click.BadParameter(value)


@cli.command("validate-yaml")
@click.argument("path", type=click.Path(exists=True, path_type=Path))
def cli_valid_yaml(path: Path):
@@ -49,7 +38,8 @@ def cli_valid_yaml(path: Path):
@click.option(
"--dimensions",
help="Optional list of dimensions",
cls=PythonLiteralOption,
type=str,
multiple=True,
default=None,
)
def cli_valid_project(
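
With PythonLiteralOption removed, each dimension is passed as its own --dimensions flag and click collects the values into a tuple. A sketch using click's test runner; the project path is hypothetical:

    from click.testing import CliRunner

    from nomenclature import cli

    runner = CliRunner()

    # each dimension is a separate flag now, instead of a Python-literal
    # string such as "['variable', 'region']"
    result = runner.invoke(
        cli,
        [
            "validate-project",
            "path/to/project",  # hypothetical project directory
            "--dimensions", "variable",
            "--dimensions", "region",
        ],
    )
    print(result.exit_code)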
7 changes: 6 additions & 1 deletion nomenclature/codelist.py
@@ -54,6 +54,7 @@ def check_stray_tag(cls, v: Dict[str, Code]) -> Dict[str, Code]:
return v

@field_validator("mapping")
@classmethod
def check_end_whitespace(
cls, v: Dict[str, Code], info: ValidationInfo
) -> Dict[str, Code]:
@@ -66,9 +67,13 @@ def check_end_whitespace(
)
return v

def __setitem__(self, key, value):
def __setitem__(self, key: str, value: Code) -> None:
if key in self.mapping:
raise ValueError(f"Duplicate item in {self.name} codelist: {key}")
if not isinstance(value, Code):
raise TypeError("Codelist can only contain Code items")
if key != value.name:
raise ValueError("Key has to be equal to code name")
self.mapping[key] = value

def __getitem__(self, k):
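
A sketch of the new __setitem__ guards, assuming a CodeList can be constructed directly from a name and an empty mapping:

    import pytest

    from nomenclature.code import Code
    from nomenclature.codelist import CodeList

    codelist = CodeList(name="variable", mapping={})
    codelist["Primary Energy"] = Code(name="Primary Energy")  # key matches the code name

    # adding the same key twice is rejected
    with pytest.raises(ValueError, match="Duplicate item"):
        codelist["Primary Energy"] = Code(name="Primary Energy")

    # only Code instances are accepted
    with pytest.raises(TypeError, match="only contain Code items"):
        codelist["Final Energy"] = "not a Code"

    # the key has to be equal to the code's name
    with pytest.raises(ValueError, match="equal to code name"):
        codelist["PE"] = Code(name="Primary Energy")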
15 changes: 3 additions & 12 deletions nomenclature/config.py
@@ -3,7 +3,7 @@

import yaml
from git import Repo
from pydantic import BaseModel, ValidationInfo, field_validator, model_validator
from pydantic import BaseModel, Field, ValidationInfo, field_validator, model_validator


class CodeListConfig(BaseModel):
@@ -27,9 +27,8 @@ class Repository(BaseModel):
url: str
hash: str | None = None
release: str | None = None
local_path: Path | None = (
None # defined via the `repository` name in the configuration
)
local_path: Path | None = Field(default=None, validate_default=True)
# defined via the `repository` name in the configuration

@model_validator(mode="after")
@classmethod
@@ -110,14 +109,6 @@ def check_definitions_repository(
definitions_repos = v.definitions.repos if v.definitions else {}
mapping_repos = {"mappings": v.mappings.repository} if v.mappings else {}
repos = {**definitions_repos, **mapping_repos}
if repos and not v.repositories:
raise ValueError(
(
"If repositories are used for definitions or mappings, they need "
"to be defined under `repositories`"
)
)

for use, repository in repos.items():
if repository not in v.repositories:
raise ValueError((f"Unknown repository '{repository}' in {use}."))
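
Setting validate_default=True routes the None default through validation, and the removed guard appears redundant because a repository referenced in definitions or mappings but missing from repositories already fails the "Unknown repository" check in the loop below. A generic pydantic sketch of the validate_default mechanism (not the project's actual validator, whose body is not shown here):

    from pydantic import BaseModel, Field, field_validator

    class Example(BaseModel):
        local_path: str | None = Field(default=None, validate_default=True)

        @field_validator("local_path")
        @classmethod
        def report(cls, value):
            # hypothetical check for illustration: with validate_default=True
            # this also runs when the field is left at its default of None
            print(f"validating local_path={value!r}")
            return value

    Example()  # prints "validating local_path=None"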
2 changes: 1 addition & 1 deletion nomenclature/processor/processor.py
@@ -7,4 +7,4 @@
class Processor(BaseModel, abc.ABC):
@abc.abstractmethod
def apply(self, df: IamDataFrame) -> IamDataFrame:
return
raise NotImplementedError
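
Because the abstract apply now raises NotImplementedError instead of silently returning None, concrete processors must override it. A minimal sketch, using the import path of the file shown above:

    from pyam import IamDataFrame

    from nomenclature.processor.processor import Processor

    class NoOpProcessor(Processor):
        """Illustrative processor that returns the data unchanged."""

        def apply(self, df: IamDataFrame) -> IamDataFrame:
            return df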
10 changes: 3 additions & 7 deletions nomenclature/processor/region.py
@@ -61,9 +61,6 @@ def target_native_region(self) -> str:
"""
return self.rename if self.rename is not None else self.name

def __eq__(self, other: "NativeRegion") -> bool:
return super().__eq__(other)


class CommonRegion(BaseModel):
"""Common region used for model intercomparison.
@@ -92,9 +89,6 @@ def rename_dict(self):
"rename_dict is only available for single constituent regions"
)

def __eq__(self, other: "CommonRegion") -> bool:
return super().__eq__(other)


class RegionAggregationMapping(BaseModel):
"""Hold information for region processing on a per-model basis.
@@ -413,7 +407,9 @@ def __eq__(self, other: "RegionAggregationMapping") -> bool:
return self.model_dump(exclude={"file"}) == other.model_dump(exclude={"file"})

def to_yaml(self, file) -> None:
dict_representation = {"model": self.model}
dict_representation = {
"model": self.model[0] if len(self.model) == 1 else self.model
}
if self.native_regions:
dict_representation["native_regions"] = [
{native_region.name: native_region.rename}
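
The refined dict representation writes a single model as a scalar instead of a one-element list; a small standalone sketch of the effect (mirroring, not calling, the method above):

    import yaml

    for model in (["model_a"], ["model_a", "model_b"]):
        dict_representation = {"model": model[0] if len(model) == 1 else model}
        print(yaml.dump(dict_representation), end="")
    # model: model_a
    # model:
    # - model_a
    # - model_b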
5 changes: 1 addition & 4 deletions nomenclature/testing.py
@@ -132,10 +132,7 @@ def assert_valid_structure(
f"Definitions directory not found: {path / definitions}"
)

if dimensions == []: # if "dimensions" were specified as "[]"
raise ValueError("No dimensions to validate.")

if dimensions is None: # if "dimensions" were not specified
if dimensions == (): # if "dimensions" were not specified
dimensions = [x.stem for x in (path / definitions).iterdir() if x.is_dir()]
if not dimensions:
raise FileNotFoundError(
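
The previous dimensions is None and dimensions == [] branches collapse into a single dimensions == () check because a click option declared with multiple=True arrives as an empty tuple when the flag is not given. A small click sketch of that default:

    import click
    from click.testing import CliRunner

    @click.command()
    @click.option("--dimensions", type=str, multiple=True, default=None)
    def show(dimensions):
        click.echo(repr(dimensions))

    result = CliRunner().invoke(show, [])
    print(result.output)  # "()" - the unspecified option is an empty tuple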
Binary file not shown.
12 changes: 12 additions & 0 deletions tests/data/nomenclature_configs/hash_and_release.yaml
@@ -0,0 +1,12 @@
repositories:
common-definitions:
url: https://github.com/IAMconsortium/common-definitions.git/
hash: asdf
release: "1.0"
definitions:
region:
repository: common-definitions
country: true
variable:
repository: common-definitions
repository_dimension_path: definitions/variable
3 changes: 3 additions & 0 deletions tests/data/nomenclature_configs/unknown_repo.yaml
@@ -0,0 +1,3 @@
definitions:
region:
repository: common-definitions
79 changes: 60 additions & 19 deletions tests/test_cli.py
@@ -1,13 +1,16 @@
import subprocess
import sys

from click.testing import CliRunner
from nomenclature import cli
from nomenclature.testing import assert_valid_yaml, assert_valid_structure
import pytest
import pandas as pd
import pydantic

import pytest
from click.testing import CliRunner
from conftest import TEST_DATA_DIR
from pandas.testing import assert_frame_equal
from pyam import IAMC_IDX, IamDataFrame, assert_iamframe_equal

from nomenclature import cli
from nomenclature.testing import assert_valid_structure, assert_valid_yaml

runner = CliRunner()

@@ -156,7 +159,11 @@ def test_cli_custom_dimensions_runs():
"validate-project",
str(TEST_DATA_DIR / "non-default_dimensions"),
"--dimensions",
"['variable', 'region', 'scenario']",
"variable",
"--dimensions",
"region",
"--dimensions",
"scenario",
],
)
assert result_valid.exit_code == 0
@@ -172,7 +179,7 @@ def test_cli_custom_dimensions_fails():
"validate-project",
str(TEST_DATA_DIR / "non-default_dimensions"),
"--dimensions",
"['variable', 'region', 'foo']",
"foo",
],
)
assert result_invalid.exit_code == 1
@@ -190,7 +197,9 @@ def test_cli_empty_dimensions_run():
"validate-project",
str(TEST_DATA_DIR / "non-default_dimensions_one_empty"),
"--dimensions",
"['variable', 'region']",
"variable",
"--dimensions",
"region",
],
)
assert result_valid.exit_code == 0
@@ -255,19 +264,51 @@ def test_cli_empty_definitions_dir():
assert "`definitions` directory is empty" in str(cli_result.exception)


def test_cli_empty_dimensions():
"""Assert that an error is raised when an empty list is given as dimensions"""

cli_result = runner.invoke(
def test_check_region_aggregation(tmp_path):
IamDataFrame(
pd.DataFrame(
[
["m_a", "s_a", "region_A", "Primary Energy", "EJ/yr", 1, 2],
["m_a", "s_a", "region_B", "Primary Energy", "EJ/yr", 3, 4],
["m_a", "s_a", "World", "Primary Energy", "EJ/yr", 5, 6],
],
columns=IAMC_IDX + [2005, 2010],
)
).to_excel(tmp_path / "data.xlsx")
runner.invoke(
cli,
[
"validate-project",
str(TEST_DATA_DIR / "non-default_dimensions"),
"--dimensions",
"[]",
"check-region-aggregation",
str(tmp_path / "data.xlsx"),
"--workflow-directory",
str(TEST_DATA_DIR / "region_processing"),
"--definitions",
"dsd",
"--mappings",
"partial_aggregation",
"--processed-data",
str(tmp_path / "results.xlsx"),
"--differences",
str(tmp_path / "differences.xlsx"),
],
)

assert cli_result.exit_code == 1
assert isinstance(cli_result.exception, ValueError)
assert "No dimensions to validate." in str(cli_result.exception)
# Check differences
exp_difference = pd.DataFrame(
[
["m_a", "s_a", "World", "Primary Energy", "EJ/yr", 2005, 5, 4, 20.0],
],
columns=IAMC_IDX + ["year", "original", "aggregated", "difference (%)"],
)
assert_frame_equal(
pd.read_excel(tmp_path / "differences.xlsx"), exp_difference, check_dtype=False
)

# Check aggregation result
exp_result = IamDataFrame(
pd.DataFrame(
[["m_a", "s_a", "World", "Primary Energy", "EJ/yr", 5, 6]],
columns=IAMC_IDX + [2005, 2010],
)
)
assert_iamframe_equal(IamDataFrame(tmp_path / "results.xlsx"), exp_result)
19 changes: 15 additions & 4 deletions tests/test_code.py
@@ -1,10 +1,11 @@
import pytest
from pytest import raises

from nomenclature.code import Code, VariableCode, RegionCode, MetaCode


def test_variable_without_unit_raises():
with pytest.raises(ValueError, match="unit\n.*required"):
with raises(ValueError, match="unit\n.*required"):
VariableCode(name="No unit")


@@ -26,7 +27,7 @@ def test_variable_alias_setting():
@pytest.mark.parametrize("illegal_key", ["contains-hyphen", "also not allowed", "True"])
def test_illegal_additional_attribute(illegal_key):
match = f"{illegal_key}.*'code1'.*not allowed"
with pytest.raises(ValueError, match=match):
with raises(ValueError, match=match):
Code(name="code1", extra_attributes={illegal_key: True})


@@ -161,7 +162,7 @@ def test_RegionCode_iso3_code_list_fail():
"IBL, ITL, LIC, MLA, BEG, FRT, ANB, GDR, LXB, MNO, NTD, NRW, PRE, EPA, " # noqa
"SWD, CEW, GTR, SOR" # noqa
)
with pytest.raises(ValueError, match=error_pattern):
with raises(ValueError, match=error_pattern):
RegionCode(name="Western Europe", hierarchy="R5OECD", iso3_codes=iso3_codes)


@@ -171,7 +172,7 @@ def test_RegionCode_iso3_code_str_fail():
"iso3_codes\n"
" Value error, Region 'Austria' has invalid ISO3 country code\(s\): AUTT"
)
with pytest.raises(ValueError, match=error_pattern):
with raises(ValueError, match=error_pattern):
RegionCode(name="Austria", hierarchy="country", iso3_codes="AUTT")


@@ -182,3 +183,13 @@ def test_MetaCode_allowed_values_attribute():
)

assert meta.allowed_values == [True]


def test_code_with_multi_key_dict_raises():
with raises(ValueError, match="Code is not a single name-attributes mapping"):
Code.from_dict({"name": "", "illegal second key": ""})


def test_code_with_definition_and_description_raises():
with raises(ValueError, match="Found both 'definition' and 'description'"):
Code.from_dict({"Code": {"definition": "", "description": ""}})