General cleanup (#244)
* Remove mutable defaults

* Remove unnecessary else

* Use getattr instead of __getattribute__

* Rename for improved clarity

* Use dict comprehension

* Update test data for indent

* Use error instead of e

* Use getattr instead of __getattribute__
phackstock committed May 17, 2023
1 parent d9de678 commit b76b28f
Showing 8 changed files with 65 additions and 63 deletions.
4 changes: 3 additions & 1 deletion nomenclature/__init__.py
@@ -30,7 +30,7 @@
__version__ = version("nomenclature-iamc")


- def create_yaml_from_xlsx(source, target, sheet_name, col, attrs=[]):
+ def create_yaml_from_xlsx(source, target, sheet_name, col, attrs=None):
"""Parses an xlsx file with a codelist and writes a yaml file
Parameters
@@ -46,6 +46,8 @@ def create_yaml_from_xlsx(source, target, sheet_name, col, attrs=None):
attrs : list, optional
Columns from `sheet_name` to use as attributes.
"""
+ if attrs is None:
+ attrs = []
SPECIAL_CODELIST.get(col.lower(), CodeList).read_excel(
name="", source=source, sheet_name=sheet_name, col=col, attrs=attrs
).to_yaml(target)
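The attrs=None change above is the standard fix for Python's shared mutable default pitfall: a default list is created once, when the function is defined, and then reused by every call that does not pass the argument. A minimal standalone sketch of the failure mode and the None-sentinel fix (hypothetical function names, not taken from this repository):

# the default list is built once and shared across calls
def append_broken(item, bucket=[]):
    bucket.append(item)
    return bucket

print(append_broken("a"))  # ['a']
print(append_broken("b"))  # ['a', 'b']  state leaks between calls

# None as a sentinel creates a fresh list per call
def append_fixed(item, bucket=None):
    if bucket is None:
        bucket = []
    bucket.append(item)
    return bucket

print(append_fixed("a"))  # ['a']
print(append_fixed("b"))  # ['b']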
28 changes: 14 additions & 14 deletions nomenclature/codelist.py
@@ -1,10 +1,10 @@
+ import logging
from pathlib import Path
from typing import ClassVar, Dict, List

- import pandas as pd
import numpy as np
+ import pandas as pd
import yaml
- import logging
from jsonschema import validate
from pyam.utils import write_sheet
from pydantic import BaseModel, validator
@@ -17,18 +17,17 @@
VariableRenameTargetError,
)


here = Path(__file__).parent.absolute()


- def read_validation_schema(i):
- with open(here / "validation_schemas" / f"{i}_schema.yaml", "r") as f:
+ def read_validation_schema(schema):
+ with open(here / "validation_schemas" / f"{schema}_schema.yaml", "r") as f:
schema = yaml.safe_load(f)
return schema


SCHEMA_TYPES = ("variable", "tag", "region", "generic")
- SCHEMA_MAPPING = dict([(i, read_validation_schema(i)) for i in SCHEMA_TYPES])
+ SCHEMA_MAPPING = {schema: read_validation_schema(schema) for schema in SCHEMA_TYPES}


class CodeList(BaseModel):
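The SCHEMA_MAPPING change above replaces dict() over a list of key/value tuples with a dict comprehension. A small self-contained sketch of the two equivalent spellings (load_schema is a made-up stand-in for read_validation_schema, which reads a YAML file in the real code):

schema_types = ("variable", "tag", "region", "generic")

def load_schema(name):
    # stand-in loader; returns a trivial schema instead of reading a file
    return {"title": f"{name}_schema"}

# before: build (key, value) tuples, then hand the list to dict()
mapping_old = dict([(name, load_schema(name)) for name in schema_types])

# after: the dict comprehension states the mapping directly
mapping_new = {name: load_schema(name) for name in schema_types}

assert mapping_old == mapping_new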
@@ -223,7 +222,7 @@ def from_directory(cls, name: str, path: Path, file_glob_pattern: str = "**/*"):
return cls(name=name, mapping=mapping)

@classmethod
- def read_excel(cls, name, source, sheet_name, col, attrs=[]):
+ def read_excel(cls, name, source, sheet_name, col, attrs=None):
"""Parses an xlsx file with a codelist
Parameters
@@ -239,6 +238,8 @@ def read_excel(cls, name, source, sheet_name, col, attrs=[]):
attrs : list, optional
Columns from `sheet_name` to use as attributes.
"""
+ if attrs is None:
+ attrs = []
codelist = pd.read_excel(source, sheet_name=sheet_name, usecols=[col] + attrs)

# replace nan with None
@@ -274,8 +275,8 @@ def to_yaml(self, path=None):
"""

class Dumper(yaml.Dumper):
- def increase_indent(self, flow=False, *args, **kwargs):
- return super().increase_indent(flow=flow, indentless=False)
+ def increase_indent(self, flow: bool = False, indentless: bool = False):
+ return super().increase_indent(flow=flow, indentless=indentless)

# translate to list of nested dicts, replace None by empty field, write to file
stream = (
@@ -288,11 +289,10 @@ def increase_indent(self, flow=False, *args, **kwargs):
.replace(": nan\n", ":\n")
)

- if path is not None:
- with open(path, "w") as file:
- file.write(stream)
- else:
+ if path is None:
return stream
+ with open(path, "w") as file:
+ file.write(stream)
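The increase_indent override earlier in to_yaml controls how PyYAML lays out block sequences; forwarding indentless instead of hard-coding indentless=False changes the emitted indentation, which is presumably why the test data file at the end of this commit was updated. A sketch of the effect, assuming PyYAML's standard Dumper API (the data is made up):

import yaml

class IndentedDumper(yaml.Dumper):
    # forcing indentless=False indents list items one level under their key
    def increase_indent(self, flow=False, indentless=False):
        return super().increase_indent(flow=flow, indentless=False)

data = {"region-aggregation": [{"Primary Energy (mean)": {"method": "mean"}}]}

# default emitter: list items start in the same column as the parent key
print(yaml.dump(data, default_flow_style=False))

# forced indentation: list items are pushed one level to the right
print(yaml.dump(data, Dumper=IndentedDumper, default_flow_style=False))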

def to_pandas(self, sort_by_code: bool = False) -> pd.DataFrame:
"""Export the CodeList to a :class:`pandas.DataFrame`
@@ -585,7 +585,7 @@ def hierarchy(self) -> List[str]:
List[str]
"""
- return sorted(list(set(v.hierarchy for v in self.mapping.values())))
+ return sorted(list({v.hierarchy for v in self.mapping.values()}))


class MetaCodeList(CodeList):
2 changes: 1 addition & 1 deletion nomenclature/core.py
@@ -10,7 +10,7 @@
logger = logging.getLogger(__name__)


- @validate_arguments(config=dict(arbitrary_types_allowed=True))
+ @validate_arguments(config={"arbitrary_types_allowed": True})
def process(
df: pyam.IamDataFrame,
dsd: DataStructureDefinition,
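The decorator changed above is pydantic's validate_arguments (pydantic v1 API), which validates and coerces the arguments of process() against its type hints; arbitrary_types_allowed is required because classes such as pyam.IamDataFrame are not pydantic models. A short sketch on a hypothetical function, assuming that v1 API:

from typing import List

from pydantic import validate_arguments

@validate_arguments(config={"arbitrary_types_allowed": True})
def scale(values: List[int], factor: int) -> List[int]:
    # arguments are validated and coerced before the body runs
    return [value * factor for value in values]

print(scale(["1", "2"], "3"))  # strings are coerced to int: [3, 6]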
68 changes: 35 additions & 33 deletions nomenclature/processor/region.py
@@ -82,10 +82,9 @@ def is_single_constituent_region(self):
def rename_dict(self):
if self.is_single_constituent_region:
return {self.constituent_regions[0]: self.name}
- else:
- raise AttributeError(
- "rename_dict is only available for single constituent regions"
- )
+ raise AttributeError(
+ "rename_dict is only available for single constituent regions"
+ )


class RegionAggregationMapping(BaseModel):
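The rename_dict change above is the guard-clause style behind the "Remove unnecessary else" bullet: once a branch returns or raises, the alternative path needs no else and can sit one level shallower. A tiny standalone sketch with a hypothetical class:

class Region:
    def __init__(self, name, constituents):
        self.name = name
        self.constituent_regions = constituents

    @property
    def rename_dict(self):
        if len(self.constituent_regions) == 1:
            return {self.constituent_regions[0]: self.name}
        # no else needed: the return above already ended the single-region path
        raise AttributeError(
            "rename_dict is only available for single constituent regions"
        )

print(Region("World", ["globe"]).rename_dict)  # {'globe': 'World'}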
@@ -223,20 +222,20 @@ def from_file(cls, file: Union[Path, str]):
This function is used to convert a model mapping yaml file into a dictionary
which is used to initialize a RegionAggregationMapping.
"""
- SCHEMA_FILE = here / "../validation_schemas" / "region_mapping_schema.yaml"
+ schema_file = here / "../validation_schemas" / "region_mapping_schema.yaml"
file = Path(file) if isinstance(file, str) else file
with open(file, "r") as f:
mapping_input = yaml.safe_load(f)
- with open(SCHEMA_FILE, "r") as f:
+ with open(schema_file, "r") as f:
schema = yaml.safe_load(f)

# Validate the input data using jsonschema
try:
jsonschema.validate(mapping_input, schema)
- except jsonschema.ValidationError as e:
+ except jsonschema.ValidationError as error:
# Add file information in case of error
raise jsonschema.ValidationError(
- f"{e.message} in {get_relative_path(file)}"
+ f"{error.message} in {get_relative_path(file)}"
)

# Add the file name to mapping_input
@@ -245,24 +244,27 @@ def from_file(cls, file: Union[Path, str]):
# Reformat the "native_regions"
if "native_regions" in mapping_input:
native_region_list: List[Dict] = []
- for nr in mapping_input["native_regions"]:
- if isinstance(nr, str):
- native_region_list.append({"name": nr})
- elif isinstance(nr, dict):
+ for native_region in mapping_input["native_regions"]:
+ if isinstance(native_region, str):
+ native_region_list.append({"name": native_region})
+ elif isinstance(native_region, dict):
native_region_list.append(
- {"name": list(nr)[0], "rename": list(nr.values())[0]}
+ {
+ "name": list(native_region)[0],
+ "rename": list(native_region.values())[0],
+ }
)
mapping_input["native_regions"] = native_region_list

# Reformat the "common_regions"
if "common_regions" in mapping_input:
common_region_list: List[Dict[str, List[Dict[str, str]]]] = []
- for cr in mapping_input["common_regions"]:
- cr_name = list(cr)[0]
+ for common_region in mapping_input["common_regions"]:
+ common_region_name = list(common_region)[0]
common_region_list.append(
{
- "name": cr_name,
- "constituent_regions": cr[cr_name],
+ "name": common_region_name,
+ "constituent_regions": common_region[common_region_name],
}
)
mapping_input["common_regions"] = common_region_list
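The loops above normalize the "native_regions" and "common_regions" entries read from a model mapping YAML, where each entry may be a plain string or a single-key dict, into a uniform list of dicts. A standalone sketch of the same normalization with made-up region names:

def normalize_native_regions(entries):
    # entries come from YAML as either "name" or {"name": "rename"}
    normalized = []
    for entry in entries:
        if isinstance(entry, str):
            normalized.append({"name": entry})
        elif isinstance(entry, dict):
            normalized.append(
                {"name": list(entry)[0], "rename": list(entry.values())[0]}
            )
    return normalized

print(normalize_native_regions(["Region A", {"Region B": "Region B (alt)"}]))
# [{'name': 'Region A'}, {'name': 'Region B', 'rename': 'Region B (alt)'}]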
@@ -349,22 +351,22 @@ def from_directory(cls, path: DirectoryPath, dsd: DataStructureDefinition):
for file in (f for f in path.glob("**/*") if f.suffix in {".yaml", ".yml"}):
try:
mapping = RegionAggregationMapping.from_file(file)
- for m in mapping.model:
- if m not in mapping_dict:
- mapping_dict[m] = mapping
+ for model in mapping.model:
+ if model not in mapping_dict:
+ mapping_dict[model] = mapping
else:
errors.append(
ErrorWrapper(
ModelMappingCollisionError(
- model=m,
+ model=model,
file1=mapping.file,
- file2=mapping_dict[m].file,
+ file2=mapping_dict[model].file,
),
"__root__",
)
)
- except (pydantic.ValidationError, jsonschema.ValidationError) as e:
- errors.append(ErrorWrapper(e, "__root__"))
+ except (pydantic.ValidationError, jsonschema.ValidationError) as error:
+ errors.append(ErrorWrapper(error, "__root__"))

if errors:
raise pydantic.ValidationError(errors, model=RegionProcessor)
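from_directory collects every mapping problem before raising, so a single run reports all broken files instead of stopping at the first one. A simplified, pydantic-free sketch of that collect-then-raise pattern (the loader and file names are hypothetical, not this package's API):

def load_all(files, loader):
    results, errors = [], []
    for file in files:
        try:
            results.append(loader(file))
        except ValueError as error:
            # keep going, but remember which file failed and why
            errors.append(f"{file}: {error}")
    if errors:
        # raise once, with everything that went wrong
        raise ValueError("Errors in input files:\n" + "\n".join(errors))
    return results

def fake_loader(name):
    if name == "bad.yaml":
        raise ValueError("invalid mapping")
    return {"file": name}

print(load_all(["a.yaml", "b.yaml"], fake_loader))  # both files load fine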
@@ -457,18 +459,18 @@ def _apply_region_processing(self, model_df: IamDataFrame) -> IamDataFrame:

# aggregate common regions
if self.mappings[model].common_regions is not None:
- for cr in self.mappings[model].common_regions:
+ for common_region in self.mappings[model].common_regions:
# if a common region is consists of a single native region, rename
- if cr.is_single_constituent_region:
- _df = model_df.filter(region=cr.constituent_regions[0]).rename(
- region=cr.rename_dict
- )
+ if common_region.is_single_constituent_region:
+ _df = model_df.filter(
+ region=common_region.constituent_regions[0]
+ ).rename(region=common_region.rename_dict)
if not _df.empty:
_processed_data.append(_df._data)
continue

# if there are multiple constituent regions, aggregate
- regions = [cr.name, cr.constituent_regions]
+ regions = [common_region.name, common_region.constituent_regions]

# first, perform 'simple' aggregation (no arguments)
simple_vars = [
@@ -540,13 +542,13 @@ def _aggregate_region(df, var, *regions, **kwargs):
"""Perform region aggregation with kwargs catching inconsistent-index errors"""
try:
return df.aggregate_region(var, *regions, **kwargs)
- except ValueError as e:
- if str(e) == "Inconsistent index between variable and weight!":
+ except ValueError as error:
+ if str(error) == "Inconsistent index between variable and weight!":
logger.info(
f"Could not aggregate '{var}' for region '{regions[0]}' ({kwargs})"
)
else:
- raise e
+ raise error


def _compare_and_merge(original: pd.Series, aggregated: pd.Series) -> IamDataFrame:
4 changes: 2 additions & 2 deletions nomenclature/processor/required_data.py
@@ -63,8 +63,8 @@ def validate_with_definition(self, dsd: DataStructureDefinition) -> None:
("region", "region"),
("variable", "variables"),
):
- if invalid := dsd.__getattribute__(dimension).validate_items(
- self.__getattribute__(attribute_name) or []
+ if invalid := getattr(dsd, dimension).validate_items(
+ getattr(self, attribute_name) or []
):
error_msg += (
f"The following {dimension}(s) were not found in the "
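getattr(obj, name) is the idiomatic spelling of the dynamic attribute lookup introduced above (and in nomenclature/validation.py further down); obj.__getattribute__(name) performs the same lookup but reads like interpreter internals. A short sketch with a hypothetical definition object:

class Definition:
    region = ["World", "Europe"]
    variable = ["Primary Energy"]

dsd = Definition()

for dimension in ("region", "variable"):
    # idiomatic dynamic lookup by name
    items = getattr(dsd, dimension)
    # equivalent to the dunder call it replaces
    assert items == dsd.__getattribute__(dimension)
    print(dimension, items)

# getattr also accepts a default instead of raising AttributeError
print(getattr(dsd, "scenario", []))  # []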
14 changes: 7 additions & 7 deletions nomenclature/testing.py
@@ -63,11 +63,11 @@ def _check_mappings(
raise FileNotFoundError(f"Mappings directory not found: {path / mappings}")


- def _collect_requiredData_errors(
- requiredDatadir: Path, dsd: DataStructureDefinition
+ def _collect_RequiredData_errors(
+ required_data_dir: Path, dsd: DataStructureDefinition
) -> None:
errors: List[str] = []
- for file in (requiredDatadir).iterdir():
+ for file in required_data_dir.iterdir():
try:
RequiredDataValidator.from_file(file).validate_with_definition(dsd)
except pydantic.ValidationError as pve:
@@ -77,7 +77,7 @@ def _collect_requiredData_errors(
raise ValueError(f"Found error(s) in required data files: {all_errors}")


- def _check_requiredData(
+ def _check_RequiredData(
path: Path,
definitions: str = "definitions",
dimensions: Optional[List[str]] = None,
@@ -86,10 +86,10 @@ def _check_requiredData(
dsd = DataStructureDefinition(path / definitions, dimensions)
if required_data is None:
if (path / "requiredData").is_dir():
- _collect_requiredData_errors(path / "required_data", dsd)
+ _collect_RequiredData_errors(path / "required_data", dsd)

elif (path / required_data).is_dir():
- _collect_requiredData_errors(path / required_data, dsd)
+ _collect_RequiredData_errors(path / required_data, dsd)
else:
raise FileNotFoundError(
f"Directory for required data not found at: {path / required_data}"
@@ -143,7 +143,7 @@ def assert_valid_structure(
f"`definitions` directory is empty: {path / definitions}"
)
_check_mappings(path, definitions, dimensions, mappings)
- _check_requiredData(path, definitions, dimensions, required_data)
+ _check_RequiredData(path, definitions, dimensions, required_data)


# Todo: add function which runs `DataStructureDefinition(path).validate(scenario)`
4 changes: 1 addition & 3 deletions nomenclature/validation.py
@@ -25,9 +25,7 @@ def validate(dsd, df, dimensions):
error = False

for dim in dimensions:
- if invalid := dsd.__getattribute__(dim).validate_items(
- df.__getattribute__(dim)
- ):
+ if invalid := getattr(dsd, dim).validate_items(getattr(df, dim)):
log_error(dim, invalid)
error = True

4 changes: 2 additions & 2 deletions tests/data/excel_io/validation_nc_list_arg.yaml
@@ -2,8 +2,8 @@
description: Total primary energy consumption
unit: EJ/yr
region-aggregation:
- - Primary Energy (mean):
-     method: mean
+ - Primary Energy (mean):
+   method: mean
- Primary Energy (mean):
description: Mean primary energy consumption
unit: EJ/yr
